diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9383adb6e50f..4e17a87e4b04 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1174,6 +1174,11 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*)
 	done
 	TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
 	;;
+aarch64-*-darwin* | arm64-*-darwin*)
+	tm_file="${tm_file} aarch64/aarch64-errata.h"
+	tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-darwin"
+	tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
+	;;
 aarch64*-*-freebsd*)
 	tm_file="${tm_file} elfos.h ${fbsd_tm_file}"
 	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-freebsd.h"
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 04f59fd9a543..f549a00f24eb 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -808,6 +808,8 @@ enum aarch64_builtins
   AARCH64_RBIT,
   AARCH64_RBITL,
   AARCH64_RBITLL,
+  /* OS-specific */
+  AARCH64_BUILTIN_CFSTRING,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2044,6 +2046,17 @@ aarch64_general_init_builtins (void)
     handle_arm_acle_h ();
 }
 
+/* Run the subtarget's builtin initialization, if any: expand
+   SUBTARGET_INIT_BUILTINS when the subtarget configuration defines it,
+   otherwise do nothing.  */
+void
+aarch64_init_subtarget_builtins (void)
+{
+#ifdef SUBTARGET_INIT_BUILTINS
+  SUBTARGET_INIT_BUILTINS;
+#endif
+}
+
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
 tree
 aarch64_general_builtin_decl (unsigned code, bool)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 578ec6f45b06..1601887c86fe 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -359,4 +359,8 @@ aarch64_register_pragmas (void)
   targetm.check_builtin_call = aarch64_check_builtin_call;
 
   c_register_pragma ("GCC", "aarch64", aarch64_pragma_aarch64);
+
+#ifdef REGISTER_SUBTARGET_PRAGMAS
+  REGISTER_SUBTARGET_PRAGMAS ();
+#endif
 }
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 70303d6fd953..1087f703767a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -109,6 +109,14 @@ enum aarch64_symbol_type
   SYMBOL_TLSLE24,
   SYMBOL_TLSLE32,
   SYMBOL_TLSLE48,
+  SYMBOL_MO_SMALL_ABS,
+  SYMBOL_MO_SMALL_PCR,
+  SYMBOL_MO_SMALL_GOT,
+  SYMBOL_MO_SMALL_TLS,
+  SYMBOL_MO_LARGE_ABS,
+  SYMBOL_MO_LARGE_PCR,
+  SYMBOL_MO_LARGE_GOT,
+  SYMBOL_MO_LARGE_TLS,
   SYMBOL_FORCE_TO_MEM
 };
 
@@ -745,6 +753,7 @@ void aarch64_post_cfi_startproc (void);
 poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_address_valid_for_prefetch_p (rtx, bool);
+bool aarch64_address_valid_for_unscaled_prefetch_p (rtx, bool);
 bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
 unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
 unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
@@ -975,6 +984,7 @@ void aarch64_override_options_internal (struct gcc_options *);
 
 const char *aarch64_general_mangle_builtin_type (const_tree);
 void aarch64_general_init_builtins (void);
+void aarch64_init_subtarget_builtins (void);
 tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *);
 gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
 					     gimple_stmt_iterator *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 560e5431636e..b7cc07a43c2c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -297,8 +297,10 @@ static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
 						     const_tree,
 						     machine_mode *, int *,
 						     bool *, bool);
+#if !TARGET_MACHO
 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
+#endif
 static void aarch64_override_options_after_change (void);
 static bool aarch64_vector_mode_supported_p (machine_mode);
 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
@@ -2805,6 +2807,9 @@ static const struct attribute_spec aarch64_attribute_table[] =
   { "Advanced SIMD type", 1, 1, false, true,  false, true,  NULL, NULL },
   { "SVE type",		  3, 3, false, true,  false, true,  NULL, NULL },
   { "SVE sizeless type",  0, 0, false, true,  false, true,  NULL, NULL },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+  SUBTARGET_ATTRIBUTE_TABLE,
+#endif
   { NULL,                 0, 0, false, false, false, false, NULL, NULL }
 };
 
@@ -3955,7 +3960,7 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
       if (known_le (GET_MODE_SIZE (mode), 8))
 	return true;
       if (known_le (GET_MODE_SIZE (mode), 16))
-	return (regno & 1) == 0;
+	return (regno & 1) == 0 || TARGET_MACHO; /* darwinpcs D.4 */
     }
   else if (FP_REGNUM_P (regno))
     {
@@ -4487,6 +4492,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
   switch (type)
     {
     case SYMBOL_SMALL_ABSOLUTE:
+    case SYMBOL_MO_SMALL_PCR:
       {
 	/* In ILP32, the mode of dest can be either SImode or DImode.  */
 	rtx tmp_reg = dest;
@@ -4497,6 +4503,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 	if (can_create_pseudo_p ())
 	  tmp_reg = gen_reg_rtx (mode);
 
+	if (TARGET_MACHO)
+	  {
+	    rtx sym, off;
+	    split_const (imm, &sym, &off);
+	    /* Negative offsets don't work, whether by intention is TBD.  */
+	    if (INTVAL (off) < 0 || INTVAL (off) > 8 * 1024 * 1024)
+	      {
+		emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, sym));
+		emit_insn (gen_add_losym (dest, tmp_reg, sym));
+		/* FIXME: add the SI option if/when we support ilp32.  */
+		emit_insn (gen_adddi3 (dest, dest, off));
+		return;
+	      }
+	   /* else small enough positive offset is OK.  */
+	  }
 	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, copy_rtx (imm)));
 	emit_insn (gen_add_losym (dest, tmp_reg, imm));
 	return;
@@ -4580,6 +4601,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 	return;
       }
 
+    case SYMBOL_MO_SMALL_GOT:
     case SYMBOL_SMALL_GOT_4G:
       emit_insn (gen_rtx_SET (dest, imm));
       return;
@@ -6861,6 +6883,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	case SYMBOL_SMALL_TLSIE:
 	case SYMBOL_SMALL_GOT_28K:
 	case SYMBOL_SMALL_GOT_4G:
+	case SYMBOL_MO_SMALL_GOT:
 	case SYMBOL_TINY_GOT:
 	case SYMBOL_TINY_TLSIE:
 	  if (const_offset != 0)
@@ -6874,6 +6897,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	  /* FALLTHRU */
 
 	case SYMBOL_SMALL_ABSOLUTE:
+	case SYMBOL_MO_SMALL_PCR:
 	case SYMBOL_TINY_ABSOLUTE:
 	case SYMBOL_TLSLE12:
 	case SYMBOL_TLSLE24:
@@ -7455,6 +7479,7 @@ aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
   gcc_unreachable ();
 }
 
+#if !TARGET_MACHO
 static bool
 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
 			       const_tree type, int *nregs)
@@ -7464,6 +7489,7 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
 						  &pcum->aapcs_vfp_rmode,
 						  nregs, NULL, pcum->silent_p);
 }
+#endif
 
 /* Given MODE and TYPE of a function argument, return the alignment in
    bits.  The idea is to suppress any stronger alignment requested by
@@ -7718,13 +7744,25 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
     /* No frontends can create types with variable-sized modes, so we
        shouldn't be asked to pass or return them.  */
     size = GET_MODE_SIZE (mode).to_constant ();
+  pcum->darwinpcs_stack_bytes = size;
   size = ROUND_UP (size, UNITS_PER_WORD);
 
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
+  bool is_ha = false;
+#if !TARGET_MACHO
   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
 						 mode,
 						 type,
 						 &nregs);
+#else
+  /* We care if the value is a homogeneous aggregate when laying out the stack,
+     so use this call directly.  */
+  allocate_nvrn
+    = aarch64_vfp_is_call_or_return_candidate (mode, type,
+						&pcum->aapcs_vfp_rmode,
+						&nregs, &is_ha,
+						pcum->silent_p);
+#endif
   gcc_assert (!sve_p || !allocate_nvrn);
 
   unsigned int alignment
@@ -7749,7 +7787,13 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
       if (!pcum->silent_p && !TARGET_FLOAT)
 	aarch64_err_no_fpadvsimd (mode);
 
-      if (nvrn + nregs <= NUM_FP_ARG_REGS)
+      if (TARGET_MACHO
+	  && !arg.named)
+	{
+	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
+	  goto on_stack;
+	}
+      else if (nvrn + nregs <= NUM_FP_ARG_REGS)
 	{
 	  pcum->aapcs_nextnvrn = nvrn + nregs;
 	  if (!aarch64_composite_type_p (type, mode))
@@ -7795,10 +7839,18 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
   /* C6 - C9.  though the sign and zero extension semantics are
      handled elsewhere.  This is the case where the argument fits
      entirely general registers.  */
+
   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
     {
       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
 
+      if (TARGET_MACHO
+	  && !arg.named)
+	{
+	  pcum->aapcs_nextncrn = NUM_ARG_REGS;
+	  goto on_stack;
+	}
+
       /* C.8 if the argument has an alignment of 16 then the NGRN is
 	 rounded up to the next even number.  */
       if (nregs == 2
@@ -7826,7 +7878,9 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 	     passed by reference rather than value.  */
 	  if (alignment == 16 * BITS_PER_UNIT)
 	    {
-	      if (warn_pcs_change && abi_break_gcc_9)
+	      if (warn_pcs_change && abi_break_gcc_9
+		  /* Darwin PCS deletes rule C.8.  */
+		  && !TARGET_MACHO)
 		inform (input_location, "parameter passing for argument of type "
 			"%qT changed in GCC 9.1", type);
 	      ++ncrn;
@@ -7882,7 +7936,6 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
   /* The argument is passed on stack; record the needed number of words for
      this argument and align the total size if necessary.  */
 on_stack:
-  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
 
   if (warn_pcs_change
       && abi_break_gcc_13
@@ -7898,6 +7951,56 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
     inform (input_location, "parameter passing for argument of type "
 	    "%qT changed in GCC 14.1", type);
 
+  if (TARGET_MACHO)
+    {
+      /* Darwin does not round up the allocation for smaller entities to 8
+	 bytes.  It only requires the natural alignment for these.  There
+	 was no darwinpcs for GCC 9, so neither the implementation change
+	 nor the warning should fire here.
+
+	 size is rounded up to 8 bytes, so will account for enough slots to
+	 accommodate the entire argument - potentially, with some padding
+	 at the end.  When the current position is 0 - any allocation needs
+	 a stack slot.  CHECKME: do we need to align 16byte entities?
+
+	 but we don't do this for:
+	  * unnamed parms in variadic functions
+	  * complex types smaller than 4 bytes
+	 each get their own slot.  */
+      if (!arg.named
+	  || TREE_CODE (type) == COMPLEX_TYPE
+	  || (TREE_CODE (type) == RECORD_TYPE
+	      && !is_ha && !SCALAR_FLOAT_MODE_P (pcum->aapcs_vfp_rmode))
+	  || TREE_CODE (type) == UNION_TYPE)
+	{
+	  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
+	  pcum->darwinpcs_sub_word_offset = 0;
+	  pcum->darwinpcs_sub_word_pos = 0;
+	  /* We skip the re-alignment for 16byte things, since we currently
+	     assume that the darwinpcs doesn't force such alignment.  */
+	  return;
+	}
+
+      if (pcum->darwinpcs_sub_word_pos == 0)
+	pcum->aapcs_stack_words = size / UNITS_PER_WORD;
+
+      int new_pos
+	= ROUND_UP (pcum->darwinpcs_sub_word_pos, alignment / BITS_PER_UNIT);
+      if (new_pos >= UNITS_PER_WORD)
+	{
+	  /* We are not catering for the possible 16byte alignment bump.  */
+	  pcum->aapcs_stack_words += 1;
+	  new_pos = 0;
+	}
+      pcum->darwinpcs_sub_word_offset = new_pos;
+      new_pos += pcum->darwinpcs_stack_bytes;
+      if (new_pos > UNITS_PER_WORD)
+	pcum->aapcs_stack_words += new_pos / UNITS_PER_WORD;
+      pcum->darwinpcs_sub_word_pos = new_pos % UNITS_PER_WORD;
+      return;
+    }
+
+  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
   if (alignment == 16 * BITS_PER_UNIT)
     {
       int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
@@ -7951,7 +8054,11 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
   pcum->aapcs_arg_processed = false;
   pcum->aapcs_stack_words = 0;
   pcum->aapcs_stack_size = 0;
+  pcum->darwinpcs_stack_bytes = 0;
+  pcum->darwinpcs_sub_word_offset = 0;
+  pcum->darwinpcs_sub_word_pos = 0;
   pcum->silent_p = silent_p;
+  pcum->aapcs_vfp_rmode = VOIDmode;
 
   if (!silent_p
       && !TARGET_FLOAT
@@ -7990,8 +8097,9 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v,
       || pcum->pcs_variant == ARM_PCS_SVE)
     {
       aarch64_layout_arg (pcum_v, arg);
-      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
-		  != (pcum->aapcs_stack_words != 0));
+      gcc_assert (TARGET_MACHO
+		  || (pcum->aapcs_reg != NULL_RTX)
+		      != (pcum->aapcs_stack_words != 0));
       pcum->aapcs_arg_processed = false;
       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
@@ -8010,12 +8118,17 @@ aarch64_function_arg_regno_p (unsigned regno)
 	  || (PR_REGNUM_P (regno) && regno < P0_REGNUM + NUM_PR_ARG_REGS));
 }
 
-/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
-   PARM_BOUNDARY bits of alignment, but will be given anything up
-   to STACK_BOUNDARY bits if the type requires it.  This makes sure
-   that both before and after the layout of each argument, the Next
-   Stacked Argument Address (NSAA) will have a minimum alignment of
-   8 bytes.  */
+/* Implement FUNCTION_ARG_BOUNDARY.
+   For AAPCS64, every parameter gets at least PARM_BOUNDARY bits of
+   alignment, but will be given anything up to STACK_BOUNDARY bits
+   if the type requires it.  This makes sure that both before and after
+   the layout of each argument, the Next Stacked Argument Address (NSAA)
+   will have a minimum alignment of 8 bytes.
+   For darwinpcs, parameters get their natural alignment (up to the
+   STACK_BOUNDARY).  Therefore, the stack can be aligned less than 8
+   bytes after a smaller aligned type is placed.  However, the stack will
+   always be counted in PARM_BOUNDARY chunks, darwinpcs will just fill
+   the last allocated chunk with several args, potentially.  */
 
 static unsigned int
 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
@@ -8029,10 +8142,37 @@ aarch64_function_arg_boundary (machine_mode mode, const_tree type)
 							   &abi_break_gcc_14);
   /* We rely on aarch64_layout_arg and aarch64_gimplify_va_arg_expr
      to emit warnings about ABI incompatibility.  */
+#if TARGET_MACHO
+  /* Temporary fudge to put some non-scalar types in distinct stack slots.  */
+  machine_mode comp_mode = VOIDmode;
+  int nregs;
+  bool is_ha;
+  aarch64_vfp_is_call_or_return_candidate (mode, type, &comp_mode, &nregs,
+					   &is_ha, /*silent*/true);
+  if (TREE_CODE (type) == COMPLEX_TYPE
+      || (TREE_CODE (type) == RECORD_TYPE
+	  && !is_ha && !SCALAR_FLOAT_MODE_P (comp_mode))
+      || TREE_CODE (type) == UNION_TYPE)
+    return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
+  return MIN (alignment, STACK_BOUNDARY);
+#else
   alignment = MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
   return alignment;
+#endif
 }
 
+#if TARGET_MACHO
+/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY for darwinpcs which allows
+   non-standard passing of byte-aligned items [D.2].
+   TODO: check if this extends to packed aggregates.  */
+
+static unsigned int
+aarch64_function_arg_round_boundary (machine_mode, const_tree)
+{
+  return BITS_PER_UNIT;
+}
+#endif
+
 /* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE.  */
 
 static fixed_size_mode
@@ -11078,6 +11218,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
       /* load literal: pc-relative constant pool entry.  Only supported
          for SI mode or larger.  */
       info->type = ADDRESS_SYMBOLIC;
+      info->offset = NULL_RTX;
 
       if (!load_store_pair_p
 	  && GET_MODE_SIZE (mode).is_constant (&const_size)
@@ -11085,6 +11226,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	{
 	  poly_int64 offset;
 	  rtx sym = strip_offset_and_salt (x, &offset);
+
 	  return ((LABEL_REF_P (sym)
 		   || (SYMBOL_REF_P (sym)
 		       && CONSTANT_POOL_ADDRESS_P (sym)
@@ -11102,10 +11244,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	  poly_int64 offset;
 	  HOST_WIDE_INT const_offset;
 	  rtx sym = strip_offset_and_salt (info->offset, &offset);
+
 	  if (SYMBOL_REF_P (sym)
 	      && offset.is_constant (&const_offset)
 	      && (aarch64_classify_symbol (sym, const_offset)
-		  == SYMBOL_SMALL_ABSOLUTE))
+		    == SYMBOL_SMALL_ABSOLUTE
+		  || aarch64_classify_symbol (sym, const_offset)
+		      == SYMBOL_MO_SMALL_PCR))
 	    {
 	      /* The symbol and offset must be aligned to the access size.  */
 	      unsigned int align;
@@ -11155,6 +11300,55 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
   if (!res)
     return false;
 
+  /* For ELF targets using GAS, we emit prfm unconditionally; GAS will alter
+     the instruction to pick the prfum form where possible (i.e. when the
+     offset is in the range -256..255) and fall back to prfm otherwise.
+     We can reject cases where the offset exceeds the range usable by both
+     insns [-256..32760], or for offsets > 255 when the value is not divisible
+     by 8.
+     For Mach-O (Darwin) where the assembler uses the LLVM back end, that does
+     not yet do the substitution, so we must reject all prfum cases.
+     Only a CONST_INT offset can be checked like this; addr.offset may also be
+     symbolic (e.g. for ADDRESS_LO_SUM), where INTVAL must not be used.  */
+  if (addr.offset && CONST_INT_P (addr.offset))
+    {
+      HOST_WIDE_INT offs = INTVAL (addr.offset);
+      if (offs < -256 || offs > 32760) /* Out of range for both insns.  */
+	return false;
+      if (offs & 0x07) /* We cannot use prfm.  */
+	{
+	  if (offs > 255) /* Out of range for prfum.  */
+	    return false;
+	  if (TARGET_MACHO)
+	    return false;
+	}
+      if (TARGET_MACHO && offs < 0)
+	return false;
+    }
+
+  /* ... except writeback forms.  */
+  return addr.type != ADDRESS_REG_WB;
+}
+
+/* Return true if the address X is valid for a PRFUM instruction.
+   STRICT_P is true if we should do strict checking with
+   aarch64_classify_address.  */
+
+bool
+aarch64_address_valid_for_unscaled_prefetch_p (rtx x, bool strict_p)
+{
+  struct aarch64_address_info addr;
+
+  /* PRFUM accepts the same addresses as DImode, but only with a constant
+     offset in the range -256..255 (addr.offset need not be a CONST_INT).  */
+  if (!aarch64_classify_address (&addr, x, DImode, strict_p))
+    return false;
+
+  if (addr.offset && (!CONST_INT_P (addr.offset)
+		      || INTVAL (addr.offset) > 255
+		      || INTVAL (addr.offset) < -256))
+    return false;
+
   /* ... except writeback forms.  */
   return addr.type != ADDRESS_REG_WB;
 }
@@ -11918,6 +12112,144 @@ sizetochar (int size)
     }
 }
 
+/* Print constant address expression X to FILE, writing "@POSTFIX" after each
+   symbol or label name that this function itself emits.  */
+static void
+output_macho_postfix_expr (FILE *file, rtx x, const char *postfix)
+{
+  char buf[256];
+
+ restart:
+  switch (GET_CODE (x))
+    {
+    case PC:
+      putc ('.', file);
+      break;
+
+    case SYMBOL_REF:
+      if (SYMBOL_REF_DECL (x))
+	assemble_external (SYMBOL_REF_DECL (x));
+      assemble_name (file, XSTR (x, 0));
+      fprintf (file, "@%s", postfix);
+      break;
+
+    case LABEL_REF:
+      x = label_ref_label (x);
+      /* Fall through.  */
+    case CODE_LABEL:
+      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+      assemble_name (file, buf);
+      fprintf (file, "@%s", postfix);
+      break;
+
+    case CONST_INT:
+      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+      break;
+
+    case CONST:
+      /* Print the wrapped expression directly, without parentheses.  */
+      output_macho_postfix_expr (file, XEXP (x, 0), postfix);
+      break;
+
+    case CONST_WIDE_INT:
+      /* We do not know the mode here so we have to use a round about
+	 way to build a wide-int to get it printed properly.  */
+      {
+	wide_int w = wide_int::from_array (&CONST_WIDE_INT_ELT (x, 0),
+					   CONST_WIDE_INT_NUNITS (x),
+					   CONST_WIDE_INT_NUNITS (x)
+					   * HOST_BITS_PER_WIDE_INT,
+					   false);
+	print_decs (w, file);
+      }
+      break;
+
+    case CONST_DOUBLE:
+      if (CONST_DOUBLE_AS_INT_P (x))
+	{
+	  /* We can use %d if the number is one word and positive.  */
+	  if (CONST_DOUBLE_HIGH (x))
+	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
+		     (unsigned HOST_WIDE_INT) CONST_DOUBLE_HIGH (x),
+		     (unsigned HOST_WIDE_INT) CONST_DOUBLE_LOW (x));
+	  else if (CONST_DOUBLE_LOW (x) < 0)
+	    fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+		     (unsigned HOST_WIDE_INT) CONST_DOUBLE_LOW (x));
+	  else
+	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+	}
+      else
+	/* We can't handle floating point constants;
+	   PRINT_OPERAND must handle them.  */
+	output_operand_lossage ("floating constant misused");
+      break;
+
+    case CONST_FIXED:
+      fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_FIXED_VALUE_LOW (x));
+      break;
+
+    case PLUS:
+      /* Some assemblers need integer constants to appear last (eg masm).  */
+      if (CONST_INT_P (XEXP (x, 0)))
+	{
+	  output_macho_postfix_expr (file, XEXP (x, 1), postfix);
+	  if (INTVAL (XEXP (x, 0)) >= 0)
+	    fprintf (file, "+");
+	  output_addr_const (file, XEXP (x, 0));
+	}
+      else
+	{
+	  output_macho_postfix_expr (file, XEXP (x, 0), postfix);
+	  if (!CONST_INT_P (XEXP (x, 1))
+	      || INTVAL (XEXP (x, 1)) >= 0)
+	    fprintf (file, "+");
+	  output_addr_const (file, XEXP (x, 1));
+	}
+      break;
+
+    case MINUS:
+      /* Avoid outputting things like x-x or x+5-x,
+	 since some assemblers can't handle that.  */
+      x = simplify_subtraction (x);
+      if (GET_CODE (x) != MINUS)
+	goto restart;
+
+      output_macho_postfix_expr (file, XEXP (x, 0), postfix);
+      fprintf (file, "-");
+      if ((CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) >= 0)
+	  || GET_CODE (XEXP (x, 1)) == PC
+	  || GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+	output_addr_const (file, XEXP (x, 1));
+      else
+	{
+	  fputs (targetm.asm_out.open_paren, file);
+	  output_addr_const (file, XEXP (x, 1));
+	  fputs (targetm.asm_out.close_paren, file);
+	}
+      break;
+
+    case ZERO_EXTEND:
+    case SIGN_EXTEND:
+    case SUBREG:
+    case TRUNCATE:
+      output_addr_const (file, XEXP (x, 0));
+      break;
+
+    case  UNSPEC:
+      if (XINT (x, 1) == UNSPEC_SALT_ADDR)
+	{
+	  output_macho_postfix_expr (file, XVECEXP (x, 0, 0), postfix);
+	  break;
+	}
+      /* FALLTHROUGH */
+    default:
+      if (targetm.asm_out.output_addr_const_extra (file, x))
+	break;
+
+      output_operand_lossage ("invalid expression as operand");
+    }
+}
+
 /* Print operand X to file F in a target specific manner according to CODE.
    The acceptable formatting commands given by CODE are:
      'c':		An integer or symbol address without a preceding #
@@ -11986,6 +12318,12 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 	}
       break;
 
+    case 'K':
+      output_macho_postfix_expr (f, x, "PAGEOFF");
+      break;
+    case 'O':
+      output_macho_postfix_expr (f, x, "GOTPAGEOFF");
+      break;
     case 'e':
       {
 	x = unwrap_const_vec_duplicate (x);
@@ -12309,7 +12647,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
     case 'A':
       if (GET_CODE (x) == HIGH)
 	x = XEXP (x, 0);
-
+#if !TARGET_MACHO
       switch (aarch64_classify_symbolic_expression (x))
 	{
 	case SYMBOL_SMALL_GOT_4G:
@@ -12340,9 +12678,29 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 	  break;
 	}
       output_addr_const (asm_out_file, x);
+#endif
+#if TARGET_MACHO
+  // FIXME update classify symbolic expression to handle macho.
+      switch (aarch64_classify_symbolic_expression (x))
+	{
+	case SYMBOL_MO_SMALL_PCR:
+	  output_macho_postfix_expr (asm_out_file, x, "PAGE");
+//	  asm_fprintf (asm_out_file, "@PAGE;mopcr");
+	  break;
+	case SYMBOL_MO_SMALL_GOT:
+	  output_macho_postfix_expr (asm_out_file, x, "GOTPAGE");
+//	  asm_fprintf (asm_out_file, "@GOTPAGE;mosg");
+	  break;
+	default:
+	  output_macho_postfix_expr (asm_out_file, x, "BLEAH");
+//	  asm_fprintf (asm_out_file, "@BLEAH");
+	  break;
+	}
+#endif
       break;
 
     case 'L':
+#if !TARGET_MACHO
       switch (aarch64_classify_symbolic_expression (x))
 	{
 	case SYMBOL_SMALL_GOT_4G:
@@ -12380,10 +12738,12 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 	default:
 	  break;
 	}
+#endif
       output_addr_const (asm_out_file, x);
       break;
 
     case 'G':
+#if !TARGET_MACHO
       switch (aarch64_classify_symbolic_expression (x))
 	{
 	case SYMBOL_TLSLE24:
@@ -12392,6 +12752,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 	default:
 	  break;
 	}
+#endif
       output_addr_const (asm_out_file, x);
       break;
 
@@ -12541,8 +12902,14 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
 	break;
 
       case ADDRESS_LO_SUM:
+#if TARGET_MACHO
+	asm_fprintf (f, "[%s, #", reg_names [REGNO (addr.base)]);
+	output_macho_postfix_expr (f, addr.offset, "PAGEOFF");
+//	output_addr_const (f, addr.offset);
+#else
 	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
 	output_addr_const (f, addr.offset);
+#endif
 	asm_fprintf (f, "]");
 	return true;
 
@@ -13040,6 +13407,8 @@ aarch64_asm_output_labelref (FILE* f, const char *name)
   asm_fprintf (f, "%U%s", name);
 }
 
+#if !TARGET_MACHO
+
 static void
 aarch64_elf_asm_constructor (rtx symbol, int priority)
 {
@@ -13079,6 +13448,7 @@ aarch64_elf_asm_destructor (rtx symbol, int priority)
       assemble_aligned_integer (POINTER_BYTES, symbol);
     }
 }
+#endif
 
 const char*
 aarch64_output_casesi (rtx *operands)
@@ -15420,15 +15790,17 @@ aarch64_init_builtins ()
 {
   aarch64_general_init_builtins ();
   aarch64_sve::init_builtins ();
-#ifdef SUBTARGET_INIT_BUILTINS
-  SUBTARGET_INIT_BUILTINS;
-#endif
+  aarch64_init_subtarget_builtins ();
 }
 
 /* Implement TARGET_FOLD_BUILTIN.  */
 static tree
 aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool)
 {
+#ifdef SUBTARGET_FOLD_BUILTIN
+  if (tree res = SUBTARGET_FOLD_BUILTIN (fndecl, nargs, args, false))
+    return res;
+#endif
   unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
   unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
@@ -18620,10 +18992,14 @@ initialize_aarch64_code_model (struct gcc_options *opts)
 	}
       break;
     case AARCH64_CMODEL_LARGE:
-      if (opts->x_flag_pic)
+      if (TARGET_MACHO)
+	/* We need to implement fPIC here (arm64_32 also accepts the large
+	   model).  */
+	;
+      else if (opts->x_flag_pic)
 	sorry ("code model %qs with %<-f%s%>", "large",
 	       opts->x_flag_pic > 1 ? "PIC" : "pic");
-      if (opts->x_aarch64_abi == AARCH64_ABI_ILP32)
+      else if (opts->x_aarch64_abi == AARCH64_ABI_ILP32)
 	sorry ("code model %qs not supported in ilp32 mode", "large");
       break;
     case AARCH64_CMODEL_TINY_PIC:
@@ -19509,7 +19885,9 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
 	case AARCH64_CMODEL_SMALL_SPIC:
 	case AARCH64_CMODEL_SMALL_PIC:
 	case AARCH64_CMODEL_SMALL:
-	  return SYMBOL_SMALL_ABSOLUTE;
+	  return TARGET_MACHO
+		 ? SYMBOL_MO_SMALL_PCR
+		 : SYMBOL_SMALL_ABSOLUTE;
 
 	default:
 	  gcc_unreachable ();
@@ -19545,10 +19923,22 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
 
 	  return SYMBOL_TINY_ABSOLUTE;
 
-
 	case AARCH64_CMODEL_SMALL_SPIC:
 	case AARCH64_CMODEL_SMALL_PIC:
 	case AARCH64_CMODEL_SMALL:
+#if TARGET_MACHO
+	  if (TARGET_MACHO)
+	    {
+	      /* Constant pool addresses are always TU-local and PC-
+		 relative.  We indirect common, external and weak
+		 symbols (but weak only if not hidden).  */
+	      if (!CONSTANT_POOL_ADDRESS_P (x)
+		  && (MACHO_SYMBOL_MUST_INDIRECT_P (x)
+		      || !aarch64_symbol_binds_local_p (x)))
+		return SYMBOL_MO_SMALL_GOT;
+	    }
+	  else
+#endif
 	  if ((flag_pic || SYMBOL_REF_WEAK (x))
 	      && !aarch64_symbol_binds_local_p (x))
 	    return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
@@ -19560,7 +19950,8 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
 		|| offset_within_block_p (x, offset)))
 	    return SYMBOL_FORCE_TO_MEM;
 
-	  return SYMBOL_SMALL_ABSOLUTE;
+	  return TARGET_MACHO ? SYMBOL_MO_SMALL_PCR
+			      : SYMBOL_SMALL_ABSOLUTE;
 
 	case AARCH64_CMODEL_LARGE:
 	  /* This is alright even in PIC code as the constant
@@ -19690,7 +20081,10 @@ static GTY(()) tree va_list_type;
      void *__vr_top;
      int   __gr_offs;
      int   __vr_offs;
-   };  */
+   };
+
+  darwinpcs uses 'char *' for the va_list (in common with other platform
+  ports).  */
 
 static tree
 aarch64_build_builtin_va_list (void)
@@ -19698,6 +20092,13 @@ aarch64_build_builtin_va_list (void)
   tree va_list_name;
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
 
+  /* darwinpcs uses a simple char * for this.  */
+  if (TARGET_MACHO)
+    {
+      va_list_type = build_pointer_type (char_type_node);
+      return va_list_type;
+    }
+
   /* Create the type.  */
   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
   /* Give it the required name.  */
@@ -19769,6 +20170,13 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   int vr_save_area_size = cfun->va_list_fpr_size;
   int vr_offset;
 
+  /* darwinpcs uses the default, char * va_list impl.  */
+  if (TARGET_MACHO)
+    {
+      std_expand_builtin_va_start (valist, nextarg);
+      return;
+    }
+
   cum = &crtl->args.info;
   if (cfun->va_list_gpr_size)
     gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
@@ -19859,6 +20267,9 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   HOST_WIDE_INT size, rsize, adjust, align;
   tree t, u, cond1, cond2;
 
+  if (TARGET_MACHO)
+    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
   indirect_p = pass_va_arg_by_reference (type);
   if (indirect_p)
     type = build_pointer_type (type);
@@ -20141,6 +20552,9 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v,
   int gr_saved = cfun->va_list_gpr_size;
   int vr_saved = cfun->va_list_fpr_size;
 
+  if (TARGET_MACHO)
+    return;
+
   /* The caller has advanced CUM up to, but not beyond, the last named
      argument.  Advance a local copy of CUM past the last "real" named
      argument, to find out how many registers are left over.  */
@@ -20981,6 +21395,12 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
 static const char *
 aarch64_mangle_type (const_tree type)
 {
+  /* The darwinpcs ABI documents say that "__va_list" has to be
+     mangled as char *.  */
+  if (TARGET_MACHO
+      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
+    return "Pc";
+
   /* The AArch64 ABI documents say that "__va_list" has to be
      mangled as if it is in the "std" namespace.  */
   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
@@ -21690,7 +22110,8 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
 
   /* GOT accesses are valid moves.  */
   if (SYMBOL_REF_P (x)
-      && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+      && (aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G
+	  || aarch64_classify_symbolic_expression (x) == SYMBOL_MO_SMALL_GOT))
     return true;
 
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
@@ -22987,7 +23408,9 @@ aarch64_declare_function_name (FILE *stream, const char* name,
   aarch64_asm_output_variant_pcs (stream, fndecl, name);
 
   /* Don't forget the type directive for ELF.  */
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
+#endif
   ASM_OUTPUT_LABEL (stream, name);
 
   cfun->machine->label_is_assembled = true;
@@ -23048,12 +23471,17 @@ aarch64_output_patchable_area (unsigned int patch_area_size, bool record_p)
 /* Implement ASM_OUTPUT_DEF_FROM_DECLS.  Output .variant_pcs for aliases.  */
 
 void
-aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target)
+aarch64_asm_output_alias (FILE *stream, const tree decl,
+			  const tree target ATTRIBUTE_UNUSED)
 {
   const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+#ifdef ASM_OUTPUT_DEF
   const char *value = IDENTIFIER_POINTER (target);
+#endif
   aarch64_asm_output_variant_pcs (stream, decl, name);
+#ifdef ASM_OUTPUT_DEF
   ASM_OUTPUT_DEF (stream, name, value);
+#endif
 }
 
 /* Implement ASM_OUTPUT_EXTERNAL.  Output .variant_pcs for undefined
@@ -23526,7 +23954,7 @@ aarch64_init_libfuncs (void)
 static machine_mode
 aarch64_c_mode_for_suffix (char suffix)
 {
-  if (suffix == 'q')
+  if (suffix == 'q' && !TARGET_MACHO)
     return TFmode;
 
   return VOIDmode;
@@ -23677,6 +24105,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
     }
 
   gcc_assert (CONST_INT_P (info.u.mov.value));
+  unsigned HOST_WIDE_INT value = UINTVAL (info.u.mov.value);
+
+  /* We have signed chars which can result in a sign-extended 8bit value
+     which is then emitted as an unsigned hex value, and the LLVM back end
+     assembler rejects that as being too big.  */
+  if (TARGET_MACHO && (known_eq (GET_MODE_BITSIZE (info.elt_mode), 8)))
+    {
+      unsigned HOST_WIDE_INT mask = (1U << GET_MODE_BITSIZE (info.elt_mode))-1;
+      value &= mask;
+    }
 
   if (which == AARCH64_CHECK_MOV)
     {
@@ -23685,16 +24123,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 		  ? "msl" : "lsl");
       if (lane_count == 1)
 	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
-		  mnemonic, UINTVAL (info.u.mov.value));
+		  mnemonic, value);
       else if (info.u.mov.shift)
 	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
 		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
-		  element_char, UINTVAL (info.u.mov.value), shift_op,
+		  element_char, value, shift_op,
 		  info.u.mov.shift);
       else
 	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
 		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
-		  element_char, UINTVAL (info.u.mov.value));
+		  element_char, value);
     }
   else
     {
@@ -23703,12 +24141,12 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
       if (info.u.mov.shift)
 	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
 		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
-		  element_char, UINTVAL (info.u.mov.value), "lsl",
+		  element_char, value, "lsl",
 		  info.u.mov.shift);
       else
 	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
 		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
-		  element_char, UINTVAL (info.u.mov.value));
+		  element_char, value);
     }
   return templ;
 }
@@ -27655,19 +28093,37 @@ aarch64_sls_emit_shared_blr_thunks (FILE *out_file)
 	continue;
 
       const char *name = indirect_symbol_names[regnum];
-      switch_to_section (get_named_section (decl, NULL, 0));
+      /* If the target uses a unique section for this, switch to it.  */
+      if (DECL_SECTION_NAME (decl))
+	switch_to_section (get_named_section (decl, NULL, 0));
+      else
+	switch_to_section (text_section);
       ASM_OUTPUT_ALIGN (out_file, 2);
-      targetm.asm_out.globalize_label (out_file, name);
+      if (!TARGET_MACHO)
+	targetm.asm_out.globalize_label (out_file, name);
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+      ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function");
+#endif
+      if (TARGET_MACHO)
+	{
+#ifdef ASM_WEAKEN_DECL
+	  if (DECL_WEAK (decl))
+	    ASM_WEAKEN_DECL (out_file, decl, name, 0);
+	  else
+#endif
+	    targetm.asm_out.globalize_decl_name (out_file, decl);
+	}
       /* Only emits if the compiler is configured for an assembler that can
 	 handle visibility directives.  */
       targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
-      ASM_OUTPUT_TYPE_DIRECTIVE (out_file, name, "function");
       ASM_OUTPUT_LABEL (out_file, name);
       aarch64_sls_emit_function_stub (out_file, regnum);
       /* Use the most conservative target to ensure it can always be used by any
 	 function in the translation unit.  */
       asm_fprintf (out_file, "\tdsb\tsy\n\tisb\n");
+#ifdef ASM_DECLARE_FUNCTION_SIZE
       ASM_DECLARE_FUNCTION_SIZE (out_file, name, decl);
+#endif
     }
 }
 
@@ -27957,6 +28413,15 @@ aarch64_run_selftests (void)
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#if TARGET_MACHO
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
+#endif
+
 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
   hook_bool_const_tree_hwi_hwi_const_tree_true
@@ -28043,6 +28508,11 @@ aarch64_run_selftests (void)
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
 
+#if TARGET_MACHO
+#undef  TARGET_FUNCTION_ARG_ROUND_BOUNDARY
+#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY aarch64_function_arg_round_boundary
+#endif
+
 #undef TARGET_FUNCTION_ARG_PADDING
 #define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index a01f1ee99d85..2c450de57351 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -65,6 +65,10 @@
 #define TARGET_SIMD (AARCH64_ISA_SIMD)
 #define TARGET_FLOAT (AARCH64_ISA_FP)
 
+/* If this is non-zero then generate code for the object format, ABI and
+   assembler syntax used by Darwin (Mach-O) platforms.  */
+#define TARGET_MACHO		0
+
 #define UNITS_PER_WORD		8
 
 #define UNITS_PER_VREG		16
@@ -934,6 +938,12 @@ typedef struct
 				   aapcs_reg == NULL_RTX.  */
   int aapcs_stack_size;		/* The total size (in words, per 8 byte) of the
 				   stack arg area so far.  */
+  int darwinpcs_stack_bytes;	/* If the argument is passed on the stack, this
+				   is the byte-size.  */
+  int darwinpcs_sub_word_offset;/* This is the offset of this arg within a word
+				   when placing smaller items for darwinpcs.  */
+  int darwinpcs_sub_word_pos;	/* The next byte available within the word for
+				   darwinpcs.  */
   bool silent_p;		/* True if we should act silently, rather than
 				   raise an error for invalid calls.  */
 } CUMULATIVE_ARGS;
@@ -1232,8 +1242,13 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
 #define ASM_CPU_SPEC \
    MCPU_TO_MARCH_SPEC
 
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
 #define EXTRA_SPECS						\
-  { "asm_cpu_spec",		ASM_CPU_SPEC }
+  { "asm_cpu_spec",		ASM_CPU_SPEC },			\
+  SUBTARGET_EXTRA_SPECS
 
 #define ASM_OUTPUT_POOL_EPILOGUE  aarch64_asm_output_pool_epilogue
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 01cf989641fc..b3509049e3f4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -295,6 +295,7 @@
     UNSPEC_LD1RO
     UNSPEC_SALT_ADDR
     UNSPECV_PATCHABLE_AREA
+    UNSPEC_MACHOPIC_OFFSET
 ])
 
 (define_c_enum "unspecv" [
@@ -844,6 +845,37 @@
   [(set_attr "type" "load_4")]
 )
 
+(define_insn "prefetch_unscaled"
+  [(prefetch (match_operand:DI 0 "aarch64_unscaled_prefetch_operand" "Du")
+            (match_operand:QI 1 "const_int_operand" "")
+            (match_operand:QI 2 "const_int_operand" ""))]
+  ""
+  {
+    const char * pftype[2][4] =
+    {
+      {"prfum\\tPLDL1STRM, %0",
+       "prfum\\tPLDL3KEEP, %0",
+       "prfum\\tPLDL2KEEP, %0",
+       "prfum\\tPLDL1KEEP, %0"},
+      {"prfum\\tPSTL1STRM, %0",
+       "prfum\\tPSTL3KEEP, %0",
+       "prfum\\tPSTL2KEEP, %0",
+       "prfum\\tPSTL1KEEP, %0"},
+    };
+
+    int locality = INTVAL (operands[2]);
+
+    gcc_assert (IN_RANGE (locality, 0, 3));
+
+    /* PRFUM accepts the same addresses as a 64-bit LDUR so wrap
+       the address into a DImode MEM so that aarch64_print_operand knows
+       how to print it.  */
+    operands[0] = gen_rtx_MEM (DImode, operands[0]);
+    return pftype[INTVAL(operands[1])][locality];
+  }
+  [(set_attr "type" "load_4")]
+)
+
 (define_insn "trap"
   [(trap_if (const_int 1) (const_int 8))]
   ""
@@ -1281,7 +1313,7 @@
      [w  , m  ; load_4   , fp  , 4] ldr\t%s0, %1
      [m  , r Z; store_4  , *   , 4] str\t%w1, %0
      [m  , w  ; store_4  , fp  , 4] str\t%s1, %0
-     [r  , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
+     [r  , Usw; load_4   , *   , 8] << TARGET_MACHO ? \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %O1]\" : \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]\";
      [r  , Usa; adr      , *   , 4] adr\t%x0, %c1
      [r  , Ush; adr      , *   , 4] adrp\t%x0, %A1
      [w  , r Z; f_mcr    , fp  , 4] fmov\t%s0, %w1
@@ -1315,7 +1347,7 @@
      [w, m  ; load_8   , fp  , 4] ldr\t%d0, %1
      [m, r Z; store_8  , *   , 4] str\t%x1, %0
      [m, w  ; store_8  , fp  , 4] str\t%d1, %0
-     [r, Usw; load_8   , *   , 8] << TARGET_ILP32 ? "adrp\t%0, %A1;ldr\t%w0, [%0, %L1]" : "adrp\t%0, %A1;ldr\t%0, [%0, %L1]";
+     [r, Usw; load_8   , *   , 8] << TARGET_ILP32 ? (TARGET_MACHO ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %O1]\" : \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\") : (TARGET_MACHO ? \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %O1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\");
      [r, Usa; adr      , *   , 4] adr\t%x0, %c1
      [r, Ush; adr      , *   , 4] adrp\t%x0, %A1
      [w, r Z; f_mcr    , fp  , 4] fmov\t%d0, %x1
@@ -7108,7 +7140,10 @@
 	(lo_sum:P (match_operand:P 1 "register_operand" "r")
 		  (match_operand 2 "aarch64_valid_symref" "S")))]
   ""
-  "add\\t%<w>0, %<w>1, :lo12:%c2"
+  { return TARGET_MACHO
+    ? "add\\t%<w>0, %<w>1, %K2;momd"
+    : "add\\t%<w>0, %<w>1, :lo12:%c2";
+  }
   [(set_attr "type" "alu_imm")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 4a0580435a8d..623856db7d7f 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -193,6 +193,13 @@ Enum(aarch64_abi) String(ilp32) Value(AARCH64_ABI_ILP32)
 EnumValue
 Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
 
+EnumValue
+Enum(aarch64_abi) String(darwinpcs) Value(AARCH64_ABI_LP64)
+
+m64
+Target RejectNegative Alias(mabi=, darwinpcs)
+On Darwin for compatibility with other platform variants.
+
 mpc-relative-literal-loads
 Target Save Var(pcrelative_literal_loads) Init(2) Save
 PC relative literal loads.
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6df1dbec2a80..5a2bbaafe4ea 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -168,7 +168,9 @@
    A constraint that matches a small GOT access."
   (and (match_code "const,symbol_ref")
        (match_test "aarch64_classify_symbolic_expression (op)
-		     == SYMBOL_SMALL_GOT_4G")))
+		     == SYMBOL_SMALL_GOT_4G
+		    || aarch64_classify_symbolic_expression (op)
+		     == SYMBOL_MO_SMALL_GOT")))
 
 (define_constraint "Uss"
   "@internal
@@ -505,6 +507,11 @@
  An address valid for a prefetch instruction."
  (match_test "aarch64_address_valid_for_prefetch_p (op, true)"))
 
+(define_address_constraint "Du"
+  "@internal
+ An address valid for a prefetch instruction with an unscaled offset."
+ (match_test "aarch64_address_valid_for_unscaled_prefetch_p (op, true)"))
+
 (define_constraint "vgb"
   "@internal
    A constraint that matches an immediate offset valid for SVE LD1B
diff --git a/gcc/config/aarch64/darwin.h b/gcc/config/aarch64/darwin.h
new file mode 100644
index 000000000000..4a3608014b86
--- /dev/null
+++ b/gcc/config/aarch64/darwin.h
@@ -0,0 +1,270 @@
+/* Target definitions for Arm64/Aarch64 running on macOS/iOS.
+
+Copyright The GNU Toolchain Authors.
+Contributed by Iain Sandoe.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Enable Mach-O bits in generic Aarch64 code.  */
+#undef TARGET_MACHO
+#define TARGET_MACHO 1
+
+#undef DARWIN_ARM64
+#define DARWIN_ARM64 1
+
+/* FIXME FIXME FIXME - these are mostly guesses right now.  */
+
+/* FIXME: this is only used in generic code in darwin.c.  */
+#undef TARGET_64BIT
+#define TARGET_64BIT 1
+
+#undef  PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* NOTE that arm64_32 is a valid thing and corresponds to darwinpcs
+   and TARGET_ILP32, but we are not implementing that for now.  */
+#define TARGET_OS_CPP_BUILTINS()		\
+  do {						\
+    builtin_define ("__LITTLE_ENDIAN__");	\
+    builtin_define ("__arm64");			\
+    builtin_define ("__arm64__");		\
+    darwin_cpp_builtins (pfile);		\
+  } while (0)
+
+/* In Darwin's arm64 ABI, chars are signed, for consistency with other Darwin
+   architectures.  */
+
+#undef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE	64
+
+/* Non-PIE executables are forbidden by the aarch64-darwin security model;
+   remove the option from link-lines since they just produce a warning from
+   ld64 and are then ignored anyway.  */
+#undef DARWIN_NOPIE_SPEC
+#define DARWIN_NOPIE_SPEC \
+" %<no-pie %<fno-pie %<fno-PIE "
+
+/* Hack alert - we want the exported cas etc.  */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!static:-lSystem} -lgcc"
+
+/* Force the default endianness and ABI flags onto the command line
+   in order to make the other specs easier to write.  Match clang in
+   silently ignoring mdynamic-no-pic.  */
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+"%{mbig-endian:%eDarwin platforms do not support big-endian arm64}" \
+"%{!mlittle-endian:-mlittle-endian} " \
+"%{mabi=ilp32:%eSorry, support for Darwin ilp32 arm64 is not implemented} " \
+"%{!mabi=*:-mabi=lp64} " \
+" %<mdynamic-no-pic* " \
+  MCPU_MTUNE_NATIVE_SPECS \
+  SUBTARGET_DRIVER_SELF_SPECS
+
+/* We want -fPIC by default, unless we're using -static to compile for
+   the kernel or some such.  */
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+"%{!mkernel:%{!static:-fPIC}} " DARWIN_CC1_SPEC
+
+#undef ASM_SPEC
+#define ASM_SPEC "-arch %(darwin_arch) "\
+  ASM_OPTIONS " %{static} " ASM_MMACOSX_VERSION_MIN_SPEC
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+  " " TM_DESTRUCTOR
+
+/* The arch is known as 'arm64' by the system tools.  */
+#define DARWIN_ARCH_SPEC "arm64"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS					\
+  DARWIN_EXTRA_SPECS						\
+  { "darwin_arch", DARWIN_ARCH_SPEC },				\
+  { "darwin_crt2", "" },					\
+  { "darwin_subarch", DARWIN_ARCH_SPEC },
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END darwin_file_end
+
+/* For now, we do not give global entities any extra alignment
+   TODO: determine if we should for some optimisation level.  */
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(EXP, ALIGN)			\
+  AARCH64_EXPAND_ALIGNMENT (false, EXP, ALIGN)
+
+/* Darwin binds locally for PIC code (the default) without which
+   we lose many inlining opportunities.  */
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+
+/* Define the syntax of pseudo-ops, labels and comments.  */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* Support for -falign-* switches.  Use .p2align to ensure that code
+   sections are padded with NOP instructions, rather than zeros.  */
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP)		\
+  do								\
+    {								\
+      if ((LOG) != 0)						\
+	{							\
+	  if ((MAX_SKIP) == 0)					\
+	    fprintf ((FILE), "\t.p2align %d\n", (int) (LOG));	\
+	  else							\
+	    fprintf ((FILE), "\t.p2align %d,,%d\n",		\
+		     (int) (LOG), (int) (MAX_SKIP));		\
+	}							\
+    } while (0)
+
+#endif /* HAVE_GAS_MAX_SKIP_P2ALIGN */
+
+/* String containing the assembler's comment-starter.  */
+
+#define ASM_COMMENT_START ";"
+
+/* Define the syntax of pseudo-ops, labels and comments.  */
+
+#define LPREFIX "L"
+
+/* Assembler pseudos to introduce constants of various size.  On AArch64
+   ".word" introduces a 32-bit value, so 16-bit data needs ".short".  */
+#define ASM_BYTE "\t.byte\t"
+#define ASM_SHORT "\t.short\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t"
+
+/* darwinpcs reserves X18.  */
+
+#undef FIXED_REGISTERS
+#define FIXED_REGISTERS					\
+  {							\
+    0, 0, 0, 0,   0, 0, 0, 0,	/* R0 - R7 */		\
+    0, 0, 0, 0,   0, 0, 0, 0,	/* R8 - R15 */		\
+    0, 0, 1, 0,   0, 0, 0, 0,	/* R16 - R23 */		\
+    0, 0, 0, 0,   0, 1, 0, 1,	/* R24 - R30, SP */	\
+    0, 0, 0, 0,   0, 0, 0, 0,   /* V0 - V7 */           \
+    0, 0, 0, 0,   0, 0, 0, 0,   /* V8 - V15 */		\
+    0, 0, 0, 0,   0, 0, 0, 0,   /* V16 - V23 */         \
+    0, 0, 0, 0,   0, 0, 0, 0,   /* V24 - V31 */         \
+    1, 1, 1, 1,			/* SFP, AP, CC, VG */	\
+    0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
+    0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
+    1, 1			/* FFR and FFRT */	\
+  }
+
+/* Although we cannot use executable stack, we still need to assign
+   a static chain regnum.  At the moment R16 (IP0) is available for this.  */
+#undef STATIC_CHAIN_REGNUM
+#define STATIC_CHAIN_REGNUM	R16_REGNUM
+
+#define SUBTARGET_ENCODE_SECTION_INFO  darwin_encode_section_info
+
+#undef ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+  if (TARGET_64BIT)							\
+    {									\
+      if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_pcrel)		\
+	{								\
+	  fputs (ASM_LONG, FILE);					\
+	  assemble_name (FILE, XSTR (ADDR, 0));				\
+	  fputs ("@GOT-.", FILE);					\
+	  goto DONE;							\
+	}								\
+    }									\
+  else									\
+    {									\
+      if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1))		\
+	{								\
+	  gcc_unreachable (); /* no 32b support yet.*/			\
+	  /*darwin_non_lazy_pcrel (FILE, ADDR);*/			\
+	  goto DONE;							\
+	}								\
+    }
+
+/* Darwin assemblers support the .ident directive.  */
+
+#undef TARGET_ASM_OUTPUT_IDENT
+#define TARGET_ASM_OUTPUT_IDENT default_asm_output_ident_directive
+
+/* Darwin has experimental support for section anchors on aarch64*; it is
+   not enabled by default (the -fsection-anchors is required).  */
+
+#undef TARGET_ASM_OUTPUT_ANCHOR
+#define TARGET_ASM_OUTPUT_ANCHOR darwin_asm_output_anchor
+
+#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
+#define TARGET_USE_ANCHORS_FOR_SYMBOL_P darwin_use_anchors_for_symbol_p
+
+#undef DARWIN_SECTION_ANCHORS
+#define DARWIN_SECTION_ANCHORS 1
+
+/* Pull in the stuff common to all Darwin-based platforms.  */
+#define C_COMMON_OVERRIDE_OPTIONS				\
+  do {								\
+    SUBTARGET_C_COMMON_OVERRIDE_OPTIONS;			\
+  } while (0)
+
+/* We do not have a definition for a tiny (or large) code model so far.
+   Section anchors are (probably) not useful with ld64 atom model so
+   default them off - this can be overridden by the user at present.
+   mdynamic-no-pic is silently ignored by clang (and not applicable
+   to this port).  Everything, including the common Darwin overrides,
+   is kept inside one do/while so the macro is a single statement.  */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS					\
+  do {									\
+    if (global_options.x_aarch64_cmodel_var == AARCH64_CMODEL_TINY)	\
+      sorry ("code model %qs is not supported on Darwin platforms",	\
+	     "tiny");							\
+    if (!global_options_set.x_flag_section_anchors)			\
+      flag_section_anchors = 0;						\
+    target_flags &= ~MASK_MACHO_DYNAMIC_NO_PIC;				\
+    SUBSUBTARGET_OVERRIDE_OPTIONS;					\
+  } while (0)
+
+#undef  SUBTARGET_INIT_BUILTINS
+#define SUBTARGET_INIT_BUILTINS						\
+  do {									\
+    aarch64_builtin_decls[AARCH64_BUILTIN_CFSTRING]			\
+      = darwin_init_cfstring_builtins ((AARCH64_BUILTIN_CFSTRING << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL); \
+  } while(0)
+
+/* Darwin on Arm64 uses dwarf-2.  */
+#ifndef DARWIN_PREFER_DWARF
+# undef PREFERRED_DEBUGGING_TYPE
+# define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+
+#undef REGISTER_SUBTARGET_PRAGMAS
+#define REGISTER_SUBTARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS()
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes
+
+/* FIXME:  CHECK Define the shadow offset for asan.  */
+#undef SUBTARGET_SHADOW_OFFSET
+#define SUBTARGET_SHADOW_OFFSET (HOST_WIDE_INT_1 << 44)
+
+/* First available SYMBOL flag bit for use by subtargets.  */
+#define SYMBOL_FLAG_SUBT_DEP (SYMBOL_FLAG_MACH_DEP)
+
+#undef ASM_OUTPUT_DEF_FROM_DECLS
diff --git a/gcc/config/aarch64/darwinpcs.md b/gcc/config/aarch64/darwinpcs.md
new file mode 100644
index 000000000000..9b897472ea1b
--- /dev/null
+++ b/gcc/config/aarch64/darwinpcs.md
@@ -0,0 +1,455 @@
+# The Darwin ABI (darwinpcs) for AArch64 (Arm64) Mach-O
+
+## Introduction.
+
+This describes the Darwin PCS (darwinpcs) as implemented in GCC-12.
+
+Base information is taken from:
+
+[1] [ARM IHI 0055B : AAPCS64 (current through AArch64 ABI release 1.0, dated 22nd May 2013)](http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf)
+
+[2] [Apple iOS document : darwinpcs](https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html#//apple_ref/doc/uid/TP40013702-SW1)
+
+[3] [The LLVM backend for Mach-O/arm64 from GIT (master 12 at the document date).](https://github.com/llvm/llvm-project.git)
+
+## Terminology
+
+**Darwin** is the kernel used for macOS (10/OSX and 11) and iOS (all versions,
+so far).
+
+**Mach-O** is the file format used for object files and DSOs, including executables
+on Darwin platforms (to some extent, the two names are interchangeable in
+describing rules applicable to a back end).
+
+The `AArch64` port for Darwin is known as `arm64` (`arm64` is synonymous with
+`aarch64` for Darwin in GCC).   There is an ILP32 variant, `arm64_32` (not yet
+considered in detail or handled by these branches).
+
+There are three main technical sections:
+
+* Part 1 which describes the darwinpcs deviations from AAPCS64.
+
+   This is primarily a cross-reference between [2] and [1].
+
+* Part 2 contains additional pertinent information.
+
+   Some is recorded in [2] but most is determined from generic Darwin/Mach-O
+   rules and the implementation [3].
+
+* Part 3 describes the deviations from darwinpcs present in GCC.
+
+   GCC implements some features not available in the host tools which are
+   based on LLVM/clang.
+
+   GCC does not yet (as of GCC-12.1) support all parts of the darwinpcs as
+   written here.
+
+**Note** Since a macOS or iOS system is built with a toolchain based on [3],
+that sets the de facto ABI.  Therefore, the ABI as implemented by the XCode
+version appropriate to a given OS release shall take precedence over version(s)
+described in the referenced documents in the event of discrepancy.
+
+## PART 1 - AAPCS64 and darwinpcs.
+
+### Outline
+
+Darwin PCS Differences from AAPCS64.
+
+The intent of these notes is to match the differences described in [2] against
+the section numbers and rule designations of [1] since the AArch64 port code
+uses the rule designations in comments.
+
+The organisation of these notes is by section heading of [1].
+
+[2] Refers to the darwinpcs as "iOS" which was the first Darwin OS variant
+implementing it, however it is stated (albeit unofficially?) that Arm64 macOS
+will adopt the same ABI and is expected to be able to execute iOS executables.
+
+So, for the present, 'iOS' is considered to be equivalent to 'macOS'
+(generically 'Darwin').
+
+In the text from [2] the expression "generic procedure call standard" refers
+to the AAPCS64 [1].
+
+Darwin PCS rules are designated `D.N` below.
+
+### AAPCS64 Section 1.
+
+No amendments.
+
+### AAPCS64 Section 2.
+
+No amendments.
+
+### AAPCS64 Section 3.
+
+No amendments.
+
+The darwinpcs is non-conforming with the aapcs64 in the areas described below.
+
+### AAPCS64 Section 4.
+
+No amendments
+
+but note:
+* Darwin's `char` and `wchar_t` are both signed.
+* Where applicable, the `__fp16` type is `IEEE754-2008` format.
+
+### AAPCS64 Section 5.
+
+5.1 Machine Registers
+5.1.1 General-purpose Registers
+
+Darwin reserves `x18` as the platform register (as permitted).
+
+5.2 Processes, Memory and the Stack
+5.2.3 The Frame Pointer
+
+From [2] : The frame pointer register (x29) must always address a valid frame
+record, although some functions—such as leaf functions or tail calls—may elect
+not to create an entry in this list.
+
+This corresponds to the first bullet and is conforming.  It implies that Darwin
+should warn if the user tries to use an option that omits the FP.
+
+5.4 Parameter Passing
+5.4.2 Parameter Passing Rules
+
+`D.1` From [2] : Empty struct types are ignored for parameter-passing purposes.
+This behavior applies to the GNU extension in C and, where permitted by the
+language, in C++.
+
+It is noted that this might not correspond to any specific rule - but,
+presumably, needs to be applied in marshalling arguments.
+
+`D.2` From [2] : In the generic procedure call standard, all function arguments
+passed on the stack consume slots in multiples of 8 bytes. In iOS, this
+requirement is dropped, and values consume only the space required.   Padding
+is still inserted on the stack to satisfy arguments’ alignment requirements.
+
+`D.3` From [2] : The general ABI specifies that it is the callee’s responsibility
+to sign or zero-extend arguments having fewer than 32 bits, and that unused bits
+in a register are unspecified. In iOS, however, the caller must perform such
+extensions, up to 32 bits.  This apparently conflicts with the `D.2` above and
+thus can only be applied to values passed in registers?
+
+(notwithstanding C rules for widening).
+
+`D.4` From [2] : The generic procedure call standard requires that arguments
+with 16-byte alignment passed in integer registers begin at an even-numbered
+xN, skipping a previous odd-numbered xN if necessary. The iOS ABI drops this
+requirement.
+
+#### Variadic Functions
+
+From [2]:
+The iOS ABI for functions that take a variable number of arguments is entirely
+different from the generic version.
+
+Stages A and B of the generic procedure call standard are performed as usual.
+In particular, even variadic aggregates larger than 16 bytes are passed via a
+reference to temporary memory allocated by the caller. After that, the fixed
+arguments are allocated to registers and stack slots as usual in iOS.
+
+The NSRN(*sic*) (?NSAA was intended?) is then rounded up to the next multiple
+of 8 bytes, and each variadic argument is assigned to the appropriate number
+of 8-byte stack slots.
+
+The C language requires arguments smaller than int to be promoted before a call,
+but beyond that, unused bytes on the stack are not specified by the darwinpcs.
+
+(see section 7) As a result of this change, the type va_list is an alias for
+char * rather than for the struct type specified in the generic PCS.
+It is also not in the std namespace when compiling C++ code.
+
+#### Stage A
+
+No Changes.
+
+#### Stage B
+
+No changes.
+
+#### Stage C
+
+Insert C.6.5 `D.3`
+If the argument is an Integral or Pointer Type, the size of the argument is
+less than 4 bytes and the NGRN is less than 8, the argument is sign or zero-
+extended as appropriate to 4 bytes.
+
+C.8 Delete rule per `D.4`
+
+C.12
+`D.2` Amend to:
+The NSAA is rounded up to Natural Alignment of the argument’s type.
+
+C.14 Delete rule per `D.2`
+
+From the observations section:
+"Both before and after the layout of each argument, then NSAA will have a
+ minimum alignment of 8."  This no longer applies.
+
+### AAPCS64 Section 6.
+
+No changes (noting that the `__fp16` type is `IEEE754-2008` format.)
+
+### AAPCS64 Section 7.
+
+7.1 Data Types
+7.1.1 Arithmetic Types
+
+Table 3 is amended thus.
+
+| C/C++ Type | Machine Type | Notes |
+| --- | --- | --- |
+| char | signed byte | compatible with other Darwin variants  |
+| wchar_t | int | ditto |
+
+**The size of long double (and by implication the _Imaginary and _Complex C99
+variants) is set to be the same as double.**
+
+We need to consider **_very_** carefully how to handle this.  The current
+(64bit) long double is mangled as 'e' by the clang toolchain.
+
+7.1.4 Additional Types
+
+Table 5 is amended to reflect the different variadic function rules.
+
+`D.6` The type `va_list` is an alias for `char *`
+`D.7` The `va_list` type is _not_ presented in `std::` for C++.
+
+7.2 Argument Passing Conventions
+
+Possibly, might require adjustment for `D.3`?
+
+## PART 2 - Other platform information
+
+### Additional comments from [2]
+
+#### Red Zone
+
+The ARM64 iOS red zone consists of the 128 bytes immediately below the stack
+pointer sp. As with the x86-64 ABI, the operating system has committed not to
+modify these bytes during exceptions. User-mode programs can rely on them not
+to change unexpectedly, and can potentially make use of the space for local
+variables.
+
+In some circumstances, this approach can save an sp-update instruction on
+function entry and exit.
+
+At present, it seems, that there's no port using a red zone for AArch64, and
+there's no implementation - so this optimisation opportunity will be unused
+at least initially.  TODO.
+
+#### Divergences from the Generic C++ ABI
+
+The generic ARM64 C++ ABI is specified in C++ Application Binary Interface
+Standard for the ARM 64-bit architecture, which is in turn based on the
+Itanium C++ ABI used by many UNIX-like systems.
+
+Some sections are ELF-specific and not applicable to the underlying object
+format used by iOS. There are, however, some significant differences from
+these specifications in iOS.
+
+##### Name Mangling
+
+When compiling C++ code, types get incorporated into the names of functions
+in a process referred to as “mangling.” The iOS ABI differs from the generic
+specification in the following small ways.
+
+Because `va_list` is an alias for `char *`, it is mangled in the same way—as
+`Pc` instead of `St9__va_list` (Section 7).
+
+NEON vector types are mangled in the same way as their 32-bit ARM counterparts,
+rather than using the 64-bit scheme. For example, iOS uses `17__simd128_int32_t`
+instead of the generic `11__Int32x4_t`.
+
+##### Other Itanium Divergences
+
+In the generic ABI, empty structs are treated as aggregates with a single byte
+member for parameter passing. In iOS, however, they are ignored unless they
+have a nontrivial destructor or copy-constructor. If they do have such
+functions, they are considered as aggregates with one byte member in the
+generic manner.
+
+As with the ARM 32-bit C++ ABI, iOS requires the complete-object (C1) and base-
+object (C2) constructors to return this to their callers. Similarly, the
+complete object (D1) and base object (D2) destructors return this. This
+requirement is not made by the generic ARM64 C++ ABI.
+
+In the generic C++ ABI, array cookies change their size and alignment according
+to the type being allocated. As with the 32-bit ARM, iOS provides a fixed
+layout of two size_t words, with no extra alignment requirements.
+
+In iOS, object initialization guards are nominally `uint64_t` rather than
+`int64_t`.
+This affects the prototypes of the functions `__cxa_guard_acquire`,
+`__cxa_guard_release` and `__cxa_guard_abort`.
+
+In the generic ARM64 ABI, function pointers whose type differ only in being
+extern "C" or extern "C++" are interchangeable. This is not the case in iOS.
+
+### Undocumented items
+
+* The platform ABI contains provisions for the Swift language, but since GCC
+has no Swift FE there's no need to implement them (it might be wise to ensure
+that any reserved registers are handled appropriately, though).
+
+* Darwin user-space code is PIC (2) = fPIC (so nominally 'large' but the code
+model is not modified by the PIC setting [I think FIXME: check]).
+
+FIXME: ??? I'm not clear about kernel mode at present.
+
+The following symbol kinds always have a GOT indirection for Mach-O-pic.
+
+* undefined external
+* weak [not hidden]
+* common
+
+FIXME: check other rules for GOT indirections.
+
+### Darwin code models
+
+* TINY is _not_ supported
+* SMALL supported (DEFAULT)
+* LARGE supported
+
+   AFAICT, Darwin's large model is PIC (with perhaps a very limited number of
+   modes).
+   However, large+PIC is stated to be unimplemented in the current aarch64
+   backend so that's a TODO.
+
+FIXME: ??? I'm not clear about kernel mode at present.
+
+### Darwin arm64 TLS
+
+Darwin has a single TLS model (not attempting to implement in the
+short-term).  It's closest to ELF xxxxxxx FIXME: which one?
+
+### Generic Darwin/Mach-O Comments for people familiar with ELF.
+
+Darwin
+* does _not_ support strong symbol aliases
+* does support weak symbol aliases
+* supports visibility - default and hidden.
+* Has a "for linker only" symbol visibility.
+
+   Such symbols are visible to the static linker (`ld64`), but not externally.
+  These are used to support the Mach-O "subsections_by_symbol" linker mode
+  (default for > 10 years).  Any symbol that is not 'global' and does not begin
+  with 'L' (the local symbol designation) is counted as 'linker visible'.
+ * does _not_ support 'static' code in the user space
+
+   Everything needs to be invoked using the dynamic linker (`dyld`).  There is
+  neither crt0.o nor a static edition of libc.
+
+Some versions of Darwin have used 'static' code for kernel modules.
+FIXME: ??? what is the kernel model here.
+
+### Darwin Relocations and Assembler syntax
+
+* `Mach-O` for `Arm64` uses a reduced set of relocations c.f. the ELF set.
+
+   There are only 11 entries but the relocation format allows for multiple sizes
+(1, 2, 4, 8) where that's appropriate, and for ancillary data (e.g. a scale),
+so the actual number of permutations is larger.
+
+* Generally, Darwin supports relocations of the form A - B + signed const
+
+   A must be known (i.e. defined in the current TU).
+
+* `Mach-O` for `Arm64` has postfix assembler syntax.
+
+   Where there's an assembly language representation for the relocation type
+   it appears after the name (e.g. `foo@PAGE` in contrast to the ELF
+   `:got:foo`).
+
+#### Relocs list
+
+For pointers (no source representation).
+
+`ARM64_RELOC_UNSIGNED = 0`
+
+Must be followed by an `ARM64_RELOC_UNSIGNED`
+
+`ARM64_RELOC_SUBTRACTOR = 1`
+
+A B/BL instruction with 26-bit displacement.
+(no source representation)
+
+`ARM64_RELOC_BRANCH26 = 2`
+
+PC-rel distance to page of target [adrp].
+
+`foo@PAGE`
+
+`ARM64_RELOC_PAGE21 = 3`
+
+Offset within page, scaled by r_length [add imm, ld/st].
+
+`foo@PAGEOFF`
+
+`ARM64_RELOC_PAGEOFF12 = 4`
+
+PC-rel distance to page of GOT slot [adrp].
+
+`foo@GOTPAGE`
+`ARM64_RELOC_GOT_LOAD_PAGE21 = 5`
+
+Offset within page of GOT slot, scaled by r_length [add imm, ld/st].
+
+`foo@GOTPAGEOFF`
+
+`ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6`
+
+
+For pointers to GOT slots.
+(4 and 8 byte versions)
+
+`foo@GOT`
+
+`ARM64_RELOC_POINTER_TO_GOT = 7`
+
+
+PC-rel distance to page of TLVP slot [adrp].
+
+`foo@TLVPPAGE`
+
+`ARM64_RELOC_TLVP_LOAD_PAGE21 = 8`
+
+Offset within page of TLVP slot, scaled by r_length [add imm, ld/st].
+
+`foo@TLVPPAGEOFF`
+
+`ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9`
+
+Must be followed by `ARM64_RELOC_PAGE21` or `ARM64_RELOC_PAGEOFF12`.
+(no source representation)
+
+The addend is a signed 24bit quantity (+/- 8M range).
+
+`ARM64_RELOC_ADDEND = 10`
+
+## PART 2 - GCC-12 deviations from the PCS and supporting information.
+
+### D.3 is not yet supported (github issue #74)
+
+  GCC promotes in the callee not the caller.
+
+### Support for nested functions
+
+  GCC provides nested functions which are used overtly from C but also to
+  implement some parts of Ada and Fortran.
+
+  This requires assigning a register to act as the STATIC CHAIN.
+  For GCC-12 this is X16
+
+  Support for nested function trampolines is provided by a heap-based table.
+
+### Support for __float128
+
+  The darwinpcs has no provision for a 128bit float type.
+  GCC-12 supports IEEE754 128bit float values by soft-float.
+  The ABI used for __float128 matches that for AAPCS64.
+
+## End.
diff --git a/gcc/config/aarch64/falkor-tag-collision-avoidance.cc b/gcc/config/aarch64/falkor-tag-collision-avoidance.cc
index 39e3f5c2d1ba..78790cd1d155 100644
--- a/gcc/config/aarch64/falkor-tag-collision-avoidance.cc
+++ b/gcc/config/aarch64/falkor-tag-collision-avoidance.cc
@@ -740,7 +740,7 @@ dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
 		void *unused ATTRIBUTE_UNUSED)
 {
   gcc_assert (dump_file);
-  fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));
+  fprintf (dump_file, "Tag 0x%lx ::\n", (long unsigned int)INTVAL (t));
 
   for (unsigned i = 0; i < insn_info.length (); i++)
     dump_insn_slim (dump_file, insn_info[i]->insn);
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d5a4a1cd9bf8..3ae5aae646b7 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -277,9 +277,24 @@
 (define_predicate "aarch64_prefetch_operand"
   (match_test "aarch64_address_valid_for_prefetch_p (op, false)"))
 
+(define_predicate "aarch64_unscaled_prefetch_operand"
+  (match_test "aarch64_address_valid_for_unscaled_prefetch_p (op, false)"))
+
 (define_predicate "aarch64_valid_symref"
   (match_code "const, symbol_ref, label_ref")
 {
+  if (TARGET_MACHO)
+    {
+      rtx x = op;
+      rtx offset;
+      split_const (x, &x, &offset);
+      if (GET_CODE (x) == CONST)
+	x = XEXP (x, 0);
+      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR)
+	x = XVECEXP (x, 0, 0);
+      if (SYMBOL_REF_P (x) && INTVAL (offset) < 0)
+        return false;
+    }
   return (aarch64_classify_symbolic_expression (op)
 	  != SYMBOL_FORCE_TO_MEM);
 })
diff --git a/gcc/config/aarch64/t-aarch64-darwin b/gcc/config/aarch64/t-aarch64-darwin
new file mode 100644
index 000000000000..9754e87ebcf0
--- /dev/null
+++ b/gcc/config/aarch64/t-aarch64-darwin
@@ -0,0 +1,25 @@
+# Machine description for AArch64 architecture.
+#  Copyright (C) 2020 Free Software Foundation, Inc.
+#
+#  This file is part of GCC.
+#
+#  GCC is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 3, or (at your option)
+#  any later version.
+#
+#  GCC is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCC; see the file COPYING3.  If not see
+#  <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC   = aarch64/lib1funcs.asm
+LIB1ASMFUNCS = _aarch64_sync_cache_range
+
+# FIXME - figure out what multilib provisions we should make for
+# a) arm64e
+# b) arm64_32
diff --git a/gcc/config/darwin-driver.cc b/gcc/config/darwin-driver.cc
index cdfcac93b008..a4d7cfe73764 100644
--- a/gcc/config/darwin-driver.cc
+++ b/gcc/config/darwin-driver.cc
@@ -268,10 +268,13 @@ darwin_driver_init (unsigned int *decoded_options_count,
   bool seenX86_64 = false;
   bool seenPPC = false;
   bool seenPPC64 = false;
+#if !DARWIN_ARM64
+  bool seenArm64 = false;
   bool seenM32 = false;
   bool seenM64 = false;
   bool appendM32 = false;
   bool appendM64 = false;
+#endif
   const char *vers_string = NULL;
   bool seen_version_min = false;
   bool seen_sysroot_p = false;
@@ -300,6 +303,12 @@ darwin_driver_init (unsigned int *decoded_options_count,
 	    seenPPC = true;
 	  else if (!strcmp ((*decoded_options)[i].arg, "ppc64"))
 	    seenPPC64 = true;
+	  else if (!strcmp ((*decoded_options)[i].arg, "arm64"))
+#if !DARWIN_ARM64
+	    seenArm64 = true;
+#else
+	    ; /* We accept the option, but don't need to act on it.  */
+#endif
 	  else
 	    error ("this compiler does not support %qs",
 		   (*decoded_options)[i].arg);
@@ -313,7 +322,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
 	  --i;
 	  --*decoded_options_count; 
 	  break;
-
+#if !DARWIN_ARM64
 	case OPT_m32:
 	  seenM32 = true;
 	  break;
@@ -321,6 +330,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
 	case OPT_m64:
 	  seenM64 = true;
 	  break;
+#endif
 
 	case OPT_mmacosx_version_min_:
 	  seen_version_min = true;
@@ -378,6 +388,9 @@ darwin_driver_init (unsigned int *decoded_options_count,
   if (seenPPC || seenPPC64)
     warning (0, "this compiler does not support PowerPC"
 		" (%<-arch%> option ignored)");
+  else if (seenArm64)
+    warning (0, "this compiler does not support Arm64"
+		" (%<-arch%> option ignored)");
   if (seenX86)
     {
       if (seenX86_64 || seenM64)
@@ -401,6 +414,9 @@ darwin_driver_init (unsigned int *decoded_options_count,
   if (seenX86 || seenX86_64)
     warning (0, "this compiler does not support x86"
 		" (%<-arch%> option ignored)");
+  else if (seenArm64)
+    warning (0, "this compiler does not support Arm64"
+		" (%<-arch%> option ignored)");
   if (seenPPC)
     {
       if (seenPPC64 || seenM64)
@@ -420,12 +436,20 @@ darwin_driver_init (unsigned int *decoded_options_count,
       if (! seenM64) /* Add -m64 if the User didn't. */
 	appendM64 = true;
     }
+#elif DARWIN_ARM64
+  if (seenPPC || seenPPC64)
+    warning (0, "this compiler does not support PowerPC"
+		" (%<-arch%> option ignored)");
+  if (seenX86 || seenX86_64)
+    warning (0, "this compiler does not support x86"
+		" (%<-arch%> option ignored)");
 #endif
 
   /* If there is nothing else on the command line, do not add sysroot etc.  */
   if (*decoded_options_count <= 1)
     return;
 
+#if !DARWIN_ARM64
   if (appendM32 || appendM64)
     {
       ++*decoded_options_count;
@@ -435,6 +459,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
       generate_option (appendM32 ? OPT_m32 : OPT_m64, NULL, 1, CL_DRIVER,
 		       &(*decoded_options)[*decoded_options_count - 1]);
     }
+#endif
 
   if (!seen_sysroot_p)
     {
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index efbcb3856ca6..52869c3de70a 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -118,7 +118,7 @@ static bool ld_init_term_start_labels = false;
 section * darwin_sections[NUM_DARWIN_SECTIONS];
 
 /* While we transition to using in-tests instead of ifdef'd code.  */
-#if !HAVE_lo_sum
+#if !HAVE_lo_sum || DARWIN_ARM64
 #define gen_macho_high(m,a,b) (a)
 #define gen_macho_low(m,a,b,c) (a)
 #endif
@@ -1052,6 +1052,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
   return pic_ref;
 }
 
+#if !DARWIN_ARM64
 /* Callbacks to output the stub or non-lazy pointers.
    Each works on the item in *SLOT,if it has been used.
    DATA is the FILE* for assembly output.
@@ -1207,6 +1208,7 @@ machopic_finish (FILE *out_file)
   machopic_indirections->traverse_noresize
     <FILE *, machopic_output_indirection> (out_file);
 }
+#endif
 
 int
 machopic_operand_p (rtx op)
@@ -2240,6 +2242,8 @@ darwin_emit_except_table_label (FILE *file)
 rtx
 darwin_make_eh_symbol_indirect (rtx orig, bool ARG_UNUSED (pubvis))
 {
+  if (DARWIN_ARM64)
+    return orig;
   if (DARWIN_PPC == 0 && TARGET_64BIT)
     return orig;
 
@@ -3060,7 +3064,12 @@ darwin_file_end (void)
       fprintf (asm_out_file, "\t.long\t0\n\t.long\t%u\n", flags);
      }
 
+#if !DARWIN_ARM64
   machopic_finish (asm_out_file);
+#else
+  gcc_checking_assert (!machopic_indirections);
+#endif
+
   if (flag_apple_kext)
     {
       /* These sections are only used for kernel code.  */
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index fca48c25a70b..d2e54b64ea8d 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -42,6 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #define DARWIN_X86 0
 #define DARWIN_PPC 0
+#define DARWIN_ARM64 0
 
 #define OBJECT_FORMAT_MACHO 1