Skip to content

Commit

Permalink
2015-10-06 Venkataramanan Kumar <Venkataramanan.kumar@amd.com>
Browse files Browse the repository at this point in the history
	
	AMD znver1 enablement. 
	* config.gcc (i[34567]86-*-linux* | ...): Add znver1.
	(case ${target}): Add znver1.
	* config/i386/cpuid.h(bit_CLZERO):  Define.
	* config/i386/driver-i386.c: (host_detect_local_cpu): Let
	-march=native recognize znver1 processors. 
	* config/i386/i386-c.c (ix86_target_macros_internal): Add
	znver1, clzero def_or_undef.
	* config/i386/i386.c (struct processor_costs znver1_cost): New.
	(m_ZNVER1): New definition.
	(m_AMD_MULTIPLE): Include m_ZNVER1.
	(processor_target_table): Add znver1 entry.
	(ix86_target_string) : Add clzero entry.
	(static const char *const cpu_names): Add znver1 entry.
	(ix86_option_override_internal): Add znver1 instruction sets.
	(PTA_CLZERO) :  New definition.
	(ix86_option_override_internal): Handle new clzero option.
	(ix86_issue_rate): Add znver1.
	(ix86_adjust_cost): Add znver1.                
	(ia32_multipass_dfa_lookahead): Add znver1.
	(has_dispatch): Add znver1.       
	* config/i386/i386.h (TARGET_ZNVER1): New definition.
	(TARGET_CLZERO): Define.
	(TARGET_CLZERO_P): Define.
	(struct ix86_size_cost): Add TARGET_ZNVER1.
	(enum processor_type): Add PROCESSOR_ZNVER1.
	* config/i386/i386.md (define_attr "cpu"): Add znver1.
	(set_attr znver1_decode): New definitions for znver1.
	* config/i386/i386.opt (flag_dispatch_scheduler): Add znver1.
	(mclzero): New.
	* config/i386/mmx.md (set_attr znver1_decode): New definitions
	for znver1.
	* config/i386/sse.md (set_attr znver1_decode): Likewise.
	* config/i386/x86-tune.def:  Add znver1 tunings.
	* config/i386/znver1.md: Introduce znver1 cpu and include new md file.
	* gcc/doc/invoke.texi: Add details about znver1.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228520 138bc75d-0d04-0410-961f-82ee72b054a4
  • Loading branch information
vekumar committed Oct 6, 2015
1 parent aa211fc commit 4c9faaa
Show file tree
Hide file tree
Showing 14 changed files with 1,239 additions and 29 deletions.
38 changes: 38 additions & 0 deletions gcc/ChangeLog
@@ -1,3 +1,41 @@
2015-10-06 Venkataramanan Kumar <Venkataramanan.kumar@amd.com>

* config.gcc (i[34567]86-*-linux* | ...): Add znver1.
(case ${target}): Add znver1.
* config/i386/cpuid.h(bit_CLZERO): Define.
* config/i386/driver-i386.c: (host_detect_local_cpu): Let
-march=native recognize znver1 processors.
* config/i386/i386-c.c (ix86_target_macros_internal): Add
znver1, clzero def_or_undef.
* config/i386/i386.c (struct processor_costs znver1_cost): New.
(m_ZNVER1): New definition.
(m_AMD_MULTIPLE): Include m_ZNVER1.
(processor_target_table): Add znver1 entry.
(ix86_target_string) : Add clzero entry.
(static const char *const cpu_names): Add znver1 entry.
(ix86_option_override_internal): Add znver1 instruction sets.
(PTA_CLZERO) : New definition.
(ix86_option_override_internal): Handle new clzero option.
(ix86_issue_rate): Add znver1.
(ix86_adjust_cost): Add znver1.
(ia32_multipass_dfa_lookahead): Add znver1.
(has_dispatch): Add znver1.
* config/i386/i386.h (TARGET_ZNVER1): New definition.
(TARGET_CLZERO): Define.
(TARGET_CLZERO_P): Define.
(struct ix86_size_cost): Add TARGET_ZNVER1.
(enum processor_type): Add PROCESSOR_ZNVER1.
* config/i386/i386.md (define_attr "cpu"): Add znver1.
(set_attr znver1_decode): New definitions for znver1.
* config/i386/i386.opt (flag_dispatch_scheduler): Add znver1.
(mclzero): New.
* config/i386/mmx.md (set_attr znver1_decode): New definitions
for znver1.
* config/i386/sse.md (set_attr znver1_decode): Likewise.
* config/i386/x86-tune.def: Add znver1 tunings.
* config/i386/znver1.md: Introduce znver1 cpu and include new md file.
* gcc/doc/invoke.texi: Add details about znver1.

2015-10-06 Richard Biener <rguenther@suse.de> 2015-10-06 Richard Biener <rguenther@suse.de>


PR tree-optimization/67859 PR tree-optimization/67859
Expand Down
10 changes: 9 additions & 1 deletion gcc/config.gcc
Expand Up @@ -592,7 +592,7 @@ pentium4 pentium4m pentiumpro prescott lakemont"
# 64-bit x86 processors supported by --with-arch=. Each processor # 64-bit x86 processors supported by --with-arch=. Each processor
# MUST be separated by exactly one space. # MUST be separated by exactly one space.
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
bdver3 bdver4 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \ bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \ core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
sandybridge ivybridge haswell broadwell bonnell silvermont knl \ sandybridge ivybridge haswell broadwell bonnell silvermont knl \
skylake-avx512 x86-64 native" skylake-avx512 x86-64 native"
Expand Down Expand Up @@ -3119,6 +3119,10 @@ case ${target} in
;; ;;
i686-*-* | i786-*-*) i686-*-* | i786-*-*)
case ${target_noncanonical} in case ${target_noncanonical} in
znver1-*)
arch=znver1
cpu=znver1
;;
bdver4-*) bdver4-*)
arch=bdver4 arch=bdver4
cpu=bdver4 cpu=bdver4
Expand Down Expand Up @@ -3232,6 +3236,10 @@ case ${target} in
;; ;;
x86_64-*-*) x86_64-*-*)
case ${target_noncanonical} in case ${target_noncanonical} in
znver1-*)
arch=znver1
cpu=znver1
;;
bdver4-*) bdver4-*)
arch=bdver4 arch=bdver4
cpu=bdver4 cpu=bdver4
Expand Down
3 changes: 3 additions & 0 deletions gcc/config/i386/cpuid.h
Expand Up @@ -65,6 +65,9 @@
#define bit_3DNOWP (1 << 30) #define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1 << 31) #define bit_3DNOW (1 << 31)


/* %ebx. */
#define bit_CLZERO (1 << 0)

/* Extended Features (%eax == 7) */ /* Extended Features (%eax == 7) */
/* %ebx */ /* %ebx */
#define bit_FSGSBASE (1 << 0) #define bit_FSGSBASE (1 << 0)
Expand Down
14 changes: 12 additions & 2 deletions gcc/config/i386/driver-i386.c
Expand Up @@ -414,6 +414,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
unsigned int has_pcommit = 0, has_mwaitx = 0; unsigned int has_pcommit = 0, has_mwaitx = 0;
unsigned int has_clzero = 0;


bool arch; bool arch;


Expand Down Expand Up @@ -533,6 +534,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_3dnowp = edx & bit_3DNOWP; has_3dnowp = edx & bit_3DNOWP;
has_3dnow = edx & bit_3DNOW; has_3dnow = edx & bit_3DNOW;
has_mwaitx = ecx & bit_MWAITX; has_mwaitx = ecx & bit_MWAITX;

__cpuid (0x80000008, eax, ebx, ecx, edx);
has_clzero = ebx & bit_CLZERO;
} }


/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
Expand Down Expand Up @@ -607,6 +611,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_GEODE; processor = PROCESSOR_GEODE;
else if (has_movbe && family == 22) else if (has_movbe && family == 22)
processor = PROCESSOR_BTVER2; processor = PROCESSOR_BTVER2;
else if (has_clzero)
processor = PROCESSOR_ZNVER1;
else if (has_avx2) else if (has_avx2)
processor = PROCESSOR_BDVER4; processor = PROCESSOR_BDVER4;
else if (has_xsaveopt) else if (has_xsaveopt)
Expand Down Expand Up @@ -872,6 +878,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_BDVER4: case PROCESSOR_BDVER4:
cpu = "bdver4"; cpu = "bdver4";
break; break;
case PROCESSOR_ZNVER1:
cpu = "znver1";
break;
case PROCESSOR_BTVER1: case PROCESSOR_BTVER1:
cpu = "btver1"; cpu = "btver1";
break; break;
Expand Down Expand Up @@ -961,7 +970,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit"; const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit";
const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx"; const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";

const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul, sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
Expand All @@ -970,7 +979,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
fxsr, xsave, xsaveopt, avx512f, avx512er, fxsr, xsave, xsaveopt, avx512f, avx512er,
avx512cd, avx512pf, prefetchwt1, clflushopt, avx512cd, avx512pf, prefetchwt1, clflushopt,
xsavec, xsaves, avx512dq, avx512bw, avx512vl, xsavec, xsaves, avx512dq, avx512bw, avx512vl,
avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL); avx512ifma, avx512vbmi, clwb, pcommit, mwaitx,
clzero, NULL);
} }


done: done:
Expand Down
9 changes: 9 additions & 0 deletions gcc/config/i386/i386-c.c
Expand Up @@ -123,6 +123,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__bdver4"); def_or_undef (parse_in, "__bdver4");
def_or_undef (parse_in, "__bdver4__"); def_or_undef (parse_in, "__bdver4__");
break; break;
case PROCESSOR_ZNVER1:
def_or_undef (parse_in, "__znver1");
def_or_undef (parse_in, "__znver1__");
break;
case PROCESSOR_BTVER1: case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1");
def_or_undef (parse_in, "__btver1__"); def_or_undef (parse_in, "__btver1__");
Expand Down Expand Up @@ -252,6 +256,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_BDVER4: case PROCESSOR_BDVER4:
def_or_undef (parse_in, "__tune_bdver4__"); def_or_undef (parse_in, "__tune_bdver4__");
break; break;
case PROCESSOR_ZNVER1:
def_or_undef (parse_in, "__tune_znver1__");
break;
case PROCESSOR_BTVER1: case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__tune_btver1__"); def_or_undef (parse_in, "__tune_btver1__");
break; break;
Expand Down Expand Up @@ -424,6 +431,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SSE2_MATH__"); def_or_undef (parse_in, "__SSE2_MATH__");
if (isa_flag & OPTION_MASK_ISA_CLFLUSHOPT) if (isa_flag & OPTION_MASK_ISA_CLFLUSHOPT)
def_or_undef (parse_in, "__CLFLUSHOPT__"); def_or_undef (parse_in, "__CLFLUSHOPT__");
if (isa_flag & OPTION_MASK_ISA_CLZERO)
def_or_undef (parse_in, "__CLZERO__");
if (isa_flag & OPTION_MASK_ISA_XSAVEC) if (isa_flag & OPTION_MASK_ISA_XSAVEC)
def_or_undef (parse_in, "__XSAVEC__"); def_or_undef (parse_in, "__XSAVEC__");
if (isa_flag & OPTION_MASK_ISA_XSAVES) if (isa_flag & OPTION_MASK_ISA_XSAVES)
Expand Down
125 changes: 117 additions & 8 deletions gcc/config/i386/i386.c
Expand Up @@ -1342,6 +1342,96 @@ struct processor_costs bdver4_cost = {
2, /* cond_not_taken_branch_cost. */ 2, /* cond_not_taken_branch_cost. */
}; };



/* memcpy expansion strategy for ZNVER1.

ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
very small blocks it is better to use a loop.  For large blocks, a libcall
can do non-temporal accesses and beat inline code considerably.

NOTE(review): the two array elements are presumably the 32-bit and 64-bit
variants (the first uses rep_prefix_4_byte, the second rep_prefix_8_byte).
Each inner {N, ALG, FLAG} triple presumably means "use ALG for blocks of up
to N bytes", with -1 meaning no upper bound, and the leading libcall is
presumably the choice when the size is unknown -- confirm against the
definition of stringop_algs.  */
static stringop_algs znver1_memcpy[2] = {
{libcall, {{6, loop, false}, {14, unrolled_loop, false},
{-1, rep_prefix_4_byte, false}}},
{libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
/* memset expansion strategy for ZNVER1: loops for the smallest blocks,
REP-prefixed stores for medium-sized ones, and a libcall beyond that.

NOTE(review): the two array elements are presumably the 32-bit and 64-bit
variants (the first uses rep_prefix_4_byte, the second rep_prefix_8_byte).
Each inner {N, ALG, FLAG} triple presumably means "use ALG for blocks of up
to N bytes", with -1 meaning no upper bound -- confirm against the
definition of stringop_algs.  */
static stringop_algs znver1_memset[2] = {
{libcall, {{8, loop, false}, {24, unrolled_loop, false},
{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
/* Cost table for ZNVER1.  The "znver1" entry of processor_target_table
points at this structure.  The initializer is positional, so the order of
the entries below must follow the field order of struct processor_costs;
each entry carries a comment naming what it costs.  */
struct processor_costs znver1_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction. */
COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
COSTS_N_INSNS (1), /* constant shift costs. */
{COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
COSTS_N_INSNS (4), /* HI. */
COSTS_N_INSNS (4), /* SI. */
COSTS_N_INSNS (6), /* DI. */
COSTS_N_INSNS (6)}, /* other. */
0, /* cost of multiply per each bit
set. */
{COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
COSTS_N_INSNS (35), /* HI. */
COSTS_N_INSNS (51), /* SI. */
COSTS_N_INSNS (83), /* DI. */
COSTS_N_INSNS (83)}, /* other. */
COSTS_N_INSNS (1), /* cost of movsx. */
COSTS_N_INSNS (1), /* cost of movzx. */
8, /* "large" insn. */
9, /* MOVE_RATIO. */
4, /* cost for loading QImode using
movzbl. */
{5, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 4}, /* cost of storing integer
registers. */
2, /* cost of reg,reg fld/fst. */
{5, 5, 12}, /* cost of loading fp registers
in SFmode, DFmode and XFmode. */
{4, 4, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode. */
2, /* cost of moving MMX register. */
{4, 4}, /* cost of loading MMX registers
in SImode and DImode. */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode. */
2, /* cost of moving SSE register. */
{4, 4, 4}, /* cost of loading SSE registers
in SImode, DImode and TImode. */
{4, 4, 4}, /* cost of storing SSE registers
in SImode, DImode and TImode. */
2, /* MMX or SSE register to integer. */
32, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block. */
/* New AMD processors never drop prefetches; if they cannot be performed
immediately, they are queued.  We set the number of simultaneous prefetches
to a large constant to reflect this (leaving the number of prefetches
completely unlimited is probably not a good idea, as executing them also
takes some time). */
100, /* number of parallel prefetches. */
2, /* Branch cost. */
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (6), /* cost of FMUL instruction. */
COSTS_N_INSNS (42), /* cost of FDIV instruction. */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */

znver1_memcpy, /* memcpy strategy table. */
znver1_memset, /* memset strategy table. */
6, /* scalar_stmt_cost. */
4, /* scalar_load_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
};

/* BTVER1 has optimized REP instruction for medium sized blocks, but for /* BTVER1 has optimized REP instruction for medium sized blocks, but for
very small blocks it is better to use loop. For large blocks, libcall can very small blocks it is better to use loop. For large blocks, libcall can
do nontemporary accesses and beat inline considerably. */ do nontemporary accesses and beat inline considerably. */
Expand Down Expand Up @@ -2113,11 +2203,13 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_BDVER2 (1<<PROCESSOR_BDVER2) #define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3) #define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4) #define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
#define m_BTVER1 (1<<PROCESSOR_BTVER1) #define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2) #define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4) #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2) #define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER) #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
| m_ZNVER1)


#define m_GENERIC (1<<PROCESSOR_GENERIC) #define m_GENERIC (1<<PROCESSOR_GENERIC)


Expand Down Expand Up @@ -2580,6 +2672,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
{"bdver2", &bdver2_cost, 16, 10, 16, 7, 11}, {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
{"bdver3", &bdver3_cost, 16, 10, 16, 7, 11}, {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
{"bdver4", &bdver4_cost, 16, 10, 16, 7, 11}, {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
{"znver1", &znver1_cost, 16, 10, 16, 7, 11},
{"btver1", &btver1_cost, 16, 10, 16, 7, 11}, {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
{"btver2", &btver2_cost, 16, 10, 16, 7, 11} {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
}; };
Expand Down Expand Up @@ -3672,6 +3765,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mclwb", OPTION_MASK_ISA_CLWB }, { "-mclwb", OPTION_MASK_ISA_CLWB },
{ "-mpcommit", OPTION_MASK_ISA_PCOMMIT }, { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
{ "-mmwaitx", OPTION_MASK_ISA_MWAITX }, { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
}; };


/* Flag options. */ /* Flag options. */
Expand Down Expand Up @@ -4216,6 +4310,7 @@ ix86_option_override_internal (bool main_args_p,
#define PTA_CLWB (HOST_WIDE_INT_1 << 55) #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
#define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56) #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
#define PTA_MWAITX (HOST_WIDE_INT_1 << 57) #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
#define PTA_CLZERO (HOST_WIDE_INT_1 << 58)


#define PTA_CORE2 \ #define PTA_CORE2 \
(PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
Expand Down Expand Up @@ -4378,7 +4473,16 @@ ix86_option_override_internal (bool main_args_p,
| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
| PTA_MOVBE | PTA_MWAITX}, | PTA_MOVBE | PTA_MWAITX},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC, {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
| PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
| PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
| PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
| PTA_SHA | PTA_LZCNT | PTA_POPCNT},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
| PTA_FXSR | PTA_XSAVE}, | PTA_FXSR | PTA_XSAVE},
Expand Down Expand Up @@ -4799,6 +4903,9 @@ ix86_option_override_internal (bool main_args_p,
if (processor_alias_table[i].flags & PTA_CLFLUSHOPT if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
if (processor_alias_table[i].flags & PTA_CLZERO
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
if (processor_alias_table[i].flags & PTA_XSAVEC if (processor_alias_table[i].flags & PTA_XSAVEC
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
Expand Down Expand Up @@ -27168,6 +27275,7 @@ ix86_issue_rate (void)
case PROCESSOR_BDVER2: case PROCESSOR_BDVER2:
case PROCESSOR_BDVER3: case PROCESSOR_BDVER3:
case PROCESSOR_BDVER4: case PROCESSOR_BDVER4:
case PROCESSOR_ZNVER1:
case PROCESSOR_CORE2: case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM: case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE: case PROCESSOR_SANDYBRIDGE:
Expand Down Expand Up @@ -27428,6 +27536,7 @@ ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
case PROCESSOR_BDVER2: case PROCESSOR_BDVER2:
case PROCESSOR_BDVER3: case PROCESSOR_BDVER3:
case PROCESSOR_BDVER4: case PROCESSOR_BDVER4:
case PROCESSOR_ZNVER1:
case PROCESSOR_BTVER1: case PROCESSOR_BTVER1:
case PROCESSOR_BTVER2: case PROCESSOR_BTVER2:
case PROCESSOR_GENERIC: case PROCESSOR_GENERIC:
Expand Down Expand Up @@ -35708,9 +35817,9 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
arg_str = "bdver4"; arg_str = "bdver4";
priority = P_PROC_AVX2; priority = P_PROC_AVX2;
break; break;
} }
} }

cl_target_option_restore (&global_options, &cur_target); cl_target_option_restore (&global_options, &cur_target);


if (predicate_list && arg_str == NULL) if (predicate_list && arg_str == NULL)
Expand Down Expand Up @@ -36659,7 +36768,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"bdver2", M_AMDFAM15H_BDVER2}, {"bdver2", M_AMDFAM15H_BDVER2},
{"bdver3", M_AMDFAM15H_BDVER3}, {"bdver3", M_AMDFAM15H_BDVER3},
{"bdver4", M_AMDFAM15H_BDVER4}, {"bdver4", M_AMDFAM15H_BDVER4},
{"btver2", M_AMD_BTVER2}, {"btver2", M_AMD_BTVER2},
}; };


static struct _isa_names_table static struct _isa_names_table
Expand Down Expand Up @@ -52714,8 +52823,8 @@ do_dispatch (rtx_insn *insn, int mode)
static bool static bool
has_dispatch (rtx_insn *insn, int action) has_dispatch (rtx_insn *insn, int action)
{ {
if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4) if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
&& flag_dispatch_scheduler) || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
switch (action) switch (action)
{ {
default: default:
Expand Down

0 comments on commit 4c9faaa

Please sign in to comment.