Add Coffee Lake (CFL) device support.

What the hunks below do:
  * backend/src/backend/gen_program.cpp: device IDs matching IS_COFFEELAKE
    select KblContext, and use the KBL binary header both when matching a
    binary against the device and when filling the header on serialization.
  * src/cl_device_data.h: defines the CFL PCI device IDs, the IS_CFL_GT1 /
    IS_CFL_GT2 / IS_CFL_GT3 / IS_COFFEELAKE predicates, and adds
    IS_COFFEELAKE to IS_GEN9.
  * src/cl_device_id.c: adds the intel_cfl_gt1/gt2/gt3_device descriptors,
    the matching cases in cl_get_gt_device(), and lists the descriptors in
    is_gen_device() and cl_device_get_version().

Review notes:
  * cl_device_get_version(): the added condition compared against
    intel_cfl_gt1_device twice and never against intel_cfl_gt2_device, so a
    GT2 descriptor would not match the branch that sets *ver = 9 (and would
    reach the trailing CL_INVALID_VALUE return unless an earlier branch,
    not shown in the hunk, matches it). The second comparison now uses
    intel_cfl_gt2_device, consistent with is_gen_device() in this patch.
  * Because of that one-token correction, the post-image blob id on the
    src/cl_device_id.c index line no longer describes the patched file.
  * Line breaks were restored so that every hunk matches the old/new line
    counts in its header. Indentation and blank context lines are
    reconstructed; confirm them against the target tree, or apply with
    whitespace-tolerant matching.

diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index e06ed40c2..274c99c7a 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -209,6 +209,8 @@ namespace gbe {
       ctx = GBE_NEW(BxtContext, unit, name, deviceID, relaxMath);
     } else if (IS_KABYLAKE(deviceID)) {
       ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
+    } else if (IS_COFFEELAKE(deviceID)) {
+      ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
     } else if (IS_GEMINILAKE(deviceID)) {
       ctx = GBE_NEW(GlkContext, unit, name, deviceID, relaxMath);
     }
@@ -328,6 +330,7 @@ namespace gbe {
                                       (IS_SKYLAKE(deviceID) && MATCH_SKL_HEADER(binary)) || \
                                       (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) || \
                                       (IS_KABYLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
+                                      (IS_COFFEELAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
                                       (IS_GEMINILAKE(deviceID) && MATCH_GLK_HEADER(binary)) \
                                       )
 
@@ -436,6 +439,8 @@ namespace gbe {
       FILL_BXT_HEADER(*binary);
     }else if(IS_KABYLAKE(prog->deviceID)){
       FILL_KBL_HEADER(*binary);
+    }else if(IS_COFFEELAKE(prog->deviceID)){
+      FILL_KBL_HEADER(*binary);
     }else if(IS_GEMINILAKE(prog->deviceID)){
       FILL_GLK_HEADER(*binary);
     }else {
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index 123b61926..79f7002ed 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -372,7 +372,59 @@
   (devid == PCI_CHIP_GLK_3x6 || \
    devid == PCI_CHIP_GLK_2x6)
 
-#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid))
+#define PCI_CHIP_COFFEELAKE_S_GT1_1 0x3E90
+#define PCI_CHIP_COFFEELAKE_S_GT1_2 0x3E93
+#define PCI_CHIP_COFFEELAKE_S_GT1_3 0x3E99
+
+#define PCI_CHIP_COFFEELAKE_U_GT1_1 0x3EA1
+#define PCI_CHIP_COFFEELAKE_U_GT1_2 0x3EA4
+
+#define PCI_CHIP_COFFEELAKE_S_GT2_1 0x3E91
+#define PCI_CHIP_COFFEELAKE_S_GT2_2 0x3E92
+#define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96
+#define PCI_CHIP_COFFEELAKE_S_GT2_4 0x3E9A
+
+#define PCI_CHIP_COFFEELAKE_H_GT2_1 0x3E94
+#define PCI_CHIP_COFFEELAKE_H_GT2_2 0x3E9B
+
+#define PCI_CHIP_COFFEELAKE_U_GT2_1 0x3EA0
+#define PCI_CHIP_COFFEELAKE_U_GT2_2 0x3EA3
+#define PCI_CHIP_COFFEELAKE_U_GT2_3 0x3EA9
+
+#define PCI_CHIP_COFFEELAKE_U_GT3_1 0x3EA2
+#define PCI_CHIP_COFFEELAKE_U_GT3_2 0x3EA5
+#define PCI_CHIP_COFFEELAKE_U_GT3_3 0x3EA6
+#define PCI_CHIP_COFFEELAKE_U_GT3_4 0x3EA7
+#define PCI_CHIP_COFFEELAKE_U_GT3_5 0x3EA8
+
+#define IS_CFL_GT1(devid) \
+  (devid == PCI_CHIP_COFFEELAKE_S_GT1_1 || \
+   devid == PCI_CHIP_COFFEELAKE_S_GT1_2 || \
+   devid == PCI_CHIP_COFFEELAKE_S_GT1_3 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT1_1 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT1_2)
+
+#define IS_CFL_GT2(devid) \
+  (devid == PCI_CHIP_COFFEELAKE_S_GT2_1 || \
+   devid == PCI_CHIP_COFFEELAKE_S_GT2_2 || \
+   devid == PCI_CHIP_COFFEELAKE_S_GT2_3 || \
+   devid == PCI_CHIP_COFFEELAKE_S_GT2_4 || \
+   devid == PCI_CHIP_COFFEELAKE_H_GT2_1 || \
+   devid == PCI_CHIP_COFFEELAKE_H_GT2_2 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT2_1 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT2_2 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT2_3)
+
+#define IS_CFL_GT3(devid) \
+  (devid == PCI_CHIP_COFFEELAKE_U_GT3_1 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT3_2 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT3_3 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT3_4 || \
+   devid == PCI_CHIP_COFFEELAKE_U_GT3_5)
+
+#define IS_COFFEELAKE(devid) (IS_CFL_GT1(devid) || IS_CFL_GT2(devid) || IS_CFL_GT3(devid))
+
+#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid))
 
 #define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
 
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index a41344041..c20f9edaa 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -274,6 +274,36 @@ static struct _cl_device_id intel_glk12eu_device = {
 #include "cl_gen9_device.h"
 };
 
+static struct _cl_device_id intel_cfl_gt1_device = {
+  .max_compute_unit = 12,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
+static struct _cl_device_id intel_cfl_gt2_device = {
+  .max_compute_unit = 24,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 3,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
+static struct _cl_device_id intel_cfl_gt3_device = {
+  .max_compute_unit = 48,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 6,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
 LOCAL cl_device_id
 cl_get_gt_device(cl_device_type device_type)
 {
@@ -785,6 +815,64 @@ cl_get_gt_device(cl_device_type device_type)
       cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
       break;
 
+    case PCI_CHIP_COFFEELAKE_S_GT1_1:
+    case PCI_CHIP_COFFEELAKE_S_GT1_2:
+    case PCI_CHIP_COFFEELAKE_S_GT1_3:
+      DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT1");
+    case PCI_CHIP_COFFEELAKE_U_GT1_1:
+    case PCI_CHIP_COFFEELAKE_U_GT1_2:
+      DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT1");
+cfl_gt1_break:
+      intel_cfl_gt1_device.device_id = device_id;
+      intel_cfl_gt1_device.platform = cl_get_platform_default();
+      ret = &intel_cfl_gt1_device;
+      cl_intel_platform_get_default_extension(ret);
+#ifdef ENABLE_FP64
+      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+      break;
+
+    case PCI_CHIP_COFFEELAKE_S_GT2_1:
+    case PCI_CHIP_COFFEELAKE_S_GT2_2:
+    case PCI_CHIP_COFFEELAKE_S_GT2_3:
+    case PCI_CHIP_COFFEELAKE_S_GT2_4:
+      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT2");
+    case PCI_CHIP_COFFEELAKE_H_GT2_1:
+    case PCI_CHIP_COFFEELAKE_H_GT2_2:
+      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Halo GT2");
+    case PCI_CHIP_COFFEELAKE_U_GT2_1:
+    case PCI_CHIP_COFFEELAKE_U_GT2_2:
+    case PCI_CHIP_COFFEELAKE_U_GT2_3:
+      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT2");
+cfl_gt2_break:
+      intel_cfl_gt2_device.device_id = device_id;
+      intel_cfl_gt2_device.platform = cl_get_platform_default();
+      ret = &intel_cfl_gt2_device;
+      cl_intel_platform_get_default_extension(ret);
+#ifdef ENABLE_FP64
+      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+      break;
+
+    case PCI_CHIP_COFFEELAKE_U_GT3_1:
+    case PCI_CHIP_COFFEELAKE_U_GT3_2:
+    case PCI_CHIP_COFFEELAKE_U_GT3_3:
+    case PCI_CHIP_COFFEELAKE_U_GT3_4:
+    case PCI_CHIP_COFFEELAKE_U_GT3_5:
+      DECL_INFO_STRING(cfl_gt3_break, intel_cfl_gt3_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT3");
+cfl_gt3_break:
+      intel_cfl_gt3_device.device_id = device_id;
+      intel_cfl_gt3_device.platform = cl_get_platform_default();
+      ret = &intel_cfl_gt3_device;
+      cl_intel_platform_get_default_extension(ret);
+#ifdef ENABLE_FP64
+      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+      break;
+
     case PCI_CHIP_SANDYBRIDGE_BRIDGE:
     case PCI_CHIP_SANDYBRIDGE_GT1:
     case PCI_CHIP_SANDYBRIDGE_GT2:
@@ -992,7 +1080,10 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
          device == &intel_kbl_gt3_device ||
          device == &intel_kbl_gt4_device ||
          device == &intel_glk18eu_device ||
-         device == &intel_glk12eu_device;
+         device == &intel_glk12eu_device ||
+         device == &intel_cfl_gt1_device ||
+         device == &intel_cfl_gt2_device ||
+         device == &intel_cfl_gt3_device;
 }
 
 LOCAL cl_int
@@ -1420,7 +1511,9 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
         || device == &intel_bxt18eu_device || device == &intel_bxt12eu_device || device == &intel_kbl_gt1_device
         || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
         || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device
-        || device == &intel_glk18eu_device || device == &intel_glk12eu_device) {
+        || device == &intel_glk18eu_device || device == &intel_glk12eu_device
+        || device == &intel_cfl_gt1_device || device == &intel_cfl_gt2_device
+        || device == &intel_cfl_gt3_device) {
     *ver = 9;
   } else
     return CL_INVALID_VALUE;