Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x8-packw-x16c4 call x32 packw-x16 #6356

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmake/microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,9 @@ SET(ALL_AVX512F_MICROKERNEL_SRCS
src/math/f32-sqrt-avx512f-nr1fma1adj.c
src/math/f32-sqrt-avx512f-nr1fma.c
src/math/f32-sqrt-avx512f-nr2fma.c
src/x8-packw/gen/x8-packw-x16c4-gemm-goi-avx512f-u16-prfm.c
src/x8-packw/gen/x8-packw-x32c4-gemm-goi-avx512f-u16-prfm.c
src/x8-packw/gen/x8-packw-x64c4-gemm-goi-avx512f-u16-prfm.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c
src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c)

Expand Down
3 changes: 3 additions & 0 deletions microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,9 @@ ALL_AVX512F_MICROKERNEL_SRCS = [
"src/math/f32-sqrt-avx512f-nr1fma1adj.c",
"src/math/f32-sqrt-avx512f-nr1fma.c",
"src/math/f32-sqrt-avx512f-nr2fma.c",
"src/x8-packw/gen/x8-packw-x16c4-gemm-goi-avx512f-u16-prfm.c",
"src/x8-packw/gen/x8-packw-x32c4-gemm-goi-avx512f-u16-prfm.c",
"src/x8-packw/gen/x8-packw-x64c4-gemm-goi-avx512f-u16-prfm.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4-prfm.c",
"src/x32-packw/gen/x32-packw-x16-gemm-goi-avx512f-u4.c",
]
Expand Down
7 changes: 7 additions & 0 deletions scripts/generate-x8-packw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,11 @@ tools/xngen src/x8-packw/scalar.c.in -D NR=8 -D KBLOCK=4 -D TYPE=int8_t -o src/
tools/xngen src/x8-packw/scalar.c.in -D NR=16 -D KBLOCK=4 -D TYPE=int8_t -o src/x8-packw/gen/x8-packw-x16-gemm-goi-scalar-int-u4.c &
tools/xngen src/x8-packw/scalar.c.in -D NR=32 -D KBLOCK=4 -D TYPE=int8_t -o src/x8-packw/gen/x8-packw-x32-gemm-goi-scalar-int-u4.c &


################################### x86 AVX512 ##################################
### NR multiple of 16
# Each command instantiates src/x8-packw/c4.c.in for one NR value.
#   X8KERNEL  - suffix of the generated x8 function name.
#   X32KERNEL - suffix of the x32 packw kernel that the generated function calls.
# The commands run in the background; the `wait` below joins them.
tools/xngen src/x8-packw/c4.c.in -D NR=16 -D KBLOCK=16 -D TYPE=int8_t -D X8KERNEL=avx512f_u16_prfm -D X32KERNEL=avx512f_u4_prfm -o src/x8-packw/gen/x8-packw-x16c4-gemm-goi-avx512f-u16-prfm.c &
tools/xngen src/x8-packw/c4.c.in -D NR=32 -D KBLOCK=16 -D TYPE=int8_t -D X8KERNEL=avx512f_u16_prfm -D X32KERNEL=avx512f_u4_prfm -o src/x8-packw/gen/x8-packw-x32c4-gemm-goi-avx512f-u16-prfm.c &
tools/xngen src/x8-packw/c4.c.in -D NR=64 -D KBLOCK=16 -D TYPE=int8_t -D X8KERNEL=avx512f_u16_prfm -D X32KERNEL=avx512f_u4_prfm -o src/x8-packw/gen/x8-packw-x64c4-gemm-goi-avx512f-u16-prfm.c &

wait
3 changes: 1 addition & 2 deletions src/configs/gemm-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

#define XNN_MR_TO_INDEX(MR) (MR-1)


static struct xnn_gemm_config f16_gemm_config = {0};
static struct xnn_gemm_config f32_gemm_config = {0};
static struct xnn_gemm_config f32_gemm_nr2_config = {0};
Expand Down Expand Up @@ -2490,7 +2489,7 @@ static void init_qd8_f32_qc8w_gemm_config(void) {
qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(16)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_16x64c4__avx512amx);
qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params;
qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w;
qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w;
qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_x8_packw_gemm_goi_ukernel_x16c4__avx512f_u16_prfm;
qd8_f32_qc8w_gemm_config.mr = 16;
qd8_f32_qc8w_gemm_config.nr = 64;
qd8_f32_qc8w_gemm_config.log2_kr = 2;
Expand Down
53 changes: 53 additions & 0 deletions src/x8-packw/c4.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert NR > 1
$assert KBLOCK >= 1
$assert TYPE in ["int8_t"]

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack/math.h>
#include <xnnpack/packw.h>


$BITS = {"int8_t": 8}[TYPE]
$BTYPE = {"int8_t": "uint32_t"}[TYPE]
$WTYPE = {"int8_t": "int8_t"}[TYPE]
// Thin wrapper for NR=${NR}, KR=4: reinterprets the ${WTYPE} weight and packed
// buffers as uint32_t and forwards to the x32 packw kernel with kc / 4 elements
// and kr == 1. All other arguments are passed through unchanged.
void xnn_x${BITS}_packw_gemm_goi_ukernel_x${NR}c4__${X8KERNEL}(
  size_t g,
  size_t nc,
  size_t kc,
  size_t nr,
  size_t kr,
  size_t sr,
  const ${WTYPE}* weights,
  const ${BTYPE}* bias,
  const void* scale,
  ${WTYPE}* packed_weights,
  size_t extra_bytes,
  const void* params)
{
  assert(g != 0);
  assert(nc != 0);
  assert(kc != 0);
  assert(nr == ${NR});   // This kernel is for NR=${NR}
  assert(kr == 4);
  assert(sr == 1);
  assert(weights != NULL);
  assert(packed_weights != NULL);

  // kc must be a whole number of 4-byte groups; the division below truncates.
  assert((kc & 3) == 0);

  // Arguments as seen by the 32-bit kernel.
  const size_t kc_words = kc / 4;
  const size_t kr_words = 1;
  const uint32_t* weights_words = (const uint32_t*) weights;
  const uint32_t* bias_words = (const uint32_t*) bias;
  uint32_t* packed_words = (uint32_t*) packed_weights;

  xnn_x32_packw_gemm_goi_ukernel_x${NR}__${X32KERNEL}(
    g, nc, kc_words, nr, kr_words, sr,
    weights_words, bias_words, scale,
    packed_words, extra_bytes, params);
}
51 changes: 51 additions & 0 deletions src/x8-packw/gen/x8-packw-x16c4-gemm-goi-avx512f-u16-prfm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Auto-generated file. Do not edit!
// Template: src/x8-packw/c4.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.


#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack/math.h>
#include <xnnpack/packw.h>


// Thin wrapper for NR=16, KR=4: reinterprets the int8_t weight and packed
// buffers as uint32_t and forwards to the x32 packw kernel with kc / 4 elements
// and kr == 1. All other arguments are passed through unchanged.
void xnn_x8_packw_gemm_goi_ukernel_x16c4__avx512f_u16_prfm(
  size_t g,
  size_t nc,
  size_t kc,
  size_t nr,
  size_t kr,
  size_t sr,
  const int8_t* weights,
  const uint32_t* bias,
  const void* scale,
  int8_t* packed_weights,
  size_t extra_bytes,
  const void* params)
{
  assert(g != 0);
  assert(nc != 0);
  assert(kc != 0);
  assert(nr == 16);   // This kernel is for NR=16
  assert(kr == 4);
  assert(sr == 1);
  assert(weights != NULL);
  assert(packed_weights != NULL);

  // kc must be a whole number of 4-byte groups; the division below truncates.
  assert((kc & 3) == 0);

  // Arguments as seen by the 32-bit kernel.
  const size_t kc_words = kc / 4;
  const size_t kr_words = 1;
  const uint32_t* weights_words = (const uint32_t*) weights;
  const uint32_t* bias_words = (const uint32_t*) bias;
  uint32_t* packed_words = (uint32_t*) packed_weights;

  xnn_x32_packw_gemm_goi_ukernel_x16__avx512f_u4_prfm(
    g, nc, kc_words, nr, kr_words, sr,
    weights_words, bias_words, scale,
    packed_words, extra_bytes, params);
}
51 changes: 51 additions & 0 deletions src/x8-packw/gen/x8-packw-x32c4-gemm-goi-avx512f-u16-prfm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Auto-generated file. Do not edit!
// Template: src/x8-packw/c4.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.


#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack/math.h>
#include <xnnpack/packw.h>


// Thin wrapper for NR=32, KR=4: reinterprets the int8_t weight and packed
// buffers as uint32_t and forwards to the x32 packw kernel with kc / 4 elements
// and kr == 1. All other arguments are passed through unchanged.
void xnn_x8_packw_gemm_goi_ukernel_x32c4__avx512f_u16_prfm(
  size_t g,
  size_t nc,
  size_t kc,
  size_t nr,
  size_t kr,
  size_t sr,
  const int8_t* weights,
  const uint32_t* bias,
  const void* scale,
  int8_t* packed_weights,
  size_t extra_bytes,
  const void* params)
{
  assert(g != 0);
  assert(nc != 0);
  assert(kc != 0);
  assert(nr == 32);   // This kernel is for NR=32
  assert(kr == 4);
  assert(sr == 1);
  assert(weights != NULL);
  assert(packed_weights != NULL);

  // kc must be a whole number of 4-byte groups; the division below truncates.
  assert((kc & 3) == 0);

  // Arguments as seen by the 32-bit kernel.
  const size_t kc_words = kc / 4;
  const size_t kr_words = 1;
  const uint32_t* weights_words = (const uint32_t*) weights;
  const uint32_t* bias_words = (const uint32_t*) bias;
  uint32_t* packed_words = (uint32_t*) packed_weights;

  xnn_x32_packw_gemm_goi_ukernel_x32__avx512f_u4_prfm(
    g, nc, kc_words, nr, kr_words, sr,
    weights_words, bias_words, scale,
    packed_words, extra_bytes, params);
}
51 changes: 51 additions & 0 deletions src/x8-packw/gen/x8-packw-x64c4-gemm-goi-avx512f-u16-prfm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Auto-generated file. Do not edit!
// Template: src/x8-packw/c4.c.in
// Generator: tools/xngen
//
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.


#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack/math.h>
#include <xnnpack/packw.h>


// Thin wrapper for NR=64, KR=4: reinterprets the int8_t weight and packed
// buffers as uint32_t and forwards to the x32 packw kernel with kc / 4 elements
// and kr == 1. All other arguments are passed through unchanged.
void xnn_x8_packw_gemm_goi_ukernel_x64c4__avx512f_u16_prfm(
  size_t g,
  size_t nc,
  size_t kc,
  size_t nr,
  size_t kr,
  size_t sr,
  const int8_t* weights,
  const uint32_t* bias,
  const void* scale,
  int8_t* packed_weights,
  size_t extra_bytes,
  const void* params)
{
  assert(g != 0);
  assert(nc != 0);
  assert(kc != 0);
  assert(nr == 64);   // This kernel is for NR=64
  assert(kr == 4);
  assert(sr == 1);
  assert(weights != NULL);
  assert(packed_weights != NULL);

  // kc must be a whole number of 4-byte groups; the division below truncates.
  assert((kc & 3) == 0);

  // Arguments as seen by the 32-bit kernel.
  const size_t kc_words = kc / 4;
  const size_t kr_words = 1;
  const uint32_t* weights_words = (const uint32_t*) weights;
  const uint32_t* bias_words = (const uint32_t*) bias;
  uint32_t* packed_words = (uint32_t*) packed_weights;

  xnn_x32_packw_gemm_goi_ukernel_x64__avx512f_u4_prfm(
    g, nc, kc_words, nr, kr_words, sr,
    weights_words, bias_words, scale,
    packed_words, extra_bytes, params);
}
4 changes: 4 additions & 0 deletions src/xnnpack/packw.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x8__sca
DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x16__scalar_int_u4)
DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x32__scalar_int_u4)

// KR=4 ("c4") x8 packing kernels, one per NR (16/32/64). Each asserts its own
// NR and kr == 4, then forwards to the matching x32 packw AVX512F kernel.
DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x16c4__avx512f_u16_prfm)
DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x32c4__avx512f_u16_prfm)
DECLARE_X8_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x8_packw_gemm_goi_ukernel_x64c4__avx512f_u16_prfm)

#define DECLARE_X16_PACKW_GEMM_GOI_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
size_t g, \
Expand Down
Loading
Loading