Skip to content

Commit

Permalink
ggml : general conv_2d CPU implementation (#352)
Browse files Browse the repository at this point in the history
* Conv2d s0 == s1 == 1, d0 == d1 == 1, variable padding

* Mark unused varibles

* Support variable strides

* Handle all non-kernel-width convolutions with same general conv2d

* General 2d Conv

* Remove old function

* Group functions

* Rearrange

* General Conv2d implementation

* Clean up whitespace
  • Loading branch information
dmille committed Jul 14, 2023
1 parent 64d645a commit a1bd982
Showing 1 changed file with 131 additions and 12 deletions.
143 changes: 131 additions & 12 deletions src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -13102,6 +13102,125 @@ static void ggml_compute_forward_conv_2d_sk_p0(
} break;
}
}
// ggml_compute_forward_conv_2d_any

static void ggml_compute_forward_conv_2d_f16_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
const struct ggml_tensor * opt0,
struct ggml_tensor * dst) {
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);

int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
const int32_t s0 = ((const int32_t*)(opt0->data))[0];
const int32_t s1 = ((const int32_t*)(opt0->data))[1];
const int32_t p0 = ((const int32_t*)(opt0->data))[2];
const int32_t p1 = ((const int32_t*)(opt0->data))[3];
const int32_t d0 = ((const int32_t*)(opt0->data))[4];
const int32_t d1 = ((const int32_t*)(opt0->data))[5];
GGML_TENSOR_BINARY_OP_LOCALS;

const int ith = params->ith;
const int nth = params->nth;

const int nk0 = ne00;
const int nk1 = ne01;

const int ew0 = nk0*nk1*ne02;

GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));

if (params->type == GGML_TASK_INIT) {
memset(params->wdata, 0, params->wsize);

// prepare source data (src1)
{
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;

for (int i12 = 0; i12 < ne12; i12++) {
const float * const src = (float *)((char *) src1->data + i12*nb12);
ggml_fp16_t * dst_data = wdata;

for (int i1 = 0; i1 < ne1; i1++) {
for (int i0 = 0; i0 < ne0; i0++) {
for (int ik1 = 0; ik1 < nk1; ik1++) {
for (int ik0 = 0; ik0 < nk0; ik0++) {
const int idx1 = i1*s1 + ik1*d1 - p1;
const int idx0 = i0*s0 + ik0*d0 - p0;

if (!(idx1 < 0 || idx1 >= ne11 || idx0 < 0 || idx0 >= ne10)) {
dst_data[(i1*ne0 + i0)*ew0 + i12*(nk0*nk1) + ik1*nk0 + ik0] =
GGML_FP32_TO_FP16(src[idx1 * ne10 + idx0]);
}
}
}
}
}
}
}

return;
}

if (params->type == GGML_TASK_FINALIZE) {
return;
}

// total patches in dst
const int np = ne2;

// patches per thread
const int dp = (np + nth - 1)/nth;

// patch range for this thread
const int ip0 = dp*ith;
const int ip1 = MIN(ip0 + dp, np);

ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;

for (int i2 = ip0; i2 < ip1; i2++) {
float * dst_data = (float *)((char *) dst->data + i2*nb2);

for (int i1 = 0; i1 < ne1; ++i1) {
for (int i0 = 0; i0 < ne0; ++i0) {
ggml_vec_dot_f16(ew0, dst_data + i1*ne0 + i0,
(ggml_fp16_t *) ((char *) src0->data + i2*nb03),
(ggml_fp16_t *) wdata + (i1*ne0 + i0)*ew0);
}

}
}
}

static void ggml_compute_forward_conv_2d_any(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
const struct ggml_tensor * opt0,
struct ggml_tensor * dst
) {
switch (src0->type) {
case GGML_TYPE_F16:
{
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, opt0, dst);
} break;
case GGML_TYPE_F32:
{
//ggml_compute_forward_conv_2d_f32(params, src0, src1, opt0, dst);
GGML_ASSERT(false);
} break;
default:
{
GGML_ASSERT(false);
} break;
}

}

// ggml_compute_forward_conv_2d

Expand All @@ -13117,16 +13236,13 @@ static void ggml_compute_forward_conv_2d(
const int32_t p1 = ((const int32_t*)(opt0->data))[3];
const int32_t d0 = ((const int32_t*)(opt0->data))[4];
const int32_t d1 = ((const int32_t*)(opt0->data))[5];
GGML_ASSERT(d0 == 1); // dilation not supported
GGML_ASSERT(d1 == 1);
GGML_ASSERT(p0 == 0); // padding not supported
GGML_ASSERT(p1 == 0);

if (s0 == src0->ne[0] && s1 == src0->ne[1]) {
if (s0 == src0->ne[0] && s1 == src0->ne[1] && p0 == 0 && p1 == 0) {
ggml_compute_forward_conv_2d_sk_p0(params, src0, src1, dst);
} else {
GGML_ASSERT(false); // only stride equal to kernel size is supported
}
else {
ggml_compute_forward_conv_2d_any(params, src0, src1, opt0, dst);
};
}

// ggml_compute_forward_pool_1d_sk_p0
Expand Down Expand Up @@ -16585,19 +16701,22 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
const int64_t ne11 = node->src[1]->ne[1]; // H
const int64_t ne12 = node->src[1]->ne[2]; // C

const int64_t ne0 = node->ne[0];
const int64_t ne1 = node->ne[1];
const int64_t ne2 = node->ne[2];
const int64_t nk = ne00*ne01;
const int64_t ew0 = nk * ne02;

UNUSED(ne02);
UNUSED(ne03);
UNUSED(nk);
UNUSED(ne2);

size_t cur = 0;

if (node->src[0]->type == GGML_TYPE_F16 &&
node->src[1]->type == GGML_TYPE_F32) {
cur = sizeof(ggml_fp16_t)*(ne10*ne11*ne12);
node->src[1]->type == GGML_TYPE_F32) {
cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
} else if (node->src[0]->type == GGML_TYPE_F32 &&
node->src[1]->type == GGML_TYPE_F32) {
node->src[1]->type == GGML_TYPE_F32) {
cur = sizeof(float)* (ne10*ne11*ne12);
} else {
GGML_ASSERT(false);
Expand Down

0 comments on commit a1bd982

Please sign in to comment.