Skip to content

Commit

Permalink
Add Futhark implementation (#146)
Browse files Browse the repository at this point in the history
* Add Futhark.
  • Loading branch information
athas committed Oct 3, 2023
1 parent 2e3ebee commit 92fed70
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 3 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,15 @@ jobs:
run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile hipsycl @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }}

# CI job: configures and builds the Futhark model using the multicore backend.
test-futhark:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Prepare Futhark compiler
# NOTE(review): the action ref (@HEAD) and the compiler version ('latest')
# both float, so two runs of this job may use different toolchains — confirm
# whether pinning is wanted.
uses: diku-dk/install-futhark@HEAD
with:
version: 'latest'
# Configure, then build. No step in this job runs the resulting binary.
- run: cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=multicore
- run: cmake --build build
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()

project(BabelStream VERSION 4.0 LANGUAGES CXX)
project(BabelStream VERSION 4.0 LANGUAGES CXX C)

# uncomment for debugging build issues:
#set(CMAKE_VERBOSE_MAKEFILE ON)
Expand Down Expand Up @@ -162,6 +162,7 @@ register_model(acc ACC ACCStream.cpp)
register_model(raja USE_RAJA RAJAStream.cpp)
register_model(tbb TBB TBBStream.cpp)
register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust
register_model(futhark FUTHARK FutharkStream.cpp)


set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ BabelStream is currently implemented in the following parallel programming model
- SYCL and SYCL2020 (USM and accessors)
- TBB
- Thrust (via CUDA or HIP)
- Futhark

This project also contains implementations in alternative languages with different build systems:
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
Expand Down Expand Up @@ -101,7 +102,7 @@ The source for each model's implementations are located in `./src/<model>`.

Currently available models are:
```
omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust
omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust;futhark
```

#### Overriding default flags
Expand Down
212 changes: 212 additions & 0 deletions src/futhark/FutharkStream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
// University of Bristol HPC
// Copyright (c) 2022 Troels Henriksen
// University of Copenhagen
//
// For full license terms please see the LICENSE file distributed with this
// source code

#include <cstdlib> // For aligned_alloc
#include <string>
#include "FutharkStream.h"

// Creates the Futhark configuration and context used by every kernel.
//
// ARRAY_SIZE: number of elements in each of the three benchmark arrays.
// device:     device index; forwarded to Futhark as "#<index>", and only
//             applied when built for the CUDA or OpenCL backend.
//
// Throws std::runtime_error when Futhark reports that context initialisation
// failed, instead of leaving an unusable context behind for later calls.
template <class T>
FutharkStream<T>::FutharkStream(const int ARRAY_SIZE, int device)
{
  this->array_size = ARRAY_SIZE;

  // No device arrays exist until init_arrays() runs; the destructor skips
  // null entries.
  this->a = nullptr;
  this->b = nullptr;
  this->c = nullptr;

  this->cfg = futhark_context_config_new();
  this->device = "#" + std::to_string(device);
#if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl)
  futhark_context_config_set_device(this->cfg, this->device.c_str());
#endif
  this->ctx = futhark_context_new(this->cfg);

  // Context creation reports failure through futhark_context_get_error(),
  // which returns a heap-allocated message (or null when there is no error).
  if (char* err = futhark_context_get_error(this->ctx)) {
    const std::string message =
        std::string("Futhark context initialisation failed: ") + err;
    std::free(err);
    // The destructor does not run for a constructor that throws, so the
    // context and configuration have to be released here.
    futhark_context_free(this->ctx);
    futhark_context_config_free(this->cfg);
    throw std::runtime_error(message);
  }
}

// Frees whichever single-precision device arrays exist, then releases the
// Futhark context followed by its configuration.
template <>
FutharkStream<float>::~FutharkStream()
{
  void* const device_arrays[] = {this->a, this->b, this->c};
  for (void* arr : device_arrays) {
    if (arr != nullptr) {
      futhark_free_f32_1d(this->ctx, static_cast<futhark_f32_1d*>(arr));
    }
  }
  futhark_context_free(this->ctx);
  futhark_context_config_free(this->cfg);
}

// Frees whichever double-precision device arrays exist, then releases the
// Futhark context followed by its configuration.
template <>
FutharkStream<double>::~FutharkStream()
{
  void* const device_arrays[] = {this->a, this->b, this->c};
  for (void* arr : device_arrays) {
    if (arr != nullptr) {
      futhark_free_f64_1d(this->ctx, static_cast<futhark_f64_1d*>(arr));
    }
  }
  futhark_context_free(this->ctx);
  futhark_context_config_free(this->cfg);
}

// Creates the three single-precision device arrays, each filled with one
// constant: a with initA, b with initB and c with initC.
//
// Arrays left over from an earlier call are freed first, so calling this
// more than once does not leak device memory.
template <>
void FutharkStream<float>::init_arrays(float initA, float initB, float initC) {
  if (this->a) futhark_free_f32_1d(this->ctx, static_cast<futhark_f32_1d*>(this->a));
  if (this->b) futhark_free_f32_1d(this->ctx, static_cast<futhark_f32_1d*>(this->b));
  if (this->c) futhark_free_f32_1d(this->ctx, static_cast<futhark_f32_1d*>(this->c));

  // Host staging buffers; std::vector releases them on every exit path.
  const std::size_t count = static_cast<std::size_t>(this->array_size);
  std::vector<float> host_a(count, initA);
  std::vector<float> host_b(count, initB);
  std::vector<float> host_c(count, initC);

  this->a = futhark_new_f32_1d(this->ctx, host_a.data(), this->array_size);
  this->b = futhark_new_f32_1d(this->ctx, host_b.data(), this->array_size);
  this->c = futhark_new_f32_1d(this->ctx, host_c.data(), this->array_size);

  // Synchronise before the staging buffers go out of scope, so the uploads
  // have finished reading from them.
  futhark_context_sync(this->ctx);
}

// Creates the three double-precision device arrays, each filled with one
// constant: a with initA, b with initB and c with initC.
//
// Arrays left over from an earlier call are freed first, so calling this
// more than once does not leak device memory.
template <>
void FutharkStream<double>::init_arrays(double initA, double initB, double initC) {
  if (this->a) futhark_free_f64_1d(this->ctx, static_cast<futhark_f64_1d*>(this->a));
  if (this->b) futhark_free_f64_1d(this->ctx, static_cast<futhark_f64_1d*>(this->b));
  if (this->c) futhark_free_f64_1d(this->ctx, static_cast<futhark_f64_1d*>(this->c));

  // Host staging buffers; std::vector releases them on every exit path.
  const std::size_t count = static_cast<std::size_t>(this->array_size);
  std::vector<double> host_a(count, initA);
  std::vector<double> host_b(count, initB);
  std::vector<double> host_c(count, initC);

  this->a = futhark_new_f64_1d(this->ctx, host_a.data(), this->array_size);
  this->b = futhark_new_f64_1d(this->ctx, host_b.data(), this->array_size);
  this->c = futhark_new_f64_1d(this->ctx, host_c.data(), this->array_size);

  // Synchronise before the staging buffers go out of scope, so the uploads
  // have finished reading from them.
  futhark_context_sync(this->ctx);
}

// Copies the single-precision device arrays a, b and c into the supplied
// host vectors and synchronises the context afterwards.
// NOTE(review): the vectors are written through data() without resizing, so
// they presumably must already hold array_size elements — confirm at caller.
template <>
void FutharkStream<float>::read_arrays(std::vector<float>& h_a, std::vector<float>& h_b, std::vector<float>& h_c) {
  auto* const context = this->ctx;
  auto* const dev_a = static_cast<futhark_f32_1d*>(this->a);
  auto* const dev_b = static_cast<futhark_f32_1d*>(this->b);
  auto* const dev_c = static_cast<futhark_f32_1d*>(this->c);

  futhark_values_f32_1d(context, dev_a, h_a.data());
  futhark_values_f32_1d(context, dev_b, h_b.data());
  futhark_values_f32_1d(context, dev_c, h_c.data());
  futhark_context_sync(context);
}

// Copies the double-precision device arrays a, b and c into the supplied
// host vectors and synchronises the context afterwards.
// NOTE(review): the vectors are written through data() without resizing, so
// they presumably must already hold array_size elements — confirm at caller.
template <>
void FutharkStream<double>::read_arrays(std::vector<double>& h_a, std::vector<double>& h_b, std::vector<double>& h_c) {
  auto* const context = this->ctx;
  auto* const dev_a = static_cast<futhark_f64_1d*>(this->a);
  auto* const dev_b = static_cast<futhark_f64_1d*>(this->b);
  auto* const dev_c = static_cast<futhark_f64_1d*>(this->c);

  futhark_values_f64_1d(context, dev_a, h_a.data());
  futhark_values_f64_1d(context, dev_b, h_b.data());
  futhark_values_f64_1d(context, dev_c, h_c.data());
  futhark_context_sync(context);
}

// Copy kernel (single precision): replaces c with the result of the Futhark
// f32_copy entry point applied to a.
template <>
void FutharkStream<float>::copy() {
  auto* const context = this->ctx;
  // The old c is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f32_1d*>(this->c);
  futhark_free_f32_1d(context, result);
  futhark_entry_f32_copy(context, &result, static_cast<futhark_f32_1d*>(this->a));
  this->c = result;
  futhark_context_sync(context);
}

// Copy kernel (double precision): replaces c with the result of the Futhark
// f64_copy entry point applied to a.
template <>
void FutharkStream<double>::copy() {
  auto* const context = this->ctx;
  // The old c is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f64_1d*>(this->c);
  futhark_free_f64_1d(context, result);
  futhark_entry_f64_copy(context, &result, static_cast<futhark_f64_1d*>(this->a));
  this->c = result;
  futhark_context_sync(context);
}

// Mul kernel (single precision): replaces b with the result of the Futhark
// f32_mul entry point applied to c.
template <>
void FutharkStream<float>::mul() {
  auto* const context = this->ctx;
  // The old b is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f32_1d*>(this->b);
  futhark_free_f32_1d(context, result);
  futhark_entry_f32_mul(context, &result, static_cast<futhark_f32_1d*>(this->c));
  this->b = result;
  futhark_context_sync(context);
}

// Mul kernel (double precision): replaces b with the result of the Futhark
// f64_mul entry point applied to c.
template <>
void FutharkStream<double>::mul() {
  auto* const context = this->ctx;
  // The old b is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f64_1d*>(this->b);
  futhark_free_f64_1d(context, result);
  futhark_entry_f64_mul(context, &result, static_cast<futhark_f64_1d*>(this->c));
  this->b = result;
  futhark_context_sync(context);
}

// Add kernel (single precision): replaces c with the result of the Futhark
// f32_add entry point applied to a and b.
template <>
void FutharkStream<float>::add() {
  auto* const context = this->ctx;
  auto* const lhs = static_cast<futhark_f32_1d*>(this->a);
  auto* const rhs = static_cast<futhark_f32_1d*>(this->b);
  // The old c is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f32_1d*>(this->c);
  futhark_free_f32_1d(context, result);
  futhark_entry_f32_add(context, &result, lhs, rhs);
  this->c = result;
  futhark_context_sync(context);
}

// Add kernel (double precision): replaces c with the result of the Futhark
// f64_add entry point applied to a and b.
template <>
void FutharkStream<double>::add() {
  auto* const context = this->ctx;
  auto* const lhs = static_cast<futhark_f64_1d*>(this->a);
  auto* const rhs = static_cast<futhark_f64_1d*>(this->b);
  // The old c is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f64_1d*>(this->c);
  futhark_free_f64_1d(context, result);
  futhark_entry_f64_add(context, &result, lhs, rhs);
  this->c = result;
  futhark_context_sync(context);
}

// Triad kernel (single precision): replaces a with the result of the Futhark
// f32_triad entry point applied to b and c (b + scalar * c in the Futhark
// source).
//
// This previously overwrote c from (a, b), which disagreed with both the
// double-precision specialisation and the Futhark kernel's argument order.
template <>
void FutharkStream<float>::triad() {
  auto* const context = this->ctx;
  auto* const in_b = static_cast<futhark_f32_1d*>(this->b);
  auto* const in_c = static_cast<futhark_f32_1d*>(this->c);
  // The old a is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f32_1d*>(this->a);
  futhark_free_f32_1d(context, result);
  futhark_entry_f32_triad(context, &result, in_b, in_c);
  this->a = result;
  futhark_context_sync(context);
}

// Triad kernel (double precision): replaces a with the result of the Futhark
// f64_triad entry point applied to b and c.
template <>
void FutharkStream<double>::triad() {
  auto* const context = this->ctx;
  auto* const in_b = static_cast<futhark_f64_1d*>(this->b);
  auto* const in_c = static_cast<futhark_f64_1d*>(this->c);
  // The old a is not an input, so it is released before the new one is made.
  auto* result = static_cast<futhark_f64_1d*>(this->a);
  futhark_free_f64_1d(context, result);
  futhark_entry_f64_triad(context, &result, in_b, in_c);
  this->a = result;
  futhark_context_sync(context);
}

// Nstream kernel (single precision): replaces a with the result of the
// Futhark f32_nstream entry point applied to a, b and c
// (a + b + scalar * c in the Futhark source).
//
// This previously called the triad entry point on (a, b) and overwrote c,
// so the nstream kernel was never executed.
template <>
void FutharkStream<float>::nstream() {
  auto* const context = this->ctx;
  auto* const old_a = static_cast<futhark_f32_1d*>(this->a);
  futhark_f32_1d* updated = nullptr;
  const int status = futhark_entry_f32_nstream(
      context, &updated, old_a,
      static_cast<futhark_f32_1d*>(this->b),
      static_cast<futhark_f32_1d*>(this->c));
  // a is an input here, so it can only be released after the call. Swap only
  // on success so that a never ends up pointing at an unset result.
  if (status == 0) {
    futhark_free_f32_1d(context, old_a);
    this->a = updated;
  }
  futhark_context_sync(context);
}

// Nstream kernel (double precision): replaces a with the result of the
// Futhark f64_nstream entry point applied to a, b and c
// (a + b + scalar * c in the Futhark source).
//
// This previously called the triad entry point on (a, b) and overwrote c,
// so the nstream kernel was never executed.
template <>
void FutharkStream<double>::nstream() {
  auto* const context = this->ctx;
  auto* const old_a = static_cast<futhark_f64_1d*>(this->a);
  futhark_f64_1d* updated = nullptr;
  const int status = futhark_entry_f64_nstream(
      context, &updated, old_a,
      static_cast<futhark_f64_1d*>(this->b),
      static_cast<futhark_f64_1d*>(this->c));
  // a is an input here, so it can only be released after the call. Swap only
  // on success so that a never ends up pointing at an unset result.
  if (status == 0) {
    futhark_free_f64_1d(context, old_a);
    this->a = updated;
  }
  futhark_context_sync(context);
}

// Dot kernel (single precision): returns the value produced by the Futhark
// f32_dot entry point for a and b, after synchronising the context.
template <>
float FutharkStream<float>::dot() {
  auto* const context = this->ctx;
  auto* const lhs = static_cast<futhark_f32_1d*>(this->a);
  auto* const rhs = static_cast<futhark_f32_1d*>(this->b);
  float sum;
  futhark_entry_f32_dot(context, &sum, lhs, rhs);
  futhark_context_sync(context);
  return sum;
}

// Dot kernel (double precision): returns the value produced by the Futhark
// f64_dot entry point for a and b, after synchronising the context.
template <>
double FutharkStream<double>::dot() {
  auto* const context = this->ctx;
  auto* const lhs = static_cast<futhark_f64_1d*>(this->a);
  auto* const rhs = static_cast<futhark_f64_1d*>(this->b);
  double sum;
  futhark_entry_f64_dot(context, &sum, lhs, rhs);
  futhark_context_sync(context);
  return sum;
}

// Writes a fixed one-line notice to standard output and flushes it; this
// back end does not enumerate devices.
void listDevices(void)
{
  static const char notice[] = "Device selection not supported.";
  std::cout << notice << '\n' << std::flush;
}

// Explicit instantiations for the two element types that have kernel
// specialisations in this file.
template class FutharkStream<float>;
template class FutharkStream<double>;
60 changes: 60 additions & 0 deletions src/futhark/FutharkStream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
// University of Bristol HPC
// Copyright (c) 2022 Troels Henriksen
// University of Copenhagen
//
// For full license terms please see the LICENSE file distributed with this
// source code

#pragma once

#include <iostream>
#include <stdexcept>

#include "Stream.h"
#include "babelstream.h"

// Human-readable name of this implementation, chosen from whichever
// FUTHARK_BACKEND_* macro is defined at compile time.
#if defined(FUTHARK_BACKEND_c)
#define IMPLEMENTATION_STRING "Futhark (sequential)"
#elif defined(FUTHARK_BACKEND_multicore)
#define IMPLEMENTATION_STRING "Futhark (parallel CPU)"
#elif defined(FUTHARK_BACKEND_opencl)
#define IMPLEMENTATION_STRING "Futhark (OpenCL)"
#elif defined(FUTHARK_BACKEND_cuda)
#define IMPLEMENTATION_STRING "Futhark (CUDA)"
#else
#define IMPLEMENTATION_STRING "Futhark (unknown backend)"
#endif

// Stream implementation that runs each kernel through Futhark entry points.
// NOTE(review): the futhark_* declarations presumably come from the
// generated "babelstream.h" included above — confirm.
template <class T>
class FutharkStream : public Stream<T>
{
  protected:
    // Number of elements in each array
    int array_size;
    // Device string handed to the Futhark configuration ("#<index>").
    std::string device;

    // Futhark configuration and context; created by the constructor and
    // released by the destructor.
    struct futhark_context_config *cfg;
    struct futhark_context *ctx;

    // Device-side arrays, stored type-erased. The float and double
    // specialisations cast them to futhark_f32_1d* / futhark_f64_1d*.
    void* a;
    void* b;
    void* c;

  public:
    // Arguments: array size, then device index.
    FutharkStream(const int, int);
    ~FutharkStream();

    // The object owns raw Futhark handles that the destructor frees, so a
    // copy would free them a second time. Copying is therefore disabled.
    FutharkStream(const FutharkStream&) = delete;
    FutharkStream& operator=(const FutharkStream&) = delete;

    virtual void copy() override;
    virtual void add() override;
    virtual void mul() override;
    virtual void triad() override;
    virtual void nstream() override;
    virtual T dot() override;

    virtual void init_arrays(T initA, T initB, T initC) override;
    virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
};
62 changes: 62 additions & 0 deletions src/futhark/babelstream.fut
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- Signature of the benchmark kernels over an element type 't'.
-- Kernels that take a leading 't' argument receive the scalar multiplier.
module type kernels = {
type t
-- Returns a unique (freshly owned) array.
val copy [n] : [n]t -> *[n]t
val mul [n] : t -> [n]t -> [n]t
val add [n] : [n]t -> [n]t -> [n]t
val triad [n] : t -> [n]t -> [n]t -> [n]t
val dot [n] : [n]t -> [n]t -> t
-- Uniqueness allows nstream to mutate the 'a' array.
val nstream [n] : t -> *[n]t -> [n]t -> [n]t -> [n]t
}

-- Kernel implementations, parameterised over a real-number module P.
module kernels (P: real) : kernels with t = P.t = {
type t = P.t
def copy = copy
-- Each element of c multiplied by scalar.
def mul scalar c = map (P.*scalar) c
-- Element-wise sum of the two arrays.
def add = map2 (P.+)
-- Element-wise b + c * scalar.
def triad scalar b c = map2 (P.+) b (map (P.* scalar) c)
-- Sum of the element-wise products of a and b, starting from zero.
def dot a b = reduce (P.+) (P.i32 0) (map2 (P.*) a b)
-- Element-wise a + (b + c * scalar).
def nstream scalar a b c = map2 (P.+) a (map2 (P.+) b (map (P.*scalar) c))
}

-- Single-precision instantiation and its entry points. The scalar-taking
-- kernels are partially applied to f32_start_scalar, so each entry point
-- takes only array arguments.
module f32_kernels = kernels f32
def f32_start_scalar : f32 = 0.4
entry f32_copy = f32_kernels.copy
entry f32_mul = f32_kernels.mul f32_start_scalar
entry f32_add = f32_kernels.add
entry f32_triad = f32_kernels.triad f32_start_scalar
entry f32_nstream = f32_kernels.nstream f32_start_scalar
entry f32_dot = f32_kernels.dot

-- Double-precision instantiation and its entry points. The scalar-taking
-- kernels are partially applied to f64_start_scalar, so each entry point
-- takes only array arguments.
module f64_kernels = kernels f64
def f64_start_scalar : f64 = 0.4
entry f64_copy = f64_kernels.copy
entry f64_mul = f64_kernels.mul f64_start_scalar
entry f64_add = f64_kernels.add
entry f64_triad = f64_kernels.triad f64_start_scalar
entry f64_nstream = f64_kernels.nstream f64_start_scalar
entry f64_dot = f64_kernels.dot

-- ==
-- entry: f32_copy f32_mul
-- random input { [33554432]f32 }

-- ==
-- entry: f32_add f32_dot f32_triad
-- random input { [33554432]f32 [33554432]f32 }

-- ==
-- entry: f32_nstream
-- random input { [33554432]f32 [33554432]f32 [33554432]f32 }

-- ==
-- entry: f64_copy f64_mul
-- random input { [33554432]f64 }

-- ==
-- entry: f64_add f64_dot f64_triad
-- random input { [33554432]f64 [33554432]f64 }

-- ==
-- entry: f64_nstream
-- random input { [33554432]f64 [33554432]f64 [33554432]f64 }
Loading

0 comments on commit 92fed70

Please sign in to comment.