Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make way for aarch64 #2

Merged
merged 38 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
a7a3e8d
Add ruy as a dependency
jerinphilip Aug 12, 2023
3f6c1e7
Moving things around, refactor begins
jerinphilip Aug 12, 2023
08afe7f
Add android-ndk build script
jerinphilip Aug 12, 2023
015bf8a
Setting i8 specific header for time being
jerinphilip Aug 12, 2023
290a51f
Have we regained compilation?
jerinphilip Aug 12, 2023
3e2ed1b
Using templates for ODR co-existence
jerinphilip Aug 12, 2023
1a93363
Ruy code in place, now I find the segfault
jerinphilip Aug 12, 2023
d0391d9
B_cols changes to indices.size(), but not sure if this sticks?
jerinphilip Aug 12, 2023
7132b44
Remove hacks around Wemb_QuantMultA
jerinphilip Aug 12, 2023
7a8372b
Only selected_B_cols, not B_cols
jerinphilip Aug 12, 2023
a171aef
Templating vectors...
jerinphilip Aug 12, 2023
234005b
Move vectorized_ to simd.hh
jerinphilip Aug 12, 2023
f1c1149
w8 -> w4
jerinphilip Aug 12, 2023
ecf4968
Finishing up android ndk compile
jerinphilip Aug 12, 2023
8cf0bc9
MacOS M1 patches
jerinphilip Aug 13, 2023
06206c7
Fix formatting after M1
jerinphilip Aug 13, 2023
9611c50
Add neon.h
jerinphilip Aug 13, 2023
c387d33
Updates to README.md
jerinphilip Aug 13, 2023
2714f58
Add submodule clone instructions in the README.md
jerinphilip Aug 13, 2023
7a234c3
Add Apple Accelerate BLAS
jerinphilip Aug 13, 2023
01d2c72
Refine as a cmake-target?
jerinphilip Aug 13, 2023
5e5c74f
Fix linking on actual M1
jerinphilip Aug 13, 2023
5c57a51
Fix formatting
jerinphilip Aug 13, 2023
cc339e6
Indirect CI to sh via scripts
jerinphilip Aug 13, 2023
1c8e507
Update android paths and try
jerinphilip Aug 13, 2023
cc2e477
fail-fast false
jerinphilip Aug 13, 2023
d9143fa
Add an 03-test.sh placeholder
jerinphilip Aug 13, 2023
7c27280
Empty, trigger CI
jerinphilip Aug 13, 2023
1dcdcbd
Fix path from CI
jerinphilip Aug 13, 2023
1c5b502
Quote prefix
jerinphilip Aug 13, 2023
f20023b
Simplify
jerinphilip Aug 13, 2023
70a4a10
Use matrix.name
jerinphilip Aug 13, 2023
e39e995
Remove the i8xi8 ugliness
jerinphilip Aug 13, 2023
ec14a6f
Tensori8Ops -> QMM
jerinphilip Aug 13, 2023
58e39db
Few leftover rename updates
jerinphilip Aug 13, 2023
1d955a6
Default ruy to off
jerinphilip Aug 14, 2023
6aab8c8
Update android cmake configure args
jerinphilip Aug 14, 2023
d4f406c
Remove __SSE__
jerinphilip Aug 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 11 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,15 @@ jobs:
build-test:
name: "build-test"
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
include:
- name: "ubuntu"
os: "ubuntu-latest"
- name: "macos"
os: "macos-latest"
- name: "android"
os: "ubuntu-latest"

runs-on: ${{ matrix.os }}

Expand All @@ -48,13 +55,12 @@ jobs:

- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake
bash scripts/ci/${{ matrix.name }}/01-setup.sh

- name: Build
run:
bash scripts/ci/build.sh
bash scripts/ci/${{ matrix.name }}/02-build.sh

- name: Test
run:
bash scripts/ci/test.sh
bash scripts/ci/${{ matrix.name }}/03-test.sh
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "3rd-party/intgemm"]
path = 3rd-party/intgemm
url = https://github.com/kpu/intgemm
[submodule "3rd-party/ruy"]
path = 3rd-party/ruy
url = https://github.com/google/ruy
3 changes: 3 additions & 0 deletions 3rd-party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ add_library(Sentencepiece::Sentencepiece ALIAS spiece_iface)

# intgemm
add_subdirectory(intgemm EXCLUDE_FROM_ALL)

# ruy
add_subdirectory(ruy EXCLUDE_FROM_ALL)
1 change: 1 addition & 0 deletions 3rd-party/ruy
Submodule ruy added at c04e5e
45 changes: 41 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,48 @@ option(WITH_ASAN "Enable AddressSanitizer in builds" OFF)
option(WITH_TESTS "Enable building tests (requires a few extra libraries)" OFF)

add_subdirectory(3rd-party)
find_package(BLAS REQUIRED)

set(SLIMT_EXTRA_COMPILE_OPTIONS -Wall -Wpedantic -Wextra -Wno-unknown-pragmas
-Werror -march=native)
set(SLIMT_EXTERNAL_LIBS Sentencepiece::Sentencepiece intgemm BLAS::BLAS)
set(SLIMT_COMPILE_OPTIONS -Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Werror)
set(SLIMT_COMPILE_DEFINITIONS "")

# Backend selection switches. Each enabled backend appends its library to
# SLIMT_EXTERNAL_LIBS and a matching HAS_* entry to SLIMT_COMPILE_DEFINITIONS
# in the if() blocks that follow.

# intgemm backend; enabling it also adds -march=native to the compile options.
option(WITH_INTGEMM "Use intgemm" ON)
# ruy backend; disabled unless explicitly requested.
option(WITH_RUY "Use ruy" OFF)
# BLAS backend, located through find_package(BLAS).
option(WITH_BLAS "Use BLAS. Otherwise moves to ruy" ON)

set(SLIMT_EXTERNAL_LIBS Sentencepiece::Sentencepiece)

# BLAS backend. When enabled, BLAS::BLAS is linked unconditionally, so the
# package is REQUIRED: a missing BLAS is reported at the find_package() call
# instead of surfacing later as an unknown-target error.
if(WITH_BLAS)
  # FindBLAS reads BLA_VENDOR (not BLAS_VENDOR) and only honours it when it is
  # set before find_package(). Prefer Accelerate on Apple platforms unless the
  # user already chose a vendor through the variable or the environment.
  if(APPLE
     AND NOT DEFINED BLA_VENDOR
     AND NOT DEFINED ENV{BLA_VENDOR})
    set(BLA_VENDOR "Apple")
  endif()

  find_package(BLAS REQUIRED)
  list(APPEND SLIMT_EXTERNAL_LIBS BLAS::BLAS)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_BLAS)

  if(APPLE)
    # Not consumed by FindBLAS; kept in case other parts of the build read it.
    set(BLAS_VENDOR "Accelerate")

    # see https://developer.apple.com/documentation/accelerate for more info you
    # may need to install Xcode command line tools if you don't have them
    # already (https://developer.apple.com/xcode/features/)
    set(SLIMT_VECLIB_HEADERS_SUFFIX
        "System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers"
    )
    # Default to the Command Line Tools SDK, but prefer the SDK CMake was
    # configured with when it is a real directory containing the headers (an
    # Xcode-only install has no CommandLineTools SDK).
    set(SLIMT_VECLIB_HEADERS
        "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/${SLIMT_VECLIB_HEADERS_SUFFIX}"
    )
    if(CMAKE_OSX_SYSROOT
       AND IS_DIRECTORY "${CMAKE_OSX_SYSROOT}/${SLIMT_VECLIB_HEADERS_SUFFIX}")
      set(SLIMT_VECLIB_HEADERS
          "${CMAKE_OSX_SYSROOT}/${SLIMT_VECLIB_HEADERS_SUFFIX}")
    endif()

    add_library(apple_accelerate INTERFACE)
    target_link_libraries(apple_accelerate INTERFACE "-framework Accelerate")
    # https://stackoverflow.com/a/58304566
    target_include_directories(apple_accelerate
                               INTERFACE "${SLIMT_VECLIB_HEADERS}")
    add_library(AppleLibs::accelerate ALIAS apple_accelerate)
    list(APPEND SLIMT_EXTERNAL_LIBS AppleLibs::accelerate)
  endif()
endif()

# intgemm backend: link the library, expose HAS_INTGEMM to the sources and
# compile with -march=native.
if(WITH_INTGEMM)
  list(APPEND SLIMT_EXTERNAL_LIBS intgemm)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_INTGEMM)
  list(APPEND SLIMT_COMPILE_OPTIONS -march=native)
endif()

# ruy backend: expose HAS_RUY to the sources and link the ruy target.
if(WITH_RUY)
  list(APPEND SLIMT_COMPILE_DEFINITIONS HAS_RUY)
  list(APPEND SLIMT_EXTERNAL_LIBS ruy)
endif()

# cmake-format: off
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg" CACHE STRING "Flags used by the C++ compiler during profile builds." FORCE)
Expand Down
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ library with focus on machine translation, all the bells and whistles that come
with it are not necessary to run inference on client-machines (e.g: autograd,
multiple sequence-to-sequence architecture support, beam-search). For some use
cases like an input-method engine doing translation (see
[lemonade](https://github.com/jerinphilip/lemonade)). Single-thread operation
[lemonade](https://github.com/jerinphilip/lemonade)) - single-thread operation
existing along with other processes on the system suffices. This is the
motivation for this transplant repository. There's not much novel here except
easiness to wield. This repository is simply just the _tiny11_ part of marian.
Expand All @@ -30,10 +30,9 @@ The large-list of dependencies from bergamot-translator have currently been
reduced to:

* For `int8_t` matrix-multiply [intgemm](https://github.com/kpu/intgemm) (`x86_64`) or
[ruy](https://github.com/google/ruy) (`aarch64`, planned).
[ruy](https://github.com/google/ruy) (`aarch64`).
* For vocabulary - [sentencepiece](https://github.com/browsermt/sentencepiece).
* For `sgemm` - Whatever BLAS provider is found via CMake.
* OpenMP is used in `layer_norm`, and is pending removal.
* CLI11 (only a dependency for cmdline)

Source code is made public where basic functionality (text-translation) works.
Expand All @@ -42,6 +41,13 @@ relevant) is a work-in-progress. Contributions are welcome and appreciated.

## Getting started

Clone with submodules.

```
git clone --recursive https://github.com/jerinphilip/slimt.git
```

Configure and build.

```bash
# Configure
Expand Down
3 changes: 2 additions & 1 deletion app/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ void run(const Options &options) {
};

// Tokenize into numeric-ids using sentencepiece.
size_t max_sequence_length = 0;
Vocabulary vocab(mmap.vocab.data(), mmap.vocab.size());
ShortlistGenerator shortlist_generator( //
mmap.shortlist.data(), mmap.shortlist.size(), //
Expand Down Expand Up @@ -95,6 +94,7 @@ void run(const Options &options) {
};

std::string line;
size_t max_sequence_length = 0;
size_t token_count = 0;
size_t line_no = 0;
Sentences sentences;
Expand All @@ -114,6 +114,7 @@ void run(const Options &options) {
if (token_count > options.max_tokens_per_batch) {
batch_and_translate(sentences, max_sequence_length);
sentences.clear();
max_sequence_length = 0;
}
sentences.push_back(std::move(words));
max_sequence_length = candidate_max_sequence_length;
Expand Down
20 changes: 14 additions & 6 deletions app/test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <iostream>
#include <unordered_map>

#include "3rd-party/intgemm/intgemm/intgemm.h"
#include "slimt/slimt.hh"

namespace slimt {
Expand Down Expand Up @@ -267,6 +266,11 @@ void LayerNormalizationOp() {
SLIMT_TRACE(lhs_expected);
SLIMT_CHECK(lhs == lhs_expected);
}
} // namespace slimt

#ifdef HAS_INTGEMM
#include "3rd-party/intgemm/intgemm/intgemm.h"
namespace slimt {

void AffineIntgemm() {
// clang-format off
Expand Down Expand Up @@ -609,8 +613,8 @@ void AffineIntgemm() {

// Compute from the intgemm_affine function, used in the library.
// This ensures what we checked in there is consistent with what we expect.
Tensor y_whole = intgemm_affine(actual.A, actual.B, actual.bias, quant.a,
quant.b, "y_whole");
Tensor y_whole = qmm::affine(actual.A, actual.B, actual.bias, quant.a,
quant.b, "y_whole");
SLIMT_TRACE(y_whole.shape());
SLIMT_TRACE(y_expected.shape());
SLIMT_TRACE(mse(y_whole, y_expected));
Expand All @@ -626,7 +630,10 @@ void AffineIntgemm() {

// SLIMT_TRACE2(y_whole, y_expected);
}
} // namespace slimt
#endif

namespace slimt {
template <class Field>
struct Record {
Field model;
Expand Down Expand Up @@ -759,8 +766,10 @@ int main(int argc, char **argv) {
TEST_ENTRY(DotBatchedNodeOp), //
TEST_ENTRY(TransposeNodeOp), //
TEST_ENTRY(LayerNormalizationOp), //
TEST_ENTRY(AffineIntgemm), //
TEST_ENTRY(ShortlistGen) //
#ifdef HAS_INTGEMM
TEST_ENTRY(AffineIntgemm), //
#endif
TEST_ENTRY(ShortlistGen) //
});

// std::cout << "slimt test\n";
Expand Down Expand Up @@ -796,6 +805,5 @@ int main(int argc, char **argv) {
std::cerr << "Unknown test " << test << "\n";
std::exit(EXIT_FAILURE);
}

return 0;
}
5 changes: 5 additions & 0 deletions scripts/ci/android/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Install build tools and fetch the Android NDK (r23b) that 02-build.sh
# points its CMake toolchain file at.

# Abort on the first failing command so a broken install or download fails
# this CI step instead of surfacing later as a confusing build error.
set -eo pipefail

# Refresh the package index first, as the ubuntu setup script does.
sudo apt-get update
sudo apt-get -y install ccache cmake

# -c resumes a partial download if the archive is already present.
wget -c --quiet https://dl.google.com/android/repository/android-ndk-r23b-linux.zip
unzip -qq android-ndk-r23b-linux.zip
41 changes: 41 additions & 0 deletions scripts/ci/android/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash

# Cross-compile slimt for Android (arm64-v8a) with the NDK's CMake toolchain.
# Run from the repository root, where 01-setup.sh unpacks the NDK archive.

# Stop at the first failure so a broken configure does not go on to "build".
set -eo pipefail

# Configure an out-of-source build in ./build with ruy as the only backend
# (intgemm and BLAS are switched off).
function cmake-configure {
  # Absolute path: the toolchain file is referenced after `pushd build`.
  local NDK="${PWD}/android-ndk-r23b"
  local ABI="arm64-v8a"
  local MINSDK_VERSION=28
  local ANDROID_PLATFORM="android-${MINSDK_VERSION}"

  mkdir -p build
  pushd build

  local SLIMT_ARGS=(
    -DWITH_RUY=ON
    -DWITH_INTGEMM=OFF
    -DWITH_BLAS=OFF
  )

  local OTHER_ANDROID_ARGS=(
    -DANDROID_ARM_NEON=TRUE
  )

  # NOTE(review): ANDROID_TOOLCHAIN_NAME below is a legacy GCC toolchain name;
  # presumably ignored by NDK r23b -- confirm before removing.

  # Additionally list variables finally configured.
  set -x
  cmake -L \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_TOOLCHAIN_FILE="${NDK}/build/cmake/android.toolchain.cmake" \
    -DANDROID_TOOLCHAIN=clang \
    -DANDROID_ABI="${ABI}" \
    -DANDROID_PLATFORM="${ANDROID_PLATFORM}" \
    -DANDROID_NATIVE_API_LEVEL="${MINSDK_VERSION}" \
    -DANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.8 \
    -DANDROID_STL=c++_static \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    "${SLIMT_ARGS[@]}" \
    "${OTHER_ANDROID_ARGS[@]}" \
    ..
  set +x
  popd
}

cmake-configure
cmake --build build --target all
3 changes: 3 additions & 0 deletions scripts/ci/android/03-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Placeholder test step: only prints a notice, nothing is executed.
printf '%s\n' "Android is cross-compiled, no tests for now."
9 changes: 0 additions & 9 deletions scripts/ci/build.sh

This file was deleted.

3 changes: 3 additions & 0 deletions scripts/ci/macos/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# macOS CI setup: install CMake through Homebrew.
brew install cmake
9 changes: 9 additions & 0 deletions scripts/ci/macos/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Configure and build slimt on macOS with intgemm enabled and ruy disabled.

set -eo pipefail

# Configure. "$PWD" is quoted so a checkout path containing spaces is passed
# to cmake as a single argument.
cmake -B build -S "$PWD" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
  -DWITH_INTGEMM=ON \
  -DWITH_RUY=OFF

# Build
cmake --build build --target all
16 changes: 16 additions & 0 deletions scripts/ci/macos/03-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

# Smoke test on macOS: fetch the en-de-tiny model with the bergamot CLI and
# run the built slimt binary on data/sample.txt.

# Fail the step as soon as the install, the download or the run fails.
set -eo pipefail

# Install `bergamot` CLI via pip.
python3 -m pip install bergamot -f https://github.com/jerinphilip/bergamot-translator/releases/expanded_assets/latest

# Download en-de-tiny and de-en-tiny models.
bergamot download -m en-de-tiny

# The path contains a space ("Application Support"), so every expansion of
# it below stays quoted.
BROWSERMT="$HOME/Library/Application Support/bergamot/models/browsermt/"
PREFIX="$BROWSERMT/ende.student.tiny11"

MODEL=model.intgemm.alphas.bin
VOCAB=vocab.deen.spm
SHORTLIST=lex.s2t.bin

./build/bin/slimt --root "${PREFIX}" \
  --model "${MODEL}" --vocab "${VOCAB}" --shortlist "${SHORTLIST}" \
  < data/sample.txt
4 changes: 4 additions & 0 deletions scripts/ci/ubuntu/01-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Ubuntu CI setup: install the compiler toolchain and CMake.

# Abort on the first failing command so a failed index refresh is not
# silently followed by an install against stale package lists.
set -eo pipefail

sudo apt-get update
sudo apt-get install -y build-essential cmake
9 changes: 9 additions & 0 deletions scripts/ci/ubuntu/02-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Configure and build slimt on Ubuntu with intgemm enabled and ruy disabled.

set -eo pipefail

# Configure. "$PWD" is quoted so a checkout path containing spaces is passed
# to cmake as a single argument.
cmake -B build -S "$PWD" \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
  -DWITH_INTGEMM=ON \
  -DWITH_RUY=OFF

# Build
cmake --build build --target all
File renamed without changes.
4 changes: 3 additions & 1 deletion slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ add_library(
Tensor.cc
Model.cc
TensorOps.cc
QMM.cc
Batch.cc
Utils.cc
Shortlist.cc)

target_compile_options(slimt PRIVATE ${SLIMT_EXTRA_COMPILE_OPTIONS})
target_compile_options(slimt PRIVATE ${SLIMT_COMPILE_OPTIONS})
target_compile_definitions(slimt PRIVATE ${SLIMT_COMPILE_DEFINITIONS})
target_link_libraries(slimt PUBLIC ${SLIMT_EXTERNAL_LIBS})

if(WITH_ASAN)
Expand Down