diff --git a/.github/bot-base.sh b/.github/bot-base.sh new file mode 100644 index 00000000000..608cd6d6308 --- /dev/null +++ b/.github/bot-base.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -e + +API_HEADER="Accept: application/vnd.github.v3+json" +AUTH_HEADER="Authorization: token $GITHUB_TOKEN" + +api_get() { + curl -X GET -s -H "${AUTH_HEADER}" -H "${API_HEADER}" "$1" +} + +api_post() { + curl -X POST -s -H "${AUTH_HEADER}" -H "${API_HEADER}" "$1" -d "$2" +} + +api_patch() { + curl -X PATCH -s -H "${AUTH_HEADER}" -H "${API_HEADER}" "$1" -d "$2" +} + +api_delete() { + curl -X DELETE -s -H "${AUTH_HEADER}" -H "${API_HEADER}" "$1" +} diff --git a/.github/bot-pr-base.sh b/.github/bot-pr-base.sh new file mode 100644 index 00000000000..67f42e91bd0 --- /dev/null +++ b/.github/bot-pr-base.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +source .github/bot-base.sh + +echo -n "Collecting information on triggering PR" +PR_URL=$(jq -r .pull_request.url "$GITHUB_EVENT_PATH") +if [[ "$PR_URL" == "null" ]]; then + # if this was triggered by an issue comment: get PR and commenter + echo -n ............. + PR_URL=$(jq -er .issue.pull_request.url "$GITHUB_EVENT_PATH") + echo -n . + USER_LOGIN=$(jq -er ".comment.user.login" "$GITHUB_EVENT_PATH") + echo -n . + USER_URL=$(jq -er ".comment.user.url" "$GITHUB_EVENT_PATH") + echo -n . +else + # else it was triggered by a PR sync: get PR creator + USER_LOGIN=$(jq -er ".pull_request.user.login" "$GITHUB_EVENT_PATH") + echo -n . + USER_URL=$(jq -er ".pull_request.user.url" "$GITHUB_EVENT_PATH") + echo -n . +fi +echo -n . +PR_JSON=$(api_get $PR_URL) +echo -n . +PR_MERGED=$(echo "$PR_JSON" | jq -r .merged) +echo -n . +ISSUE_URL=$(echo "$PR_JSON" | jq -er ".issue_url") +echo -n . +BASE_REPO=$(echo "$PR_JSON" | jq -er .base.repo.full_name) +echo -n . +BASE_BRANCH=$(echo "$PR_JSON" | jq -er .base.ref) +echo -n . +HEAD_REPO=$(echo "$PR_JSON" | jq -er .head.repo.full_name) +echo -n . +HEAD_BRANCH=$(echo "$PR_JSON" | jq -er .head.ref) +echo . 
+ +BASE_URL="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/$BASE_REPO" +HEAD_URL="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/$HEAD_REPO" + +JOB_URL="https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" + +bot_delete_comments_matching() { + local search_matching="$1" + COMMENTS=$(api_get "$ISSUE_URL/comments" | jq -r --arg prefix "$search_matching" '.[] | select((.user.login == "ginkgo-bot") and (.body | startswith($prefix))) | .url') + for URL in $COMMENTS; do + api_delete "$URL" > /dev/null + done +} + +bot_comment() { + api_post "$ISSUE_URL/comments" "{\"body\":\"$1\"}" > /dev/null +} + +bot_error() { + echo "$1" + bot_comment "Error: $1" + exit 1 +} + +bot_get_all_changed_files() { + local pr_url="$1" + local pr_files="" + local page="1" + while true; do + # this api allows 100 items per page + # GitHub Actions runs scripts with `bash -e`. The last (empty) page makes jq exit non-zero, so use `|| :` to ignore that error. + local pr_page_files=$(api_get "$pr_url/files?&per_page=100&page=${page}" | jq -er '.[] | select(.status != "removed") | .filename' || :) + if [ "${pr_page_files}" = "" ]; then + break + fi + if [ ! "${pr_files}" = "" ]; then + # add the same new line format as jq output + pr_files="${pr_files}"$'\n' + fi + pr_files="${pr_files}${pr_page_files}" + page=$(( page + 1 )) + done + echo "${pr_files}" +} + +# collect info on the user that invoked the bot +echo -n "Collecting information on triggering user" +USER_JSON=$(api_get "$USER_URL") +echo . + +USER_NAME=$(echo "$USER_JSON" | jq -r ".name") +if [[ "$USER_NAME" == "null" ]]; then + USER_NAME=$USER_LOGIN +fi +USER_EMAIL=$(echo "$USER_JSON" | jq -r ".email") +if [[ "$USER_EMAIL" == "null" ]]; then + USER_EMAIL="$USER_LOGIN@users.noreply.github.com" +fi +USER_COMBINED="$USER_NAME <$USER_EMAIL>" + +if [[ "$PR_MERGED" == "true" ]]; then + bot_error "PR already merged!"
+fi diff --git a/.github/bot-pr-format-base.sh b/.github/bot-pr-format-base.sh new file mode 100644 index 00000000000..e72539a9d61 --- /dev/null +++ b/.github/bot-pr-format-base.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +source .github/bot-pr-base.sh + +EXTENSION_REGEX='\.(cuh?|hpp|hpp\.inc?|cpp)$' +FORMAT_HEADER_REGEX='^(benchmark|core|cuda|hip|include/ginkgo/core|omp|reference|dpcpp)/' +FORMAT_REGEX='^(common|examples|test)/' + +echo "Retrieving PR file list" +PR_FILES=$(bot_get_all_changed_files ${PR_URL}) +NUM=$(echo "${PR_FILES}" | wc -l) +echo "PR has ${NUM} changed files" + +TO_FORMAT="$(echo "$PR_FILES" | grep -E $EXTENSION_REGEX || true)" + +git remote add fork "$HEAD_URL" +git fetch fork "$HEAD_BRANCH" + +git config user.email "ginkgo.library@gmail.com" +git config user.name "ginkgo-bot" + +# save scripts from develop +pushd dev_tools/scripts +cp add_license.sh format_header.sh update_ginkgo_header.sh /tmp +popd + +# checkout current PR head +LOCAL_BRANCH=format-tmp-$HEAD_BRANCH +git checkout -b $LOCAL_BRANCH fork/$HEAD_BRANCH + +# restore files from develop +cp /tmp/add_license.sh dev_tools/scripts/ +cp /tmp/format_header.sh dev_tools/scripts/ +cp /tmp/update_ginkgo_header.sh dev_tools/scripts/ + +# format files +CLANG_FORMAT=clang-format-9 +dev_tools/scripts/add_license.sh +dev_tools/scripts/update_ginkgo_header.sh +for f in $(echo "$TO_FORMAT" | grep -E $FORMAT_HEADER_REGEX); do dev_tools/scripts/format_header.sh "$f"; done +for f in $(echo "$TO_FORMAT" | grep -E $FORMAT_REGEX); do "$CLANG_FORMAT" -i -style=file "$f"; done + +# restore formatting scripts so they don't appear in the diff +git checkout -- dev_tools/scripts/*.sh diff --git a/.github/check-format.sh b/.github/check-format.sh new file mode 100755 index 00000000000..635b64c943f --- /dev/null +++ b/.github/check-format.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +cp .github/bot-pr-format-base.sh /tmp +source /tmp/bot-pr-format-base.sh + +# check for changed files, replace newlines by \n +LIST_FILES=$(git 
diff --name-only | sed '$!s/$/\\n/' | tr -d '\n') + +git diff > /tmp/format.patch +mv /tmp/format.patch . + +bot_delete_comments_matching "Error: The following files need to be formatted" + +if [[ "$LIST_FILES" != "" ]]; then + MESSAGE="The following files need to be formatted:\n"'```'"\n$LIST_FILES\n"'```' + MESSAGE="$MESSAGE\nYou can find a formatting patch under **Artifacts** [here]" + MESSAGE="$MESSAGE($JOB_URL) or run "'`format!` if you have write access to Ginkgo' + bot_error "$MESSAGE" +fi diff --git a/.github/format.sh b/.github/format.sh new file mode 100755 index 00000000000..8829b719b8f --- /dev/null +++ b/.github/format.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +cp .github/bot-pr-format-base.sh /tmp +source /tmp/bot-pr-format-base.sh + +# check for changed files, replace newlines by \n +LIST_FILES=$(git diff --name-only | sed '$!s/$/\\n/' | tr -d '\n') + +# commit changes if necessary +if [[ "$LIST_FILES" != "" ]]; then + git commit -a -m "Format files + +Co-authored-by: $USER_COMBINED" + git push fork $LOCAL_BRANCH:$HEAD_BRANCH 2>&1 || bot_error "Cannot push formatted branch, are edits for maintainers allowed?" 
+fi diff --git a/.github/label.sh b/.github/label.sh new file mode 100755 index 00000000000..aa6b4f13f75 --- /dev/null +++ b/.github/label.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +source .github/bot-pr-base.sh + +echo "Retrieving PR file list" +PR_FILES=$(bot_get_all_changed_files ${PR_URL}) +NUM=$(echo "${PR_FILES}" | wc -l) +echo "PR has ${NUM} changed files" + +echo "Retrieving PR label list" +OLD_LABELS=$(api_get "$ISSUE_URL" | jq -er '[.labels | .[] | .name]') + + +label_match() { + if echo "$PR_FILES" | grep -qE "$2"; then + echo "+[\"$1\"]" + fi +} + +LABELS="[]" +LABELS=$LABELS$(label_match mod:core '(^core/|^include/)') +LABELS=$LABELS$(label_match mod:reference '^reference/') +LABELS=$LABELS$(label_match mod:openmp '^omp/') +LABELS=$LABELS$(label_match mod:cuda '(^cuda/|^common/)') +LABELS=$LABELS$(label_match mod:hip '(^hip/|^common/)') +LABELS=$LABELS$(label_match mod:dpcpp '^dpcpp/') +LABELS=$LABELS$(label_match reg:benchmarking '^benchmark/') +LABELS=$LABELS$(label_match reg:example '^examples/') +LABELS=$LABELS$(label_match reg:build '(cm|CM)ake') +LABELS=$LABELS$(label_match reg:ci-cd '(^\.github/|\.yml$)') +LABELS=$LABELS$(label_match reg:documentation '^doc/|\.md$') +LABELS=$LABELS$(label_match reg:testing /test/) +LABELS=$LABELS$(label_match reg:helper-scripts '^dev_tools/') +LABELS=$LABELS$(label_match type:factorization /factorization/) +LABELS=$LABELS$(label_match type:matrix-format /matrix/) +LABELS=$LABELS$(label_match type:multigrid /multigrid/) +LABELS=$LABELS$(label_match type:preconditioner /preconditioner/) +LABELS=$LABELS$(label_match type:reordering /reorder/) +LABELS=$LABELS$(label_match type:solver /solver/) +LABELS=$LABELS$(label_match type:stopping-criteria /stop/) + +# if all mod: labels present: replace by mod:all +LABELS=$(echo "$LABELS" | sed 's/.*mod:.*mod:.*mod:.*mod:.*mod:.*mod:[^"]*"\]/[]+["mod:all"]/') + +PATCH_BODY=$(jq -rn "{labels:($OLD_LABELS + $LABELS | unique)}") +api_patch "$ISSUE_URL" "$PATCH_BODY" > /dev/null diff 
--git a/.github/rebase.sh b/.github/rebase.sh new file mode 100755 index 00000000000..7367ed04fb1 --- /dev/null +++ b/.github/rebase.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +source .github/bot-pr-base.sh + +git remote add base "$BASE_URL" +git remote add fork "$HEAD_URL" + +git fetch base $BASE_BRANCH +git fetch fork $HEAD_BRANCH + +git config user.email "$USER_EMAIL" +git config user.name "$USER_NAME" + +LOCAL_BRANCH=rebase-tmp-$HEAD_BRANCH +git checkout -b $LOCAL_BRANCH fork/$HEAD_BRANCH + +bot_delete_comments_matching "Error: Rebase failed" + +# do the rebase +git rebase base/$BASE_BRANCH 2>&1 || bot_error "Rebase failed, see the related [Action]($JOB_URL) for details" + +# push back +git push --force-with-lease fork $LOCAL_BRANCH:$HEAD_BRANCH 2>&1 || bot_error "Cannot push rebased branch, are edits for maintainers allowed?" diff --git a/.github/workflows/bot-pr-comment.yml b/.github/workflows/bot-pr-comment.yml new file mode 100644 index 00000000000..0058e25208c --- /dev/null +++ b/.github/workflows/bot-pr-comment.yml @@ -0,0 +1,65 @@ +on: + issue_comment: + types: [created] +name: OnCommentPR +jobs: + label: + runs-on: ubuntu-latest + if: github.event.issue.pull_request != '' && github.event.comment.body == 'label!' && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + with: + ref: develop + - name: Add appropriate labels + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: cp .github/label.sh /tmp && /tmp/label.sh + check_format: + name: check-format + runs-on: ubuntu-18.04 + if: github.event.issue.pull_request != '' && github.event.comment.body == 'check-format!' 
&& (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + with: + ref: develop + - name: Check for formatting changes + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: cp .github/check-format.sh /tmp && /tmp/check-format.sh + - name: Upload code formatting patch + if: failure() + uses: actions/upload-artifact@v2 + with: + name: patch + path: format.patch + format: + name: format + runs-on: ubuntu-18.04 + if: github.event.issue.pull_request != '' && github.event.comment.body == 'format!' && (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + with: + ref: develop + persist-credentials: false + - name: Commit formatting changes + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: cp .github/format.sh /tmp && /tmp/format.sh + rebase: + name: rebase + if: github.event.issue.pull_request != '' && github.event.comment.body == 'rebase!' 
&& (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') + runs-on: ubuntu-latest + steps: + - name: Checkout the latest code + uses: actions/checkout@v2 + with: + ref: develop + fetch-depth: 0 + persist-credentials: false + - name: Automatic Rebase + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: cp .github/rebase.sh /tmp && /tmp/rebase.sh diff --git a/.github/workflows/bot-pr-created.yml b/.github/workflows/bot-pr-created.yml new file mode 100644 index 00000000000..aa306e1f3aa --- /dev/null +++ b/.github/workflows/bot-pr-created.yml @@ -0,0 +1,18 @@ +on: + pull_request_target: + types: [opened] + +name: OnNewPR +jobs: + label: + runs-on: ubuntu-latest + if: github.event.pull_request.author_association == 'COLLABORATOR' || github.event.pull_request.author_association == 'MEMBER' || github.event.pull_request.author_association == 'OWNER' + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + with: + ref: develop + - name: Add appropriate labels + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: .github/label.sh diff --git a/.github/workflows/bot-pr-updated.yml b/.github/workflows/bot-pr-updated.yml new file mode 100644 index 00000000000..c83d1866b57 --- /dev/null +++ b/.github/workflows/bot-pr-updated.yml @@ -0,0 +1,24 @@ +on: + pull_request_target: + types: [opened,synchronize] + +name: OnSyncPR +jobs: + check-format: + runs-on: ubuntu-18.04 + if: github.event.pull_request.author_association == 'COLLABORATOR' || github.event.pull_request.author_association == 'MEMBER' || github.event.pull_request.author_association == 'OWNER' + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + with: + ref: develop + - name: Check for formatting changes + env: + GITHUB_TOKEN: ${{ secrets.BOT_TOKEN }} + run: cp .github/check-format.sh /tmp && /tmp/check-format.sh + - name: Upload code 
formatting patch + if: failure() + uses: actions/upload-artifact@v2 + with: + name: patch + path: format.patch diff --git a/.github/workflows/joss.yml b/.github/workflows/joss.yml index 67d52410466..92aca560633 100644 --- a/.github/workflows/joss.yml +++ b/.github/workflows/joss.yml @@ -11,7 +11,9 @@ jobs: runs-on: [ubuntu-latest] steps: - - uses: actions/checkout@v2 + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + - name: setup run: sudo apt-get install texlive-xetex pandoc pandoc-citeproc - name: info diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 59935e07273..5807599e800 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -1,6 +1,13 @@ name: OSX-build -on: [push] +on: + push: + workflow_dispatch: + inputs: + debug_enabled: + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false jobs: osx-clang-omp: @@ -8,26 +15,36 @@ jobs: fail-fast: false matrix: config: - - {shared: "ON", build_type: "Debug", name: "omp/debug/shared"} - - {shared: "OFF", build_type: "Release", name: "omp/release/static"} + - {shared: "ON", build_type: "Debug", name: "omp/debug/shared", "mixed": "OFF"} + - {shared: "OFF", build_type: "Release", name: "omp/release/static", "mixed": "ON"} name: ${{ matrix.config.name }} runs-on: [macos-latest] steps: - - uses: actions/checkout@v2 + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + - name: setup - run: brew install libomp + run: | + brew install libomp + - name: info run: | g++ -v cmake --version + + - name: Debug over SSH (tmate) + uses: mxschmitt/action-tmate@v3.5 + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }} + - name: configure run: | mkdir build cd build - cmake .. -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} + cmake ..
-DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} make -j8 ctest -j10 --output-on-failure + - name: install run: | cd build diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml deleted file mode 100644 index c1bda6bc2f3..00000000000 --- a/.github/workflows/windows-build.yml +++ /dev/null @@ -1,169 +0,0 @@ -name: Windows-build - -on: [push] - -jobs: - windows_cuda: - strategy: - fail-fast: false - matrix: - config: - - {version: "10.2.89.20191206", name: "cuda102/release/shared"} - - {version: "latest", name: "cuda-latest/release/shared"} - name: msvc/${{ matrix.config.name }} (only compile) - runs-on: [windows-latest] - steps: - - uses: actions/checkout@v2 - - name: setup (versioned) - if: matrix.config.version != 'latest' - run: | - choco install cuda --version=${{ matrix.config.version }} -y - - name: setup (latest) - if: matrix.config.version == 'latest' - run: | - choco install cuda -y - - name: configure - run: | - $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." - Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - refreshenv - mkdir build - cd build - $env:PATH="$pwd\windows_shared_library;$env:PATH" - cmake -DGINKGO_BUILD_CUDA=ON -DGINKGO_BUILD_OMP=OFF .. - cmake --build . 
-j4 --config Release - - windows_ref: - strategy: - fail-fast: false - matrix: - config: - - {shared: "ON", build_type: "Debug", name: "reference/debug/shared"} - - {shared: "OFF", build_type: "Release", name: "reference/release/static"} - # Debug static needs too much storage - # - {shared: "OFF", build_type: "Debug", name: "reference/debug/static"} - name: msvc/${{ matrix.config.name }} - runs-on: [windows-latest] - steps: - - uses: actions/checkout@v2 - - name: shared_env - if: matrix.config.shared == 'ON' - run: | - echo "::set-env name=origin_path::$env:PATH" - echo "::add-path::$pwd\build\windows_shared_library" - - name: debug_env - if: matrix.config.build_type == 'Debug' - run: | - echo "::set-env name=CXXFLAGS::/bigobj" - - name: configure - run: | - mkdir build - cd build - cmake -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF .. - cmake --build . -j4 --config ${{ matrix.config.build_type }} - ctest . -C ${{ matrix.config.build_type }} --output-on-failure - - name: install_shared_env - if: matrix.config.shared == 'ON' - run: | - echo "::set-env name=PATH::C:\Program Files (x86)\Ginkgo\lib;$env:origin_path" - - name: install - run: | - cd build - cmake --install . --config ${{ matrix.config.build_type }} - cmake --build . 
--target test_install --config ${{ matrix.config.build_type }} - windows_mingw: - strategy: - fail-fast: false - matrix: - config: - - {shared: "ON", build_type: "Debug", name: "omp/debug/shared", cflags: "-O1"} - - {shared: "OFF", build_type: "Release", name: "omp/release/static", cflags: ""} - name: mingw/${{ matrix.config.name }} - runs-on: [windows-latest] - steps: - - uses: actions/checkout@v2 - - name: shared_env - if: matrix.config.shared == 'ON' - run: | - echo "::set-env name=origin_path::$env:PATH" - echo "::add-path::$pwd\build\windows_shared_library" - - name: debug_env - if: matrix.config.build_type == 'Debug' - run: | - echo "::set-env name=CXXFLAGS::-Wa,-mbig-obj" - - name: configure - # Use cmd to remove the path easily - run: | - set PATH=%PATH:C:\Program Files\Git\bin;=% - set PATH=%PATH:C:\Program Files\Git\usr\bin;=% - bcdedit /set IncreaseUserVa 3072 - editbin /LARGEADDRESSAWARE "C:\Program Files\Git\mingw64\bin\cc1plus.exe" - mkdir build - cd build - cmake -G "MinGW Makefiles" -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_COMPILER_FLAGS=${{ matrix.config.cflags }} .. - cmake --build . -j4 - ctest . --output-on-failure - shell: cmd - - name: install_shared_env - if: matrix.config.shared == 'ON' - run: | - echo "::set-env name=PATH::C:\Program Files (x86)\Ginkgo\lib;$env:origin_path" - - name: install - run: | - set PATH=%PATH:C:\Program Files\Git\bin;=% - set PATH=%PATH:C:\Program Files\Git\usr\bin;=% - cd build - cmake --install . - cmake --build . 
--target test_install - shell: cmd - - windows_cygwin: - strategy: - fail-fast: false - matrix: - config: - - {shared: "ON", build_type: "Debug", name: "omp/debug/shared", cflags: "-O1"} - - {shared: "OFF", build_type: "Release", name: "omp/release/static", cflags: ""} - name: cygwin/${{ matrix.config.name }} - runs-on: [windows-latest] - steps: - - run: git config --global core.autocrlf input - - uses: actions/checkout@v2 - - name: setup - run: | - choco install cygwin -y - choco install cyg-get -y - cyg-get cmake make gcc-g++ git - - name: shared_static_env - run: | - echo "::set-env name=shared_ON_path::;$pwd\build\windows_shared_library" - echo "::set-env name=shared_OFF_path::" - - name: debug_env - if: matrix.config.build_type == 'Debug' - run: | - echo "::set-env name=CXXFLAGS::-Wa,-mbig-obj" - - name: configure - run: | - path C:\tools\cygwin\bin%shared_${{ matrix.config.shared }}_path% - mkdir build - cd build - bash -c "cmake -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_COMPILER_FLAGS=${{ matrix.config.cflags }} .." - bash -c "make -j4" - bash -c "ctest . 
--output-on-failure" - shell: cmd - - name: install_shared - if: matrix.config.shared == 'ON' - run: | - path C:\tools\cygwin\bin - cd build - bash -c "make install" - bash -c "export PATH=/usr/local/lib:$PATH && make test_install" - shell: cmd - - name: install_static - if: matrix.config.shared == 'OFF' - run: | - path C:\tools\cygwin\bin - cd build - bash -c "make install" - bash -c "make test_install" - shell: cmd diff --git a/.github/workflows/windows-cygwin.yml b/.github/workflows/windows-cygwin.yml new file mode 100644 index 00000000000..253a5dd38f6 --- /dev/null +++ b/.github/workflows/windows-cygwin.yml @@ -0,0 +1,54 @@ +name: Windows-Cygwin + +on: + push: + workflow_dispatch: + inputs: + debug_enabled: + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + +jobs: + windows_cygwin: + strategy: + fail-fast: false + matrix: + config: + - {shared: "OFF", build_type: "Release", name: "omp/release/static", cflags: ""} + name: cygwin/${{ matrix.config.name }} + runs-on: [windows-latest] + steps: + - run: git config --global core.autocrlf input + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + + - name: setup + run: | + choco install cygwin -y + choco install cyg-get -y + cyg-get cmake make gcc-g++ git + + - name: Debug over SSH (tmate) + uses: mxschmitt/action-tmate@v3.5 + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }} + with: + limit-access-to-actor: true + + - name: configure + run: | + path C:\tools\cygwin\bin + mkdir build + cd build + bash -c "cmake -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_COMPILER_FLAGS=${{ matrix.config.cflags }} .." + bash -c "make -j4" + bash -c "ctest .
--output-on-failure" + shell: cmd + + - name: install + run: | + path C:\tools\cygwin\bin + cd build + bash -c "make install" + bash -c "export PATH=/usr/local/bin:$PATH && make test_install" + shell: cmd diff --git a/.github/workflows/windows-mingw.yml b/.github/workflows/windows-mingw.yml new file mode 100644 index 00000000000..7e774c3cc21 --- /dev/null +++ b/.github/workflows/windows-mingw.yml @@ -0,0 +1,52 @@ +name: Windows-MinGW + +on: + push: + workflow_dispatch: + inputs: + debug_enabled: + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + +jobs: + windows_mingw: + strategy: + fail-fast: false + matrix: + config: + - {shared: "OFF", build_type: "Release", name: "omp/release/static", cflags: ""} + name: mingw/${{ matrix.config.name }} + runs-on: [windows-latest] + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + + - name: Debug over SSH (tmate) + uses: mxschmitt/action-tmate@v3.5 + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }} + with: + limit-access-to-actor: true + + - name: configure + # Use cmd to remove the path easily + run: | + bcdedit /set IncreaseUserVa 3072 + editbin /LARGEADDRESSAWARE "C:\Program Files\Git\mingw64\bin\cc1plus.exe" + path %PATH:C:\Program Files\Git\bin;=% + path %PATH:C:\Program Files\Git\usr\bin;=% + mkdir build + cd build + cmake -G "MinGW Makefiles" -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_COMPILER_FLAGS=${{ matrix.config.cflags }} .. + cmake --build . -j4 + ctest . --output-on-failure + shell: cmd + + - name: install + run: | + set PATH=%PATH:C:\Program Files\Git\bin;=% + set PATH=%PATH:C:\Program Files\Git\usr\bin;=%;C:\Program Files (x86)\Ginkgo\bin + cd build + cmake --install . + cmake --build .
--target test_install + shell: cmd diff --git a/.github/workflows/windows-msvc-cuda.yml b/.github/workflows/windows-msvc-cuda.yml new file mode 100644 index 00000000000..e2c0a3b8c43 --- /dev/null +++ b/.github/workflows/windows-msvc-cuda.yml @@ -0,0 +1,49 @@ +name: Windows-MSVC-CUDA (compile-only) + +on: + push: + workflow_dispatch: + inputs: + debug_enabled: + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + +jobs: + windows_cuda: + strategy: + fail-fast: false + matrix: + config: + - {version: "latest", name: "cuda-latest/release/shared", "mixed": "ON"} + name: msvc/${{ matrix.config.name }} (only compile) + runs-on: [windows-latest] + + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + - name: setup (versioned) + if: matrix.config.version != 'latest' + run: | + choco install cuda --version=${{ matrix.config.version }} -y + + - name: setup (latest) + if: matrix.config.version == 'latest' + run: | + choco install cuda -y + + - name: Debug over SSH (tmate) + uses: mxschmitt/action-tmate@v3.5 + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }} + with: + limit-access-to-actor: true + + - name: configure + run: | + $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + mkdir build + cd build + cmake -T version=14.25 -DGINKGO_BUILD_CUDA=ON -DGINKGO_BUILD_OMP=OFF -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_CUDA_ARCHITECTURES=60 .. + cmake --build .
-j4 --config Release diff --git a/.github/workflows/windows-msvc-ref.yml b/.github/workflows/windows-msvc-ref.yml new file mode 100644 index 00000000000..aa11a2ffba1 --- /dev/null +++ b/.github/workflows/windows-msvc-ref.yml @@ -0,0 +1,47 @@ +name: Windows-MSVC-Reference + +on: + push: + workflow_dispatch: + inputs: + debug_enabled: + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + +jobs: + windows_ref: + strategy: + fail-fast: false + matrix: + config: + - {shared: "ON", build_type: "Debug", name: "reference/debug/shared", "mixed": "ON"} + - {shared: "OFF", build_type: "Release", name: "reference/release/static", "mixed": "OFF"} + # Debug static needs too much storage + # - {shared: "OFF", build_type: "Debug", name: "reference/debug/static"} + name: msvc/${{ matrix.config.name }} + runs-on: [windows-latest] + steps: + - name: Checkout the latest code (shallow clone) + uses: actions/checkout@v2 + + - name: Debug over SSH (tmate) + uses: mxschmitt/action-tmate@v3.5 + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }} + with: + limit-access-to-actor: true + + - name: configure + run: | + mkdir build + cd build + cmake -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_CXX_FLAGS_DEBUG="/MDd /Zi /Ob1 /Od /RTC1" -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} .. + cmake --build . -j4 --config ${{ matrix.config.build_type }} + ctest . -C ${{ matrix.config.build_type }} --output-on-failure + + - name: install + run: | + $env:PATH="$env:PATH;C:\Program Files (x86)\Ginkgo\bin" + cd build + cmake --install . --config ${{ matrix.config.build_type }} + cmake --build .
--target test_install --config ${{ matrix.config.build_type }} diff --git a/.gitignore b/.gitignore index d8b09ffa944..af0a88ef513 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # Created by https://www.gitignore.io/api/ -###CMake +### CMake CMakeCache.txt CMakeFiles CMakeScripts @@ -12,10 +12,21 @@ compile_commands.json CTestTestfile.cmake build +### IDE +# Clion +.idea + +# Eclipse +.cproject +.project + +# Visual studio code +.vscode + ### Generated header file include/config.hpp -###C++ +### C++ # Prerequisites *.d @@ -49,10 +60,25 @@ include/config.hpp *.out *.app -#other +# Others diff.patch GRTAGS GSYMS GTAGS GPATH .gitignore + +### OS +# temporary and backup files +*~ +*# +*.swp + +# MacOS self-generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c2ab6d813a1..2cfb0faf44f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,7 @@ stages: + - init-status - sync + - trigger_pipeline - build - code_quality - deploy @@ -9,6 +11,11 @@ stages: - benchmark-omp - benchmark-reference - on-failure + - finalize-status + +include: + - local: '.gitlab/condition.yml' + - local: '.gitlab/image.yml' # Templates with reasonable defaults for builds and tests .variables_template: &default_variables @@ -22,14 +29,22 @@ stages: BUILD_OMP: "OFF" BUILD_CUDA: "OFF" BUILD_HIP: "OFF" + BUILD_HWLOC: "ON" + FAST_TESTS: "OFF" + DPCPP_SINGLE_MODE: "OFF" + MIXED_PRECISION: "ON" + RUN_EXAMPLES: "OFF" CONFIG_LOG: "ON" CXX_FLAGS: "" EXTRA_CMAKE_FLAGS: "" + EXPORT_BUILD_DIR: "OFF" + CI_PROJECT_DIR_SUFFIX: "" .before_script_template: &default_before_script - export NUM_CORES=${CI_PARALLELISM} - export OMP_NUM_THREADS=${NUM_CORES} - - export CUDA_VISIBLE_DEVICES=0 + - export CCACHE_DIR=${CCACHE_DIR} + - export CCACHE_MAXSIZE=${CCACHE_MAXSIZE} .before_script_git_template: &git_before_script - eval $(ssh-agent -s) @@ -50,7 +65,7 @@ stages: 
CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); fi - - cmake ${CI_PROJECT_DIR} + - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} @@ -59,9 +74,16 @@ stages: -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON + -DGINKGO_FAST_TESTS=${FAST_TESTS} + -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} + -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} - - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} + -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} + -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} + - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install + - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi dependencies: [] except: - schedules @@ -76,7 +98,9 @@ stages: CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); fi - - cmake ${CI_PROJECT_DIR} + - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi + - if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi + - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} @@ -85,20 +109,100 @@ stages: -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON + -DGINKGO_FAST_TESTS=${FAST_TESTS} + 
-DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG} + -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} + -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} + -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install - | (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - ctest -V - ninja test_install + - pushd test/test_install + - ninja install + - popd + - | + if [ "${RUN_EXAMPLES}" == "ON" ]; then + export EX_ARG="reference" + ninja run_all_examples + ninja validate_all_examples + if [ "${BUILD_OMP}" == "ON" ]; then + export EX_ARG="omp" + ninja run_all_examples + ninja validate_all_examples + fi + if [ "${BUILD_CUDA}" == "ON" ]; then + export EX_ARG="cuda" + ninja run_all_examples + ninja validate_all_examples + fi + if [ "${BUILD_HIP}" == "ON" ]; then + export EX_ARG="hip" + ninja run_all_examples + ninja validate_all_examples + fi + fi + - if [ -n "${SYCL_DEVICE_TYPE}" ]; then unset SYCL_DEVICE_TYPE; fi + - if [ -n "${SYCL_DEVICE_FILTER}" ]; then unset SYCL_DEVICE_FILTER; fi + - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi dependencies: [] except: - schedules +status_pending: + stage: init-status + extends: + - .pr_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + STATUS_CONTEXT: "quick" + script: | + curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ + -d "{\"state\":\"pending\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" + +status_success: + stage: finalize-status + extends: + - .pr_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + STATUS_CONTEXT: "quick" + # we always exit with the code 3 such that it will process when retrying + script: | + curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ +
https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ + -d "{\"state\":\"success\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" + exit 3 + allow_failure: + exit_codes: 3 + +status_failure: + stage: finalize-status + extends: + - .pr_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + STATUS_CONTEXT: "quick" + # we always exit with the code 3 such that it will process when retrying + script: | + curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + https://api.github.com/repos/ginkgo-project/ginkgo/statuses/${CI_COMMIT_SHA} \ + -d "{\"state\":\"failure\",\"context\":\"ci/gitlab/${STATUS_CONTEXT}\",\"target_url\":\"${CI_PIPELINE_URL}\"}" + exit 3 + when: on_failure + allow_failure: + exit_codes: 3 + + sync: stage: sync - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .use_gko-nocuda-gnu9-llvm8 variables: GIT_STRATEGY: none PRIVATE_REPO: git@gitlab.com:ginkgo-project/ginkgo.git @@ -115,153 +219,138 @@ sync: - develop except: - schedules - tags: - - private_ci - - cpu +trigger_pipeline: + stage: trigger_pipeline + extends: + - .pr_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + STATUS_CONTEXT: "quick" + script: + - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" + | jq '.items[0].number') + - | + if [[ "${PR_ID}" != "null" ]]; then + echo "Finding the corresponding Pull Request - ${PR_ID}" + echo "Checking whether the PR contains ST:ready-to-merge or ST:run-full-test labels" + ENABLE_FULL_PIPELINE=$(curl -X GET -s -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${BOT_STATUS_TOKEN}" \ + "https://api.github.com/repos/ginkgo-project/ginkgo/issues/${PR_ID}" | jq -r \ + 'any( [.labels | .[] | .name ] | .[] ; . == "ST:ready-to-merge" or . 
== "ST:run-full-test")') + if [[ "${ENABLE_FULL_PIPELINE}" == "true" ]]; then + echo "trigger full pipeline" + curl -X POST -F token=${CI_JOB_TOKEN} -F "ref=${CI_COMMIT_REF_NAME}" -F "variables[STATUS_CONTEXT]=full" \ + https://gitlab.com/api/v4/projects/6431537/trigger/pipeline + else + echo "does not contain required labels" + fi + else + echo "Can not find the corresponding Pull Request" + fi + # Override variables condition + only: + variables: + - $RUN_CI_TAG && $STATUS_CONTEXT == "quick" # Build jobs +# Job with example runs. build/cuda90/gcc/all/debug/shared: - <<: *default_build_with_test - image: localhost:5000/gko-cuda90-gnu5-llvm39 + <<: *default_build + extends: + - .quick_test_condition + - .use_gko-cuda90-gnu5-llvm39 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" + RUN_EXAMPLES: "ON" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/cuda90/clang/all/release/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda90-gnu5-llvm39 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda90-gnu5-llvm39 variables: <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # cuda 9.1 and friends build/cuda91/gcc/all/debug/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda91-gnu6-llvm40 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda91-gnu6-llvm40 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/cuda91/clang/all/release/shared: - <<: *default_build_with_test - image: 
localhost:5000/gko-cuda91-gnu6-llvm40 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda91-gnu6-llvm40 variables: <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" BUILD_TYPE: "Release" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # cuda 9.2 and friends -build/cuda92/gcc/all/release/debug: +build/cuda92/gcc/all/release/shared: <<: *default_build_with_test - image: localhost:5000/gko-cuda92-gnu7-llvm50-intel2017 + extends: + - .quick_test_condition + - .use_gko-cuda92-gnu7-llvm50-intel2017 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Release" - CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu - -build/cuda92/intel/cuda/release/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda92-gnu7-llvm50-intel2017 - variables: - <<: *default_variables - C_COMPILER: "icc" - CXX_COMPILER: "icpc" - BUILD_OMP: "ON" - BUILD_CUDA: "ON" - BUILD_TYPE: "Release" - BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu + CUDA_ARCH: 61 # cuda 10.0 and friends +# Make sure that our jobs run when using self-installed +# third-party HWLOC. 
build/cuda100/gcc/all/debug/shared: - <<: *default_build_with_test - image: localhost:5000/gko-cuda100-gnu7-llvm60-intel2018 + <<: *default_build + extends: + - .quick_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu +# Make sure that our jobs run when HWLOC is +# forcibly switched off build/cuda100/clang/all/release/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda100-gnu7-llvm60-intel2018 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 variables: <<: *default_variables C_COMPILER: "clang" @@ -269,20 +358,16 @@ build/cuda100/clang/all/release/static: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" + BUILD_HWLOC: "OFF" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/cuda100/intel/cuda/release/shared: - <<: *default_build_with_test - image: localhost:5000/gko-cuda100-gnu7-llvm60-intel2018 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 variables: <<: *default_variables C_COMPILER: "icc" @@ -291,36 +376,43 @@ build/cuda100/intel/cuda/release/shared: BUILD_CUDA: "ON" BUILD_TYPE: "Release" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu + +# Build CUDA NVIDIA without omp +build/cuda100/intel/cuda_wo_omp/release/shared: + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 + variables: + <<: *default_variables + C_COMPILER: "icc" + CXX_COMPILER: "icpc" + BUILD_CUDA: "ON" + BUILD_HIP: "ON" + BUILD_HWLOC: "OFF" + BUILD_TYPE: "Release" + CUDA_ARCH: 35 # cuda 10.1 and friends build/cuda101/gcc/all/debug/shared: - <<: *default_build_with_test - image: 
localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/cuda101/clang/all/release/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables C_COMPILER: "clang" @@ -331,37 +423,27 @@ build/cuda101/clang/all/release/static: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # clang-cuda with cuda 10.1 and friends -build/clang-cuda101/gcc/all/release/shared: - <<: *default_build_with_test - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 +build/clang-cuda101/gcc/cuda/release/shared: + <<: *default_build + extends: + - .quick_test_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 variables: <<: *default_variables CUDA_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_CUDA: "ON" - BUILD_HIP: "ON" + BUILD_HIP: "OFF" BUILD_TYPE: "Release" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/clang-cuda101/clang/cuda/debug/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 variables: <<: *default_variables C_COMPILER: "clang" @@ -370,38 +452,34 @@ build/clang-cuda101/clang/cuda/debug/static: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # cuda 10.2 and friends + +# works when there is no 
hwloc and tpl hwloc is also switched off. build/cuda102/gcc/all/debug/shared: - <<: *default_build_with_test - image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda102-gnu8-llvm8-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" + BUILD_HWLOC: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu +# Use TPL hwloc when no system hwloc is available build/cuda102/clang/all/release/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda102-gnu8-llvm8-intel2019 variables: <<: *default_variables C_COMPILER: "clang" @@ -412,17 +490,12 @@ build/cuda102/clang/all/release/static: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu build/cuda102/intel/cuda/debug/static: - <<: *default_build_with_test - image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019 + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda102-gnu8-llvm8-intel2019 variables: <<: *default_variables C_COMPILER: "icc" @@ -430,37 +503,29 @@ build/cuda102/intel/cuda/debug/static: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # cuda 11.0 and friends build/cuda110/gcc/cuda/debug/shared: <<: *default_build_with_test - image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020 + extends: + - .full_test_condition + - .use_gko-cuda110-gnu9-llvm9-intel2020 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Debug" - CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu + FAST_TESTS: "ON" + CUDA_ARCH: 61 
build/cuda110/clang/cuda/release/static: <<: *default_build_with_test - image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020 + extends: + - .full_test_condition + - .use_gko-cuda110-gnu9-llvm9-intel2020 variables: <<: *default_variables C_COMPILER: "clang" @@ -469,18 +534,13 @@ build/cuda110/clang/cuda/release/static: BUILD_CUDA: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu + CUDA_ARCH: 61 build/cuda110/intel/cuda/debug/static: <<: *default_build_with_test - image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020 + extends: + - .quick_test_condition + - .use_gko-cuda110-gnu9-llvm9-intel2020 variables: <<: *default_variables C_COMPILER: "icc" @@ -488,36 +548,29 @@ build/cuda110/intel/cuda/debug/static: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 35 - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu + CUDA_ARCH: 61 # HIP AMD build/amd/gcc/hip/debug/shared: <<: *default_build_with_test - image: localhost:5000/gko-amd-gnu8-llvm7 + extends: + - .quick_test_condition + - .use_gko-amd-gnu8-llvm7 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_HIP: "ON" + RUN_EXAMPLES: "ON" BUILD_TYPE: "Debug" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - amd - - gpu + FAST_TESTS: "ON" build/amd/clang/hip/release/static: <<: *default_build_with_test - image: localhost:5000/gko-amd-gnu8-llvm7 + extends: + - .quick_test_condition + - .use_gko-amd-gnu8-llvm7 variables: <<: *default_variables C_COMPILER: "clang" @@ -526,97 +579,88 @@ build/amd/clang/hip/release/static: BUILD_HIP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - amd - - gpu + +# Build HIP AMD without omp +build/amd/clang/hip_wo_omp/release/shared: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-amd-gnu8-llvm7 + 
variables: + <<: *default_variables + C_COMPILER: "clang" + CXX_COMPILER: "clang++" + BUILD_HIP: "ON" + BUILD_TYPE: "Release" # no cuda but latest gcc and clang build/nocuda/gcc/core/debug/static: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8 variables: <<: *default_variables BUILD_REFERENCE: "OFF" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cpu + BUILD_HWLOC: "OFF" build/nocuda/clang/core/release/shared: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8 variables: <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_REFERENCE: "OFF" BUILD_TYPE: "Release" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cpu build/nocuda/intel/core/debug/shared: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8-intel variables: <<: *default_variables C_COMPILER: "icc" CXX_COMPILER: "icpc" BUILD_REFERENCE: "OFF" BUILD_TYPE: "Debug" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - cpu + FAST_TESTS: "ON" build/nocuda/gcc/omp/release/shared: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_TYPE: "Release" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cpu build/nocuda/clang/omp/debug/static: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8 variables: <<: *default_variables C_COMPILER: "clang" CXX_COMPILER: "clang++" BUILD_OMP: "ON" BUILD_TYPE: "Debug" + FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" - only: - 
variables: - - $RUN_CI_TAG - tags: - - private_ci - - cpu build/nocuda/intel/omp/release/static: <<: *default_build_with_test - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8-intel variables: <<: *default_variables C_COMPILER: "icc" @@ -624,109 +668,219 @@ build/nocuda/intel/omp/release/static: BUILD_OMP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - cpu +build/nocuda-nomixed/gcc/omp/release/shared: + <<: *default_build_with_test + extends: + - .quick_test_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + <<: *default_variables + BUILD_OMP: "ON" + BUILD_TYPE: "Release" + MIXED_PRECISION: "OFF" + +build/nocuda-nomixed/clang/omp/debug/static: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + <<: *default_variables + C_COMPILER: "clang" + CXX_COMPILER: "clang++" + BUILD_OMP: "ON" + BUILD_TYPE: "Debug" + BUILD_SHARED_LIBS: "OFF" + MIXED_PRECISION: "OFF" + +build/nocuda-nomixed/intel/omp/release/static: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8-intel + variables: + <<: *default_variables + C_COMPILER: "icc" + CXX_COMPILER: "icpc" + BUILD_OMP: "ON" + BUILD_TYPE: "Release" + BUILD_SHARED_LIBS: "OFF" + MIXED_PRECISION: "OFF" + +build/dpcpp/cpu/release/static: + <<: *default_build_with_test + extends: + - .quick_test_condition + - .use_gko-oneapi-cpu + variables: + <<: *default_variables + C_COMPILER: "gcc" + CXX_COMPILER: "dpcpp" + BUILD_DPCPP: "ON" + BUILD_TYPE: "Release" + BUILD_SHARED_LIBS: "ON" + SYCL_DEVICE_TYPE: "CPU" + +# It gives two available backends of GPU on tests +build/dpcpp/igpu/release/shared: + <<: *default_build_with_test + extends: + - .quick_test_condition + - .use_gko-oneapi-igpu + variables: + <<: *default_variables + C_COMPILER: "gcc" + CXX_COMPILER: "dpcpp" + BUILD_DPCPP: "ON" + 
BUILD_TYPE: "Release" + BUILD_SHARED_LIBS: "ON" + DPCPP_SINGLE_MODE: "ON" + SYCL_DEVICE_TYPE: "GPU" + +build/dpcpp/level_zero_igpu/debug/shared: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-oneapi-igpu + variables: + <<: *default_variables + C_COMPILER: "gcc" + CXX_COMPILER: "dpcpp" + BUILD_DPCPP: "ON" + BUILD_TYPE: "Debug" + BUILD_SHARED_LIBS: "ON" + DPCPP_SINGLE_MODE: "ON" + SYCL_DEVICE_FILTER: "Level_Zero:GPU" + +# It gives two available backends of GPU on tests +build/dpcpp/dgpu/release/static: + <<: *default_build_with_test + extends: + - .full_test_condition + - .use_gko-oneapi-igpu + variables: + <<: *default_variables + C_COMPILER: "gcc" + CXX_COMPILER: "dpcpp" + BUILD_DPCPP: "ON" + BUILD_TYPE: "Release" + BUILD_SHARED_LIBS: "OFF" + DPCPP_SINGLE_MODE: "ON" + SYCL_DEVICE_TYPE: "GPU" + +build/dpcpp/level_zero_dgpu/release/shared: + <<: *default_build_with_test + extends: + - .quick_test_condition + - .use_gko-oneapi-dgpu + variables: + <<: *default_variables + C_COMPILER: "gcc" + CXX_COMPILER: "dpcpp" + BUILD_DPCPP: "ON" + BUILD_TYPE: "Release" + DPCPP_SINGLE_MODE: "ON" + SYCL_DEVICE_FILTER: "Level_Zero:GPU" # Job with important warnings as error warnings: <<: *default_build stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" CXX_FLAGS: "-Werror=pedantic -pedantic-errors" - only: - variables: - - $RUN_CI_TAG - dependencies: [] allow_failure: yes - tags: - - private_ci - - cuda - - gpu # Ensure kernel modules do not depend on core no-circular-deps: <<: *default_build stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .quick_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" EXTRA_CMAKE_FLAGS:
'-DGINKGO_CHECK_CIRCULAR_DEPS=on' - only: - variables: - - $RUN_CI_TAG - dependencies: [] allow_failure: no - tags: - - private_ci - - cuda - - gpu + +# Ensure Ginkgo builds from a subdirectory +subdir-build: + <<: *default_build + stage: code_quality + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + <<: *default_variables + BUILD_OMP: "ON" + CI_PROJECT_DIR_SUFFIX: "/test_subdir" + allow_failure: no + +# Ensure Ginkgo can be used when exporting the build directory +export-build: + <<: *default_build + stage: code_quality + extends: + - .full_test_condition + - .use_gko-nocuda-gnu9-llvm8 + variables: + <<: *default_variables + BUILD_OMP: "ON" + EXPORT_BUILD_DIR: "ON" + allow_failure: no # Run clang-tidy and iwyu clang-tidy: <<: *default_build stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" EXTRA_CMAKE_FLAGS: '-DGINKGO_WITH_CLANG_TIDY=ON' - only: - variables: - - $RUN_CI_TAG - dependencies: [] allow_failure: yes - tags: - - private_ci - - cuda - - gpu iwyu: <<: *default_build stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .full_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 variables: <<: *default_variables BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_CUDA: "HIP" EXTRA_CMAKE_FLAGS: '-DGINKGO_WITH_IWYU=ON' - only: - variables: - - $RUN_CI_TAG - dependencies: [] allow_failure: yes - tags: - - private_ci - - cuda - - gpu # Code analysis, coverage and reporting tool # For short living branches or PRs, try to detect an open PR sonarqube_cov_: stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .quick_test_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 before_script: *default_before_script script: - PR_ID=$(curl
"https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" + - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" | jq '.items[0].number') - if [[ "${PR_ID}" != "null" ]]; then - target_branch=$(curl + target_branch=$(curl -s "https://api.github.com/repos/ginkgo-project/ginkgo/pulls/${PR_ID}" | jq '.base.ref' | sed 's/"//g'); sonar_branching="-Dsonar.pullrequest.branch=${CI_COMMIT_REF_NAME} @@ -743,25 +897,19 @@ sonarqube_cov_: -Dsonar.cfamily.gcov.reportsPath=build/Testing/CoverageInfo ${sonar_branching} - bash <(curl -s https://codecov.io/bash) -f "\!*examples*" -f "\!*third_party*" -f "\!*c\\+\\+*" -f "\!*benchmark*" - dependencies: [] except: refs: - develop - master - tags - only: - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # For long living branches, do not detect the PR. A PR would always be detected # (the one that was merged). sonarqube_cov: stage: code_quality - image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019 + extends: + - .deploy_condition + - .use_gko-cuda101-gnu8-llvm7-intel2019 before_script: *default_before_script script: - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE @@ -771,24 +919,14 @@ sonarqube_cov: -Dsonar.cfamily.gcov.reportsPath=build/Testing/CoverageInfo -Dsonar.branch.name=${CI_COMMIT_REF_NAME} - bash <(curl -s https://codecov.io/bash) -f "\!*examples*" -f "\!*third_party*" -f "\!*c\\+\\+*" -f "\!*benchmark*" - dependencies: [] - only: - refs: - - develop - - master - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # Deploy documentation to github-pages gh-pages: stage: deploy - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .deploy_condition + - .use_gko-nocuda-gnu9-llvm8 variables: <<: *default_variables PUBLIC_REPO: git@github.com:ginkgo-project/ginkgo.git @@ -818,24 +956,13 @@ gh-pages: - git add -A - git diff --quiet HEAD || (git commit -m "Update documentation from ${CURRENT_SHA}" && git push) - dependencies: 
[] - only: - refs: - - develop - - master - - tags - variables: - - $RUN_CI_TAG - except: - - schedules - tags: - - private_ci - - cpu threadsanitizer: stage: QoS_tools - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + extends: + - .deploy_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 before_script: *default_before_script script: - LD_PRELOAD=/usr/local/lib/libomp.so @@ -844,100 +971,51 @@ threadsanitizer: -DCTEST_MEMORYCHECK_TYPE=ThreadSanitizer -DCTEST_MEMORYCHECK_SANITIZER_OPTIONS=ignore_noninstrumented_modules=1 --timeout 6000 - dependencies: [] - only: - refs: - - master - - develop - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu leaksanitizer: stage: QoS_tools - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + extends: + - .deploy_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=LSAN -DCTEST_MEMORYCHECK_TYPE=LeakSanitizer - dependencies: [] - only: - refs: - - master - - develop - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu addresssanitizer: stage: QoS_tools - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + extends: + - .deploy_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=ASAN -DCTEST_MEMORYCHECK_TYPE=AddressSanitizer - dependencies: [] - only: - refs: - - master - - develop - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu undefinedsanitizer: stage: QoS_tools - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + extends: + - .deploy_condition + - .use_gko-cuda101-gnu8-llvm10-intel2019 before_script: *default_before_script script: # the Gold linker is required because of a linker flag issues given by UBsan # in the Ubuntu setup we are using. 
- ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=UBSAN -DCTEST_MEMORYCHECK_TYPE=UndefinedBehaviorSanitizer - dependencies: [] - only: - refs: - - master - - develop - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu cudamemcheck: stage: QoS_tools - image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019 + extends: + - .deploy_condition + image: ginkgohub/cuda:101-gnu8-llvm10-intel2019 + tags: + - private_ci + - nvidia-gpu before_script: *default_before_script script: - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=RelWithDebInfo -DCTEST_MEMORYCHECK_TYPE=CudaMemcheck - dependencies: [] - only: - refs: - - master - - develop - - tags - variables: - - $RUN_CI_TAG - tags: - - private_ci - - cuda - - gpu # Benchmark build .benchmark_before_script_template: &default_benchmark_before_script @@ -960,7 +1038,8 @@ cudamemcheck: fineci-benchmark-build: stage: benchmark-build - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .use_gko-nocuda-gnu9-llvm8-intel variables: <<: *default_variables BENCHMARK_SERVER: FINECI @@ -999,10 +1078,6 @@ fineci-benchmark-build: - schedules # - develop # - master - tags: - - private_ci - - cpu - - cuda # Benchmark runs @@ -1041,7 +1116,8 @@ fineci-benchmark-build: fineci-benchmark-cuda: stage: benchmark-cuda - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .use_gko-nocuda-gnu9-llvm8-intel variables: <<: *default_variables BENCHMARK_SERVER: FINECI @@ -1050,10 +1126,6 @@ fineci-benchmark-cuda: BENCHMARK_REPO: git@github.com:ginkgo-project/ginkgo-data.git SYSTEM_NAME: K20Xm <<: *default_benchmark - tags: - - private_ci - - cpu - - cuda # fineci-benchmark-omp: # stage: benchmark-omp @@ -1079,7 +1151,8 @@ fineci-benchmark-cuda: new-issue-on-failure: stage: on-failure - image: localhost:5000/gko-nocuda-gnu9-llvm8 + extends: + - .use_gko-nocuda-gnu9-llvm8 script: curl --request POST 
"https://gitlab.com/api/v4/projects/${PROJECT_ID}/issues?private_token=${BOT_ACCESS_TOKEN}&title=Error%20in%20${CI_PROJECT_NAME}%20with%20pipeline%20${CI_PIPELINE_ID}%20for%20commit%20${CI_COMMIT_SHA}&labels&description=${CI_PIPELINE_URL}" when: on_failure only: @@ -1087,6 +1160,3 @@ new-issue-on-failure: - develop - master dependencies: [] - tags: - - private_ci - - cpu diff --git a/.gitlab/condition.yml b/.gitlab/condition.yml new file mode 100644 index 00000000000..fffb88738a5 --- /dev/null +++ b/.gitlab/condition.yml @@ -0,0 +1,36 @@ +.pr_condition: + only: + variables: + - $RUN_CI_TAG + except: + refs: + - develop + - master + - tags + dependencies: [] + +.full_test_condition: + only: + variables: + - $RUN_CI_TAG && $STATUS_CONTEXT == "full" + - $RUN_CI_TAG && ($CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "develop") + - $RUN_CI_TAG && $CI_COMMIT_TAG + dependencies: [] + +.quick_test_condition: + only: + variables: + - $RUN_CI_TAG && $STATUS_CONTEXT == null + dependencies: [] + +.deploy_condition: + only: + refs: + - develop + - master + - tags + variables: + - $RUN_CI_TAG + except: + - schedules + dependencies: [] diff --git a/.gitlab/image.yml b/.gitlab/image.yml new file mode 100644 index 00000000000..a2afe0fcd53 --- /dev/null +++ b/.gitlab/image.yml @@ -0,0 +1,93 @@ +.use_gko-nocuda-gnu9-llvm8: + image: ginkgohub/cpu:gnu9-llvm8 + tags: + - private_ci + - cpu + - amdci + +.use_gko-nocuda-gnu9-llvm8-intel: + image: ginkgohub/cpu:gnu9-llvm8-intel2020 + tags: + - private_ci + - cpu + - controller + +.use_gko-cuda90-gnu5-llvm39: + image: ginkgohub/cuda:90-gnu5-llvm39 + tags: + - private_ci + - controller + - cpu + +.use_gko-cuda91-gnu6-llvm40: + image: ginkgohub/cuda:91-gnu6-llvm40 + tags: + - private_ci + - controller + - cpu + +.use_gko-cuda92-gnu7-llvm50-intel2017: + image: ginkgohub/cuda:92-gnu7-llvm50-intel2017 + tags: + - private_ci + - nvidia-gpu + +.use_gko-cuda100-gnu7-llvm60-intel2018: + image: ginkgohub/cuda:100-gnu7-llvm60-intel2018 + tags: 
+ - private_ci + - controller + - cpu + +.use_gko-cuda101-gnu8-llvm7-intel2019: + image: ginkgohub/cuda:101-gnu8-llvm7-intel2019 + tags: + - private_ci + - controller + - cpu + +.use_gko-cuda101-gnu8-llvm10-intel2019: + image: ginkgohub/cuda:101-gnu8-llvm10-intel2019 + tags: + - private_ci + - controller + - cpu + +.use_gko-cuda102-gnu8-llvm8-intel2019: + image: ginkgohub/cuda:102-gnu8-llvm8-intel2019 + tags: + - private_ci + - controller + - cpu + +.use_gko-cuda110-gnu9-llvm9-intel2020: + image: ginkgohub/cuda:110-gnu9-llvm9-intel2020 + tags: + - private_ci + - nvidia-gpu + +.use_gko-amd-gnu8-llvm7: + image: ginkgohub/rocm:gnu8-llvm7 + tags: + - private_ci + - amdci + - gpu + +.use_gko-oneapi-cpu: + image: ginkgohub/oneapi:latest + tags: + - private_ci + - intelci-igpu + - cpu + +.use_gko-oneapi-igpu: + image: ginkgohub/oneapi:latest + tags: + - private_ci + - intel-igpu + +.use_gko-oneapi-dgpu: + image: ginkgohub/oneapi:latest + tags: + - private_ci + - intel-dgpu diff --git a/ABOUT-LICENSING.md b/ABOUT-LICENSING.md index 3adba19ba6e..e9215ee35cf 100644 --- a/ABOUT-LICENSING.md +++ b/ABOUT-LICENSING.md @@ -240,6 +240,50 @@ also licensed the same as the deal.II library. > page](https://github.com/dealii/dealii/blob/master/LICENSE.md) or on the > official [GNU license page](https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html). + +For detecting the HWLOC library, we used a modified version of the FindHWLOC.cmake file from the MORSE-cmake library. The library is [available on gitlab](https://gitlab.inria.fr/solverstack/morse_cmake), and its LICENSE is available below: + +> ### +> # +> # @copyright (c) 2012-2020 Inria. All rights reserved. +> # @copyright (c) 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +> # +> ### +> # +> # This software is a computer program whose purpose is to process +> # Matrices Over Runtime Systems @ Exascale (MORSE). 
More information +> # can be found on the following website: http://www.inria.fr/en/teams/morse. +> # +> # This software is governed by the CeCILL-C license under French law and +> # abiding by the rules of distribution of free software. You can use, +> # modify and/ or redistribute the software under the terms of the CeCILL-C +> # license as circulated by CEA, CNRS and INRIA at the following URL +> # "http://www.cecill.info". +> # +> # As a counterpart to the access to the source code and rights to copy, +> # modify and redistribute granted by the license, users are provided only +> # with a limited warranty and the software's author, the holder of the +> # economic rights, and the successive licensors have only limited +> # liability. +> # +> # In this respect, the user's attention is drawn to the risks associated +> # with loading, using, modifying and/or developing or reproducing the +> # software by the user in light of its specific status of free software, +> # that may mean that it is complicated to manipulate, and that also +> # therefore means that it is reserved for developers and experienced +> # professionals having in-depth computer knowledge. Users are therefore +> # encouraged to load and test the software's suitability as regards their +> # requirements in conditions enabling the security of their systems and/or +> # data to be ensured and, more generally, to use and operate it in the +> # same conditions as regards security. +> # +> # The fact that you are presently reading this means that you have had +> # knowledge of the CeCILL-C license and that you accept its terms. +> # +> ### + + + __NOTE:__ Some of the options that pull additional software when compiling Ginkgo are ON by default, and have to be disabled manually to prevent third-party licensing. 
Refer to the [Installation section in diff --git a/BENCHMARKING.md b/BENCHMARKING.md index 6ce05bf072c..bc419f57386 100644 --- a/BENCHMARKING.md +++ b/BENCHMARKING.md @@ -143,12 +143,12 @@ The benchmark suite can take a number of configuration parameters. Benchmarks can be run only for `sparse matrix vector products (spmv)`, for full solvers (with or without preconditioners), or for preconditioners only when supported. The benchmark suite also allows to target a sub-part of the SuiteSparse matrix -collection. For details, see the [available benchmark options](### 5: Available +collection. For details, see the [available benchmark options](### 6: Available benchmark options). Here are the most important options: * `BENCHMARK={spmv, solver, preconditioner}` - allows to select the type of benchmark to be ran. -* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the - benchmarks should be ran on. +* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform + the benchmarks should be ran on. * `SYSTEM_NAME=` - a name which will be used to designate this platform (e.g. V100, RadeonVII, ...). * `SEGMENTS=` - Split the benchmarked matrix space into `` segments. If @@ -156,6 +156,8 @@ benchmark options). Here are the most important options: * `SEGMENT_ID=` - used in combination with the `SEGMENTS` variable. `` should be an integer between 1 and ``, the number of `SEGMENTS`. If specified, only the ``-th segment of the benchmark suite will be run. +* `BENCHMARK_PRECISION` - defines the precision the benchmarks are run in. + Supported values are: "double" (default), "single", "dcomplex" and "scomplex" * `MATRIX_LIST_FILE=/path/to/matrix_list.file` - allows to list SuiteSparse matrix id or name to benchmark. As an example, a matrix list file containing the following will ensure that benchmarks are ran for only those three @@ -256,8 +258,8 @@ The supported environment variables are described in the following list: benchmark. 
* `preconditioner` - Runs the preconditioner benchmarks on artificially generated block-diagonal matrices. -* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the - benchmarks should be ran on. Default is `cuda`. +* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform + the benchmarks should be ran on. Default is `cuda`. * `SYSTEM_NAME=` - a name which will be used to designate this platform (e.g. V100, RadeonVII, ...) and not overwrite previous results. Default is `unknown`. @@ -282,8 +284,8 @@ The supported environment variables are described in the following list: benchmark runs (downloads the collections, creates the result structure, etc.) and outputs the list of commands that would normally be run, but does not run the benchmarks themselves. Default is `false`. -* `PRECONDS={jacobi,adaptive-jacobi,ilu,parict,parilu,parilut,none}` the - preconditioners to use for either `solver` or `preconditioner` benchmarks. +* `PRECONDS={jacobi,ic,ilu,paric,parict,parilu,parilut,ic-isai,ilu-isai,paric-isai,parict-isai,parilu-isai,parilut-isai,none}` + the preconditioners to use for either `solver` or `preconditioner` benchmarks. Multiple options can be passed to this variable. Default is `none`. * `FORMATS={csr,coo,ell,hybrid,sellp,hybridxx,cusp_xx,hipsp_xx}` the matrix formats to benchmark for the `spmv` phase of the benchmark. Run @@ -293,11 +295,28 @@ The supported environment variables are described in the following list: library formats (cuSPARSE with `cusp_` prefix or hipSPARSE with `hipsp_` prefix) can be used as well. Multiple options can be passed. The default is `csr,coo,ell,hybrid,sellp`. -* `SOLVERS={bicgstab,bicg,cg,cgs,fcg,gmres}` - the solvers which should be - benchmarked. Multiple options can be passed. The default is `cg`. +* `SOLVERS={bicgstab,bicg,cg,cgs,fcg,gmres,cb_gmres_{keep,reduce1,reduce2,integer,ireduce1,ireduce2},lower_trs,upper_trs}` + - the solvers which should be benchmarked. 
Multiple options can be passed. + The default is `bicgstab,cg,cgs,fcg,gmres,idr`. Note that `lower/upper_trs` + by default don't use a preconditioner, as they are by default exact direct + solvers. * `SOLVERS_PRECISION=` - the minimal residual reduction before which the solver should stop. The default is `1e-6`. * `SOLVERS_MAX_ITERATION=` - the maximum number of iterations with which a solver should be ran. The default is `10000`. +* `SOLVERS_RHS={1, random, sinus}` - whether to use a vector of all ones, + random values or b = A * (s / |s|) with s(idx) = sin(idx) (for complex + numbers, s(idx) = sin(2*idx) + i * sin(2*idx+1)) + as the right-hand side in solver benchmarks. Default is `1`. +* `SOLVERS_INITIAL_GUESS={rhs,0,random}` - the initial guess generation of the + solvers. `rhs` uses the right-hand side, `0` uses a zero vector and `random` + generates a random vector as the initial guess. * `DETAILED={0,1}` - selects whether detailed benchmarks should be ran for the solver benchmarks, can be either `0` (off) or `1` (on). The default is `0`. +* `GPU_TIMER={true, false}` - If set to `true`, use the gpu timer, which is + valid for cuda/hip executor, to measure the timing. Default is `false`. +* `SOLVERS_JACOBI_MAX_BS` - sets the maximum block size for the Jacobi + preconditioner (if used, otherwise, it does nothing) in the solvers + benchmark. The default is `32`. +* `SOLVERS_GMRES_RESTART` - the maximum dimension of the Krylov space to use in + GMRES. The default is `100`. diff --git a/CHANGELOG.md b/CHANGELOG.md index c88066c7ebe..238f8921921 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,143 @@ commits. For a comprehensive list, use the following command: git log --first-parent ``` +## Version 1.4.0 + +The Ginkgo team is proud to announce the new Ginkgo minor release 1.4.0. This +release brings most of the Ginkgo functionality to the Intel DPC++ ecosystem +which enables Intel-GPU and CPU execution.
The only Ginkgo features which have +not been ported yet are some preconditioners. + +Ginkgo's mixed-precision support is greatly enhanced thanks to: +1. The new Accessor concept, which allows writing kernels featuring on-the-fly +memory compression, among other features. The accessor can be used as +header-only, see the [accessor BLAS benchmarks repository](https://github.com/ginkgo-project/accessor-BLAS/tree/develop) as a usage example. +2. All LinOps now transparently support mixed-precision execution. By default, +this is done through a temporary copy which may have a performance impact but +already allows mixed-precision research. + +Native mixed-precision ELL kernels are implemented which do not see this cost. +The accessor is also leveraged in a new CB-GMRES solver which allows for +performance improvements by compressing the Krylov basis vectors. Many other +features have been added to Ginkgo, such as reordering support, a new IDR +solver, Incomplete Cholesky preconditioner, matrix assembly support (only CPU +for now), machine topology information, and more! + +Supported systems and requirements: ++ For all platforms, cmake 3.13+ ++ C++14 compliant compiler ++ Linux and MacOS + + gcc: 5.3+, 6.3+, 7.3+, all versions after 8.1+ + + clang: 3.9+ + + Intel compiler: 2018+ + + Apple LLVM: 8.0+ + + CUDA module: CUDA 9.0+ + + HIP module: ROCm 3.5+ + + DPC++ module: Intel OneAPI 2021.3. Set the CXX compiler to `dpcpp`. ++ Windows + + MinGW and Cygwin: gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+ + + Microsoft Visual Studio: VS 2019 + + CUDA module: CUDA 9.0+, Microsoft Visual Studio + + OpenMP module: MinGW or Cygwin. 
+ + +Algorithm and important feature additions: ++ Add a new DPC++ Executor for SYCL execution and other base utilities + [#648](https://github.com/ginkgo-project/ginkgo/pull/648), [#661](https://github.com/ginkgo-project/ginkgo/pull/661), [#757](https://github.com/ginkgo-project/ginkgo/pull/757), [#832](https://github.com/ginkgo-project/ginkgo/pull/832) ++ Port matrix formats, solvers and related kernels to DPC++. For some kernels, + also make use of a shared kernel implementation for all executors (except + Reference). [#710](https://github.com/ginkgo-project/ginkgo/pull/710), [#799](https://github.com/ginkgo-project/ginkgo/pull/799), [#779](https://github.com/ginkgo-project/ginkgo/pull/779), [#733](https://github.com/ginkgo-project/ginkgo/pull/733), [#844](https://github.com/ginkgo-project/ginkgo/pull/844), [#843](https://github.com/ginkgo-project/ginkgo/pull/843), [#789](https://github.com/ginkgo-project/ginkgo/pull/789), [#845](https://github.com/ginkgo-project/ginkgo/pull/845), [#849](https://github.com/ginkgo-project/ginkgo/pull/849), [#855](https://github.com/ginkgo-project/ginkgo/pull/855), [#856](https://github.com/ginkgo-project/ginkgo/pull/856) ++ Add accessors which allow multi-precision kernels, among other things. + [#643](https://github.com/ginkgo-project/ginkgo/pull/643), [#708](https://github.com/ginkgo-project/ginkgo/pull/708) ++ Add support for mixed precision operations through apply in all LinOps. [#677](https://github.com/ginkgo-project/ginkgo/pull/677) ++ Add incomplete Cholesky factorizations and preconditioners as well as some + improvements to ILU. [#672](https://github.com/ginkgo-project/ginkgo/pull/672), [#837](https://github.com/ginkgo-project/ginkgo/pull/837), [#846](https://github.com/ginkgo-project/ginkgo/pull/846) ++ Add an AMGX implementation and kernels on all devices but DPC++. 
+ [#528](https://github.com/ginkgo-project/ginkgo/pull/528), [#695](https://github.com/ginkgo-project/ginkgo/pull/695), [#860](https://github.com/ginkgo-project/ginkgo/pull/860) ++ Add a new mixed-precision capability solver, Compressed Basis GMRES + (CB-GMRES). [#693](https://github.com/ginkgo-project/ginkgo/pull/693), [#763](https://github.com/ginkgo-project/ginkgo/pull/763) ++ Add the IDR(s) solver. [#620](https://github.com/ginkgo-project/ginkgo/pull/620) ++ Add a new fixed-size block CSR matrix format (for the Reference executor). + [#671](https://github.com/ginkgo-project/ginkgo/pull/671), [#730](https://github.com/ginkgo-project/ginkgo/pull/730) ++ Add native mixed-precision support to the ELL format. [#717](https://github.com/ginkgo-project/ginkgo/pull/717), [#780](https://github.com/ginkgo-project/ginkgo/pull/780) ++ Add Reverse Cuthill-McKee reordering. [#500](https://github.com/ginkgo-project/ginkgo/pull/500), [#649](https://github.com/ginkgo-project/ginkgo/pull/649) ++ Add matrix assembly support on CPUs. [#644](https://github.com/ginkgo-project/ginkgo/pull/644) ++ Extend ISAI from triangular to general and SPD matrices. [#690](https://github.com/ginkgo-project/ginkgo/pull/690) + +Other additions: ++ Add possibility to apply real matrices to complex vectors. + [#655](https://github.com/ginkgo-project/ginkgo/pull/655), [#658](https://github.com/ginkgo-project/ginkgo/pull/658) ++ Add functions to compute the absolute of a matrix format. [#636](https://github.com/ginkgo-project/ginkgo/pull/636) ++ Add symmetric permutation and improve existing permutations. + [#684](https://github.com/ginkgo-project/ginkgo/pull/684), [#657](https://github.com/ginkgo-project/ginkgo/pull/657), [#663](https://github.com/ginkgo-project/ginkgo/pull/663) ++ Add a MachineTopology class with HWLOC support [#554](https://github.com/ginkgo-project/ginkgo/pull/554), [#697](https://github.com/ginkgo-project/ginkgo/pull/697) ++ Add an implicit residual norm criterion.
[#702](https://github.com/ginkgo-project/ginkgo/pull/702), [#818](https://github.com/ginkgo-project/ginkgo/pull/818), [#850](https://github.com/ginkgo-project/ginkgo/pull/850) ++ Row-major accessor is generalized to more than 2 dimensions and a new + "block column-major" accessor has been added. [#707](https://github.com/ginkgo-project/ginkgo/pull/707) ++ Add a heat equation example. [#698](https://github.com/ginkgo-project/ginkgo/pull/698), [#706](https://github.com/ginkgo-project/ginkgo/pull/706) ++ Add ccache support in CMake and CI. [#725](https://github.com/ginkgo-project/ginkgo/pull/725), [#739](https://github.com/ginkgo-project/ginkgo/pull/739) ++ Allow tuning and benchmarking variables non-intrusively. [#692](https://github.com/ginkgo-project/ginkgo/pull/692) ++ Add triangular solver benchmark [#664](https://github.com/ginkgo-project/ginkgo/pull/664) ++ Add benchmarks for BLAS operations [#772](https://github.com/ginkgo-project/ginkgo/pull/772), [#829](https://github.com/ginkgo-project/ginkgo/pull/829) ++ Add support for different precisions and consistent index types in benchmarks. + [#675](https://github.com/ginkgo-project/ginkgo/pull/675), [#828](https://github.com/ginkgo-project/ginkgo/pull/828) ++ Add a GitHub bot system to facilitate development and PR management. + [#667](https://github.com/ginkgo-project/ginkgo/pull/667), [#674](https://github.com/ginkgo-project/ginkgo/pull/674), [#689](https://github.com/ginkgo-project/ginkgo/pull/689), [#853](https://github.com/ginkgo-project/ginkgo/pull/853) ++ Add Intel (DPC++) CI support and enable CI on HPC systems. [#736](https://github.com/ginkgo-project/ginkgo/pull/736), [#751](https://github.com/ginkgo-project/ginkgo/pull/751), [#781](https://github.com/ginkgo-project/ginkgo/pull/781) ++ Add SSH debugging for GitHub Actions CI. [#749](https://github.com/ginkgo-project/ginkgo/pull/749) ++ Add pipeline segmentation for better CI speed.
[#737](https://github.com/ginkgo-project/ginkgo/pull/737) + + +Changes: ++ Add a Scalar Jacobi specialization and kernels. [#808](https://github.com/ginkgo-project/ginkgo/pull/808), [#834](https://github.com/ginkgo-project/ginkgo/pull/834), [#854](https://github.com/ginkgo-project/ginkgo/pull/854) ++ Add implicit residual log for solvers and benchmarks. [#714](https://github.com/ginkgo-project/ginkgo/pull/714) ++ Change handling of the conjugate in the dense dot product. [#755](https://github.com/ginkgo-project/ginkgo/pull/755) ++ Improved Dense stride handling. [#774](https://github.com/ginkgo-project/ginkgo/pull/774) ++ Multiple improvements to the OpenMP kernels performance, including COO, +an exclusive prefix sum, and more. [#703](https://github.com/ginkgo-project/ginkgo/pull/703), [#765](https://github.com/ginkgo-project/ginkgo/pull/765), [#740](https://github.com/ginkgo-project/ginkgo/pull/740) ++ Allow specialization of submatrix and other dense creation functions in solvers. [#718](https://github.com/ginkgo-project/ginkgo/pull/718) ++ Improved Identity constructor and treatment of rectangular matrices. [#646](https://github.com/ginkgo-project/ginkgo/pull/646) ++ Allow CUDA/HIP executors to select allocation mode. [#758](https://github.com/ginkgo-project/ginkgo/pull/758) ++ Check if executors share the same memory. [#670](https://github.com/ginkgo-project/ginkgo/pull/670) ++ Improve test install and smoke testing support. [#721](https://github.com/ginkgo-project/ginkgo/pull/721) ++ Update the JOSS paper citation and add publications in the documentation. + [#629](https://github.com/ginkgo-project/ginkgo/pull/629), [#724](https://github.com/ginkgo-project/ginkgo/pull/724) ++ Improve the version output. [#806](https://github.com/ginkgo-project/ginkgo/pull/806) ++ Add some utilities for dim and span. [#821](https://github.com/ginkgo-project/ginkgo/pull/821) ++ Improved solver and preconditioner benchmarks. 
[#660](https://github.com/ginkgo-project/ginkgo/pull/660) ++ Improve benchmark timing and output. [#669](https://github.com/ginkgo-project/ginkgo/pull/669), [#791](https://github.com/ginkgo-project/ginkgo/pull/791), [#801](https://github.com/ginkgo-project/ginkgo/pull/801), [#812](https://github.com/ginkgo-project/ginkgo/pull/812) + + +Fixes: ++ Sorting fix for the Jacobi preconditioner. [#659](https://github.com/ginkgo-project/ginkgo/pull/659) ++ Also log the first residual norm in CGS. [#735](https://github.com/ginkgo-project/ginkgo/pull/735) ++ Fix BiCG and HIP CSR to work with complex matrices. [#651](https://github.com/ginkgo-project/ginkgo/pull/651) ++ Fix Coo SpMV on strided vectors. [#807](https://github.com/ginkgo-project/ginkgo/pull/807) ++ Fix segfault of extract_diagonal, add short-and-fat test. [#769](https://github.com/ginkgo-project/ginkgo/pull/769) ++ Fix device_reset issue by moving counter/mutex to device. [#810](https://github.com/ginkgo-project/ginkgo/pull/810) ++ Fix `EnableLogging` superclass. [#841](https://github.com/ginkgo-project/ginkgo/pull/841) ++ Support ROCm 4.1.x and breaking HIP_PLATFORM changes. [#726](https://github.com/ginkgo-project/ginkgo/pull/726) ++ Decreased test size for a few device tests. [#742](https://github.com/ginkgo-project/ginkgo/pull/742) ++ Fix multiple issues with our CMake HIP and RPATH setup. + [#712](https://github.com/ginkgo-project/ginkgo/pull/712), [#745](https://github.com/ginkgo-project/ginkgo/pull/745), [#709](https://github.com/ginkgo-project/ginkgo/pull/709) ++ Clean up our CMake installation step. [#713](https://github.com/ginkgo-project/ginkgo/pull/713) ++ Various simplifications and fixes to the Windows CMake setup. [#720](https://github.com/ginkgo-project/ginkgo/pull/720), [#785](https://github.com/ginkgo-project/ginkgo/pull/785) ++ Simplify third-party integration. [#786](https://github.com/ginkgo-project/ginkgo/pull/786) ++ Improve Ginkgo device arch flags management.
[#696](https://github.com/ginkgo-project/ginkgo/pull/696) ++ Other fixes and improvements to the CMake setup. + [#685](https://github.com/ginkgo-project/ginkgo/pull/685), [#792](https://github.com/ginkgo-project/ginkgo/pull/792), [#705](https://github.com/ginkgo-project/ginkgo/pull/705), [#836](https://github.com/ginkgo-project/ginkgo/pull/836) ++ Clarification of dense norm documentation [#784](https://github.com/ginkgo-project/ginkgo/pull/784) ++ Various development tools fixes and improvements [#738](https://github.com/ginkgo-project/ginkgo/pull/738), [#830](https://github.com/ginkgo-project/ginkgo/pull/830), [#840](https://github.com/ginkgo-project/ginkgo/pull/840) ++ Make multiple operators/constructors explicit. [#650](https://github.com/ginkgo-project/ginkgo/pull/650), [#761](https://github.com/ginkgo-project/ginkgo/pull/761) ++ Fix some issues, memory leaks and warnings found by MSVC. + [#666](https://github.com/ginkgo-project/ginkgo/pull/666), [#731](https://github.com/ginkgo-project/ginkgo/pull/731) ++ Improved solver memory estimates and consistent iteration counts [#691](https://github.com/ginkgo-project/ginkgo/pull/691) ++ Various logger improvements and fixes [#728](https://github.com/ginkgo-project/ginkgo/pull/728), [#743](https://github.com/ginkgo-project/ginkgo/pull/743), [#754](https://github.com/ginkgo-project/ginkgo/pull/754) ++ Fix for ForwardIterator requirements in iterator_factory. [#665](https://github.com/ginkgo-project/ginkgo/pull/665) ++ Various benchmark fixes. [#647](https://github.com/ginkgo-project/ginkgo/pull/647), [#673](https://github.com/ginkgo-project/ginkgo/pull/673), [#722](https://github.com/ginkgo-project/ginkgo/pull/722) ++ Various CI fixes and improvements. 
[#642](https://github.com/ginkgo-project/ginkgo/pull/642), [#641](https://github.com/ginkgo-project/ginkgo/pull/641), [#795](https://github.com/ginkgo-project/ginkgo/pull/795), [#783](https://github.com/ginkgo-project/ginkgo/pull/783), [#793](https://github.com/ginkgo-project/ginkgo/pull/793), [#852](https://github.com/ginkgo-project/ginkgo/pull/852) + + ## Version 1.3.0 The Ginkgo team is proud to announce the new minor release of Ginkgo version diff --git a/CITING.md b/CITING.md index 02fcec9cf13..1ccaf82fdef 100644 --- a/CITING.md +++ b/CITING.md @@ -23,11 +23,14 @@ The Ginkgo software itself was reviewed and has a paper published in the Journal of Open Source Software, which can be cited with the following reference: ```bibtex -@article{Joss2020, - doi = {10.21105.joss.02260}, +@article{GinkgoJoss2020, + doi = {10.21105/joss.02260}, url = {https://doi.org/10.21105/joss.02260}, year = {2020}, publisher = {The Open Journal}, + volume = {5}, + number = {52}, + pages = {2260}, author = {Hartwig Anzt and Terry Cojean and Yen-Chen Chen and Goran Flegar and Fritz G\"{o}bel and Thomas Gr\"{u}tzmacher and Pratik Nayak and Tobias Ribizel and Yu-Hsiang Tsai}, title = {Ginkgo: A high performance numerical linear algebra library}, journal = {Journal of Open Source Software} @@ -68,7 +71,7 @@ series = {PASC ’19} } ``` -### On SpMV performance +### On SpMV or solvers performance ``` bibtex @InProceedings{tsai2020amdspmv, @@ -89,7 +92,6 @@ abstract="Efficiently processing sparse matrices is a central and performance-cr isbn="978-3-030-50743-5" } - @article{anzt2020spmv, author = {Anzt, Hartwig and Cojean, Terry and Yen-Chen, Chen and Dongarra, Jack and Flegar, Goran and Nayak, Pratik and Tomov, Stanimire and Tsai, Yuhsiang M. 
and Wang, Weichung}, title = {Load-Balancing Sparse Matrix Vector Product Kernels on GPUs}, @@ -109,3 +111,14 @@ numpages = {26}, keywords = {irregular matrices, GPUs, Sparse Matrix Vector Product (SpMV)} } ``` + +```bibtex +@misc{tsai2020evaluating, + title={Evaluating the Performance of NVIDIA's A100 Ampere GPU for Sparse Linear Algebra Computations}, + author={Yuhsiang Mike Tsai and Terry Cojean and Hartwig Anzt}, + year={2020}, + eprint={2008.08478}, + archivePrefix={arXiv}, + primaryClass={cs.MS} +} +``` diff --git a/CMakeLists.txt b/CMakeLists.txt index e75f7d61fcc..3a4b8ae08cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,7 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.13) # Use *_ROOT environment variables for find_package calls -if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.12) - cmake_policy(SET CMP0074 NEW) -endif() +cmake_policy(SET CMP0074 NEW) # Let CAS handle the CUDA architecture flags (for now) # Windows still gives CMP0104 warning if putting it in cuda. 
@@ -11,15 +9,26 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) cmake_policy(SET CMP0104 OLD) endif() -project(Ginkgo LANGUAGES C CXX VERSION 1.3.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") +project(Ginkgo LANGUAGES C CXX VERSION 1.4.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") set(Ginkgo_VERSION_TAG "master") set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG}) -# Determine which executors can be compiled +# Determine which modules can be compiled include(cmake/hip_path.cmake) include(cmake/autodetect_executors.cmake) include(cmake/build_type_helpers.cmake) +# Load other CMake helpers +include(cmake/build_helpers.cmake) +include(cmake/install_helpers.cmake) + +if (MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") +endif() +if (MINGW OR CYGWIN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mbig-obj") +endif() + # Ginkgo configuration options option(GINKGO_DEVEL_TOOLS "Add development tools to the build system" OFF) option(GINKGO_BUILD_TESTS "Generate build files for unit tests" ON) @@ -27,9 +36,13 @@ option(GINKGO_BUILD_EXAMPLES "Build Ginkgo's examples" ON) option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) +option(GINKGO_BUILD_DPCPP + "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) option(GINKGO_BUILD_HIP "Compile kernels for AMD or NVIDIA GPUs" ${GINKGO_HAS_HIP}) option(GINKGO_BUILD_DOC "Generate documentation" OFF) +option(GINKGO_FAST_TESTS "Reduces the input size for a few tests known to be time-intensive" OFF) +option(GINKGO_MIXED_PRECISION "Instantiate true mixed-precision kernels (otherwise they will be conversion-based using implicit temporary storage)" OFF) option(GINKGO_SKIP_DEPENDENCY_UPDATE "Do 
not update dependencies each time the project is rebuilt" ON) option(GINKGO_EXPORT_BUILD_DIR @@ -37,16 +50,25 @@ option(GINKGO_EXPORT_BUILD_DIR OFF) option(GINKGO_WITH_CLANG_TIDY "Make Ginkgo call `clang-tidy` to find programming issues." OFF) option(GINKGO_WITH_IWYU "Make Ginkgo call `iwyu` (Include What You Use) to find include issues." OFF) +option(GINKGO_WITH_CCACHE "Use ccache if available to speed up C++ and CUDA rebuilds by caching compilations." ON) option(GINKGO_CHECK_CIRCULAR_DEPS "Enable compile-time checks detecting circular dependencies between libraries and non-self-sufficient headers." OFF) option(GINKGO_CONFIG_LOG_DETAILED "Enable printing of detailed configuration log to screen in addition to the writing of files," OFF) +option(GINKGO_BENCHMARK_ENABLE_TUNING + "Enable tuning variables in the benchmarks. For specific use cases, manual code changes could be required." + OFF) set(GINKGO_VERBOSE_LEVEL "1" CACHE STRING "Verbosity level. Put 0 to turn off. 1 activates a few important messages.") if(MSVC) set(GINKGO_COMPILER_FLAGS "" CACHE STRING "Set the required CXX compiler flags, mainly used for warnings. Current default is ``") +elseif(GINKGO_BUILD_DPCPP OR CMAKE_CXX_COMPILER MATCHES "dpcpp") + # For now always use `-ffp-model=precise` with DPC++. This can be removed when + # the floating point issues are fixed. + set(GINKGO_COMPILER_FLAGS "-Wpedantic;-ffp-model=precise" CACHE STRING + "Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic;-ffp-model=precise`") else() set(GINKGO_COMPILER_FLAGS "-Wpedantic" CACHE STRING "Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic`") @@ -60,30 +82,43 @@ set(GINKGO_HIP_COMPILER_FLAGS "" CACHE STRING "Set the required HIP compiler flags. Current default is an empty string.") set(GINKGO_HIP_NVCC_COMPILER_FLAGS "" CACHE STRING "Set the required HIP nvcc compiler flags. 
Current default is an empty string.") -set(GINKGO_HIP_HCC_COMPILER_FLAGS "" CACHE STRING - "Set the required HIP HCC compiler flags. Current default is an empty string.") set(GINKGO_HIP_CLANG_COMPILER_FLAGS "" CACHE STRING "Set the required HIP CLANG compiler flags. Current default is an empty string.") set(GINKGO_HIP_AMDGPU "" CACHE STRING "The amdgpu_target(s) variable passed to hipcc. The default is none (auto).") option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF) option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) +if(MSVC OR WIN32 OR CYGWIN OR APPLE) + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" OFF) +else() + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) +endif() +option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ backend." OFF) +option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) +option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON) +option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF) set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined") -if(BUILD_SHARED_LIBS AND (WIN32 OR CYGWIN) AND (GINKGO_BUILD_TESTS OR GINKGO_BUILD_EXAMPLES OR GINKGO_BUILD_BENCHMARKS)) - # Change shared libraries output only if this build has executable program with shared libraries. - set(GINKGO_CHANGED_SHARED_LIBRARY TRUE) - option(GINKGO_CHECK_PATH "Tell Ginkgo to check if the environment variable PATH is available for this build." ON) - set(GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH "windows_shared_library" CACHE STRING - "Set Ginkgo's shared library relative path in windows. Current default is `windows_shared_library`. 
\ - This absolute path ${PROJECT_BINARY_DIR}/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH must be in the environment variable PATH.") - set(GINKGO_WINDOWS_SHARED_LIBRARY_PATH ${PROJECT_BINARY_DIR}/${GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH}) -else() - set(GINKGO_CHANGED_SHARED_LIBRARY FALSE) +# Use ccache as compilation launcher +if(GINKGO_WITH_CCACHE) + find_program(CCACHE_PROGRAM ccache) + if(CCACHE_PROGRAM) + set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}") + if(GINKGO_BUILD_CUDA) + set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}") + endif() + endif() +endif() + +if(GINKGO_BENCHMARK_ENABLE_TUNING) + # In this state, the tests and examples cannot be compiled without extra + # complexity/intrusiveness, so we simply disable them. + set(GINKGO_BUILD_TESTS OFF) + set(GINKGO_BUILD_EXAMPLES OFF) endif() -if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP)) +if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP OR GINKGO_BUILD_DPCPP)) message(STATUS "GINKGO_BUILD_TESTS is ON, enabling GINKGO_BUILD_REFERENCE") set(GINKGO_BUILD_REFERENCE ON CACHE BOOL "Compile reference CPU kernels" FORCE) endif() @@ -115,6 +150,7 @@ if(GINKGO_BUILD_TESTS) enable_testing() include(CTest) + add_custom_target(quick_test "${CMAKE_CTEST_COMMAND}" -R 'core|reference') endif() if(GINKGO_WITH_CLANG_TIDY) @@ -125,10 +161,11 @@ if(GINKGO_WITH_IWYU) find_program(GINKGO_IWYU_PATH iwyu) endif() -list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules/") +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/") -# Find important header files, store the definitions in include/ginkgo/config.h.in -# For details, see https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks +# Find important header files, store the definitions in +# include/ginkgo/config.h.in For details, see +# https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks 
include(CheckIncludeFileCXX) check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H) @@ -139,6 +176,20 @@ if(PAPI_sde_FOUND) set(GINKGO_HAVE_PAPI_SDE 1) endif() +# Switch off HWLOC for Windows and MacOS +if(GINKGO_BUILD_HWLOC AND (MSVC OR WIN32 OR CYGWIN OR APPLE)) + set(GINKGO_BUILD_HWLOC OFF CACHE BOOL "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" FORCE) + message(WARNING "Ginkgo does not support HWLOC on Windows/MacOS, switch GINKGO_BUILD_HWLOC to OFF") +endif() +if(GINKGO_BUILD_HWLOC) + set(GINKGO_HAVE_HWLOC 1) +else() + set(GINKGO_HAVE_HWLOC 0) + message(STATUS "HWLOC is being forcibly switched off") +endif() + +# We keep using NVCC/HCC for consistency with previous releases even if AMD +# updated everything to use NVIDIA/AMD in ROCM 4.1 set(GINKGO_HIP_PLATFORM_NVCC 0) set(GINKGO_HIP_PLATFORM_HCC 0) @@ -147,67 +198,71 @@ if(GINKGO_BUILD_HIP) if(DEFINED ENV{HIP_PLATFORM}) set(GINKGO_HIP_PLATFORM "$ENV{HIP_PLATFORM}") elseif(GINKGO_HIPCONFIG_PATH) - execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH} --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM) + execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH} + --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM) else() message(FATAL_ERROR "No platform could be found for HIP. 
" "Set and export the environment variable HIP_PLATFORM.") endif() message(STATUS "HIP platform set to ${GINKGO_HIP_PLATFORM}") + set(HIP_PLATFORM_AMD_REGEX "hcc|amd") + set(HIP_PLATFORM_NVIDIA_REGEX "nvcc|nvidia") - if (GINKGO_HIP_PLATFORM STREQUAL "hcc") + if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") set(GINKGO_HIP_PLATFORM_HCC 1) - elseif (GINKGO_HIP_PLATFORM STREQUAL "nvcc") + elseif (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") set(GINKGO_HIP_PLATFORM_NVCC 1) endif() endif() -configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in - ${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY) - -# Load CMake helpers -include(cmake/build_helpers.cmake) -include(cmake/hip_helpers.cmake) -include(cmake/install_helpers.cmake) -include(cmake/windows_helpers.cmake) +# Try to find the third party packages before using our subdirectories +include(cmake/package_helpers.cmake) +if(GINKGO_BUILD_TESTS) + find_package(GTest 1.10.0) # No need for QUIET as CMake ships FindGTest +endif() +if(GINKGO_BUILD_BENCHMARKS) + find_package(gflags 2.2.2 QUIET) + find_package(RapidJSON 1.1.0 QUIET) +endif() +if(GINKGO_BUILD_HWLOC) + find_package(HWLOC 2.1) # No need for QUIET as we ship FindHWLOC +endif() +add_subdirectory(third_party) # Third-party tools and libraries -# This is modified from https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace if(MSVC) if(BUILD_SHARED_LIBS) - ginkgo_switch_to_windows_dynamic("CXX") - ginkgo_switch_to_windows_dynamic("C") set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) else() - ginkgo_switch_to_windows_static("CXX") - ginkgo_switch_to_windows_static("C") set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS FALSE) endif() endif() -# Try to find the third party packages before using our subdirectories -include(cmake/package_helpers.cmake) -ginkgo_find_package(GTest "GTest::GTest;GTest::Main" FALSE 1.8.1) -ginkgo_find_package(gflags gflags FALSE 2.2.2) -ginkgo_find_package(RapidJSON rapidjson TRUE 1.1.0) 
-add_subdirectory(third_party) # Third-party tools and libraries +configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in + ${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY) # Ginkgo core libraries # Needs to be first in order for `CMAKE_CUDA_DEVICE_LINK_EXECUTABLE` to be # propagated to the other parts of Ginkgo in case of building as static libraries +add_subdirectory(devices) # Basic device functionalities. Always compiled. if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs endif() -add_subdirectory(core) # Core Ginkgo types and top-level functions -add_subdirectory(include) # Public API self-contained check if (GINKGO_BUILD_REFERENCE) add_subdirectory(reference) # Reference kernel implementations endif() +if(GINKGO_BUILD_HIP) + add_subdirectory(hip) # High-performance kernels for AMD or NVIDIA GPUs +endif() +if (GINKGO_BUILD_DPCPP) + add_subdirectory(dpcpp) # High-performance DPC++ kernels +endif() if (GINKGO_BUILD_OMP) add_subdirectory(omp) # High-performance omp kernels endif() -# HIP needs to be last because it builds the GINKGO_RPATH_FOR_HIP variable -# which needs to know the `ginkgo` target. 
-if(GINKGO_BUILD_HIP) - add_subdirectory(hip) # High-performance kernels for AMD or NVIDIA GPUs +add_subdirectory(core) # Core Ginkgo types and top-level functions +add_subdirectory(include) # Public API self-contained check +if (GINKGO_BUILD_TESTS) + add_subdirectory(test) # Tests running on all executors endif() # Non core directories and targets @@ -223,7 +278,10 @@ if(GINKGO_DEVEL_TOOLS) add_custom_target(add_license COMMAND ${Ginkgo_SOURCE_DIR}/dev_tools/scripts/add_license.sh WORKING_DIRECTORY ${Ginkgo_SOURCE_DIR}) - add_dependencies(format add_license) + # if git-cmake-format can not build format target, do not add the dependencies + if(TARGET format) + add_dependencies(format add_license) + endif() endif() # MacOS needs to install bash, gnu-sed, findutils and coreutils @@ -271,35 +329,79 @@ endif() configure_file(${Ginkgo_SOURCE_DIR}/cmake/ginkgo.pc.in ${Ginkgo_BINARY_DIR}/ginkgo.pc @ONLY) -# WINDOWS NVCC has " inside the string, add escape charater to avoid config problem. +# WINDOWS NVCC has " inside the string, add escape character +# to avoid config problem. 
ginkgo_modify_flags(CMAKE_CUDA_FLAGS) ginkgo_modify_flags(CMAKE_CUDA_FLAGS_DEBUG) ginkgo_modify_flags(CMAKE_CUDA_FLAGS_RELEASE) ginkgo_install() +set(GINKGO_TEST_INSTALL_SRC_DIR "${Ginkgo_SOURCE_DIR}/test/test_install/") +set(GINKGO_TEST_INSTALL_BIN_DIR "${Ginkgo_BINARY_DIR}/test/test_install/") +set(GINKGO_TEST_EXPORTBUILD_SRC_DIR "${Ginkgo_SOURCE_DIR}/test/test_exportbuild/") +set(GINKGO_TEST_EXPORTBUILD_BIN_DIR "${Ginkgo_BINARY_DIR}/test/test_exportbuild/") if(MSVC) - # Set path/command with $ - set(GINKGO_TEST_INSTALL_COMMAND "${Ginkgo_BINARY_DIR}/test_install/$/test_install") + set(GINKGO_TEST_INSTALL_CMD ${GINKGO_TEST_INSTALL_BIN_DIR}/$/test_install) + set(GINKGO_TEST_EXPORTBUILD_CMD ${GINKGO_TEST_EXPORTBUILD_BIN_DIR}/$/test_exportbuild) if(GINKGO_BUILD_CUDA) - set(GINKGO_TEST_INSTALL_COMMAND "${GINKGO_TEST_INSTALL_COMMAND}" "${Ginkgo_BINARY_DIR}/test_install/$/test_install_cuda") + set(GINKGO_TEST_INSTALL_CUDA_CMD ${GINKGO_TEST_INSTALL_BIN_DIR}/$/test_install_cuda) endif() else() - set(GINKGO_TEST_INSTALL_COMMAND "${Ginkgo_BINARY_DIR}/test_install/test_install") + set(GINKGO_TEST_INSTALL_CMD ${GINKGO_TEST_INSTALL_BIN_DIR}/test_install) + set(GINKGO_TEST_EXPORTBUILD_CMD ${GINKGO_TEST_EXPORTBUILD_BIN_DIR}/test_exportbuild) if(GINKGO_BUILD_CUDA) - set(GINKGO_TEST_INSTALL_COMMAND "${GINKGO_TEST_INSTALL_COMMAND}" "${Ginkgo_BINARY_DIR}/test_install/test_install_cuda") + set(GINKGO_TEST_INSTALL_CUDA_CMD ${GINKGO_TEST_INSTALL_BIN_DIR}/test_install_cuda) endif() endif() +if(GINKGO_BUILD_HIP) + set(GINKGO_TEST_INSTALL_HIP_CMD ${GINKGO_TEST_INSTALL_BIN_DIR}/test_install_hip) +endif() + +file(MAKE_DIRECTORY "${GINKGO_TEST_INSTALL_BIN_DIR}") +file(MAKE_DIRECTORY "${GINKGO_TEST_EXPORTBUILD_BIN_DIR}") +set(TOOLSET "") +if (NOT "${CMAKE_GENERATOR_TOOLSET}" STREQUAL "") + set(TOOLSET "-T${CMAKE_GENERATOR_TOOLSET}") +endif() add_custom_target(test_install - COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} -H${Ginkgo_SOURCE_DIR}/test_install - -B${Ginkgo_BINARY_DIR}/test_install 
+ COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${TOOLSET} + -H${GINKGO_TEST_INSTALL_SRC_DIR} + -B${GINKGO_TEST_INSTALL_BIN_DIR} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_CONFIG_DIR} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} # `--config cfg` is ignored by single-configuration generator. - # `$` is always be the same as `CMAKE_BUILD_TYPE` in single-configuration generator. - COMMAND ${CMAKE_COMMAND} --build ${Ginkgo_BINARY_DIR}/test_install --config $ - COMMAND ${GINKGO_TEST_INSTALL_COMMAND} - COMMENT "Running a test on the installed binaries. This requires running `(sudo) make install` first.") + # `$` is always the same as `CMAKE_BUILD_TYPE` in + # single-configuration generator. + COMMAND ${CMAKE_COMMAND} + --build ${GINKGO_TEST_INSTALL_BIN_DIR} + --config $ + COMMAND ${GINKGO_TEST_INSTALL_CMD} + COMMAND ${GINKGO_TEST_INSTALL_CUDA_CMD} + COMMAND ${GINKGO_TEST_INSTALL_HIP_CMD} + WORKING_DIRECTORY ${GINKGO_TEST_INSTALL_BIN_DIR} + COMMENT "Running a test on the installed binaries. " + "This requires running `(sudo) make install` first.") + +add_custom_target(test_exportbuild + COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${TOOLSET} + -H${GINKGO_TEST_EXPORTBUILD_SRC_DIR} + -B${GINKGO_TEST_EXPORTBUILD_BIN_DIR} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} + # `--config cfg` is ignored by single-configuration generator. + # `$` is always the same as `CMAKE_BUILD_TYPE` in + # single-configuration generator. + COMMAND ${CMAKE_COMMAND} + --build ${GINKGO_TEST_EXPORTBUILD_BIN_DIR} + --config $ + COMMAND ${GINKGO_TEST_EXPORTBUILD_CMD} + COMMENT "Running a test on Ginkgo's exported build directory. 
" + "This requires compiling Ginkgo with `-DGINKGO_EXPORT_BUILD_DIR=ON` first.") + # Setup CPack set(CPACK_PACKAGE_DESCRIPTION_FILE "${Ginkgo_SOURCE_DIR}/README.md") @@ -309,10 +411,9 @@ set(CPACK_PACKAGE_CONTACT "ginkgo.library@gmail.com") include(CPack) # And finally, print the configuration to screen: -# if(GINKGO_CONFIG_LOG_DETAILED) - FILE(READ ${CMAKE_BINARY_DIR}/detailed.log GINKGO_LOG_SUMMARY) + FILE(READ ${PROJECT_BINARY_DIR}/detailed.log GINKGO_LOG_SUMMARY) else() - FILE(READ ${CMAKE_BINARY_DIR}/minimal.log GINKGO_LOG_SUMMARY) + FILE(READ ${PROJECT_BINARY_DIR}/minimal.log GINKGO_LOG_SUMMARY) endif() MESSAGE("${GINKGO_LOG_SUMMARY}") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9fcdc25ed13..07aec3f9649 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -68,8 +68,8 @@ look at our coding guidelines before proposing a pull request. Ginkgo is divided into a `core` module with common functionalities independent of the architecture, and several kernel modules (`reference`, `omp`, `cuda`, -`hip`) which contain low-level computational routines for each supported -architecture. +`hip`, `dpcpp`) which contain low-level computational routines for each +supported architecture. ### Extended header files @@ -521,12 +521,12 @@ existing code has been broken. need to be performed with data that can be as small as possible. For example, matrices lesser than 5x5 are acceptable. This allows the reviewers to verify the results for exactness with tools such as MATLAB. -* OpenMP, CUDA and HIP kernels have to be tested against the reference kernels. - Hence data for the tests of these kernels can be generated in the test files - using helper functions or by using external files to be read through the - standard input. In particular for CUDA and HIP, the data size should be at - least bigger than the architecture's warp size to ensure there is no corner - case in the kernels. +* OpenMP, CUDA, HIP and DPC++ kernels have to be tested against the reference + kernels. 
Hence data for the tests of these kernels can be generated in the + test files using helper functions or by using external files to be read + through the standard input. In particular for CUDA, HIP and DPC++ the data + size should be at least bigger than the architecture's warp size to ensure + there is no corner case in the kernels. ## Documentation style diff --git a/INSTALL.md b/INSTALL.md index 2a0056bf3a4..787508b720c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,7 +2,7 @@ Installation Instructions {#install_ginkgo} ------------------------------------- ### Building -Use the standard cmake build procedure: +Use the standard CMake build procedure: ```sh mkdir build; cd build @@ -17,9 +17,19 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_DEVEL_TOOLS={ON, OFF}` sets up the build system for development (requires clang-format, will also download git-cmake-format), - default is `OFF`. + default is `OFF`. The default behavior installs a pre-commit hook, which + disables git commits. If it is set to `ON`, a new pre-commit hook for + formatting will be installed (enabling commits again). In both cases the + hook may overwrite a user defined pre-commit hook when Ginkgo is used as + a submodule. +* `-DGINKGO_MIXED_PRECISION={ON, OFF}` compiles true mixed-precision kernels + instead of converting data on the fly, default is `OFF`. + Enabling this flag increases the library size, but improves performance of + mixed-precision kernels. * `-DGINKGO_BUILD_TESTS={ON, OFF}` builds Ginkgo's tests (will download googletest), default is `ON`. +* `-DGINKGO_FAST_TESTS={ON, OFF}` reduces the input sizes for a few slow tests + to speed them up, default is `OFF`. * `-DGINKGO_BUILD_BENCHMARKS={ON, OFF}` builds Ginkgo's benchmarks (will download gflags and rapidjson), default is `ON`. 
* `-DGINKGO_BUILD_EXAMPLES={ON, OFF}` builds Ginkgo's examples, default is `ON` @@ -32,11 +42,18 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_BUILD_CUDA={ON, OFF}` builds optimized cuda versions of the kernels (requires CUDA), default is `ON` if a CUDA compiler could be detected, `OFF` otherwise. +* `-DGINKGO_BUILD_DPCPP={ON, OFF}` builds optimized DPC++ versions of the + kernels (requires `CMAKE_CXX_COMPILER` to be set to the `dpcpp` compiler). + The default is `ON` if `CMAKE_CXX_COMPILER` is a DPC++ compiler, `OFF` + otherwise. * `-DGINKGO_BUILD_HIP={ON, OFF}` builds optimized HIP versions of the kernels (requires HIP), default is `ON` if an installation of HIP could be detected, `OFF` otherwise. * `-DGINKGO_HIP_AMDGPU="gpuarch1;gpuarch2"` the amdgpu_target(s) variable passed to hipcc for the `hcc` HIP backend. The default is none (auto). +* `-DGINKGO_BUILD_HWLOC={ON, OFF}` builds Ginkgo with HWLOC. If system HWLOC + is not found, Ginkgo will try to build it. Default is `ON` on Linux. Ginkgo + does not support HWLOC on Windows/MacOS, so the default is `OFF` on Windows/MacOS. * `-DGINKGO_BUILD_DOC={ON, OFF}` creates an HTML version of Ginkgo's documentation from inline comments in the code. The default is `OFF`. * `-DGINKGO_DOC_GENERATE_EXAMPLES={ON, OFF}` generates the documentation of examples @@ -59,6 +76,13 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_VERBOSE_LEVEL=integer` sets the verbosity of Ginkgo. * `0` disables all output in the main libraries, * `1` enables a few important messages related to unexpected behavior (default). +* `GINKGO_INSTALL_RPATH` allows setting any RPATH information when installing + the Ginkgo libraries. If this is `OFF`, the behavior is the same as if all + other RPATH flags are set to `OFF` as well. The default is `ON`. +* `GINKGO_INSTALL_RPATH_ORIGIN` adds $ORIGIN (Linux) or @loader_path (MacOS) + to the installation RPATH. 
The default is `ON`. +* `GINKGO_INSTALL_RPATH_DEPENDENCIES` adds the dependencies to the + installation RPATH. The default is `OFF`. * `-DCMAKE_INSTALL_PREFIX=path` sets the installation path for `make install`. The default value is usually something like `/usr/local`. * `-DCMAKE_BUILD_TYPE=type` specifies which configuration will be used for @@ -84,7 +108,7 @@ Ginkgo adds the following additional switches to control what is being built: list of architectures. Supported values are: * `Auto` - * `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Ampere` + * `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Turing`, `Ampere` * `CODE`, `CODE(COMPUTE)`, `(COMPUTE)` `Auto` will automatically detect the present CUDA-enabled GPU architectures @@ -96,14 +120,6 @@ Ginkgo adds the following additional switches to control what is being built: this option see the [`ARCHITECTURES` specification list](https://github.com/ginkgo-project/CudaArchitectureSelector/blob/master/CudaArchitectureSelector.cmake#L58) section in the documentation of the CudaArchitectureSelector CMake module. -* `-DGINKGO_WINDOWS_SHARED_LIBRARY_RELPATH=` where is a relative - path built with `PROJECT_BINARY_DIR`. Users must add the absolute path - (`PROJECT_BINARY_DIR`/`GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH`) into the - environment variable PATH when building shared libraries and executable - program, default is `windows_shared_library`. -* `-DGINKGO_CHECK_PATH={ON, OFF}` checks if the environment variable PATH is valid. - It is checked only when building shared libraries and executable program, - default is `ON`. For example, to build everything (in debug mode), use: @@ -119,48 +135,11 @@ generators. Other CMake generators are untested. ### Building Ginkgo in Windows Depending on the configuration settings, some manual work might be required: -* Build Ginkgo as shared library: - Add `PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` into the environment variable `PATH`. 
- `GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` is `windows_shared_library` by default. More Details are available in the [Installation page](./INSTALL.md). - * cmd: `set PATH=";%PATH%"` - * powershell: `$env:PATH=";$env:PATH"` - - CMake will give the following error message if the path is not correct. - ``` - Did not find this build in the environment variable PATH. Please add into the environment variable PATH. - ``` - where `` is the needed ``. * Build Ginkgo with Debug mode: - Some Debug build specific issues can appear depending on the machine and environment. The known issues are the following: - 1. `bigobj` issue: encountering `too many sections` needs the compilation flags `\bigobj` or `-Wa,-mbig-obj` - 2. `ld` issue: encountering `ld: error: export ordinal too large` needs the compilation flag `-O1` - - The following are the details for different environments: - * _Microsoft Visual Studio_: - 1. `bigobj` issue - * `cmake -DCMAKE_CXX_FLAGS=\bigobj ` which might overwrite the default settings. - * add `\bigobj` into the environment variable `CXXFLAGS` (only available in the first cmake configuration) - * cmd: `set CXXFLAGS=\bigobj` - * powershell: `$env:CXXFLAGS=\bigobj` - 2. `ld` issue (_Microsoft Visual Studio_ does not have this issue) - * _Cygwin_: - 1. `bigobj` issue - * add `-Wa,-mbig-obj -O1` into the environment variable `CXXFLAGS` (only available in the first cmake configuration) - * `export CXXFLAGS="-Wa,-mbig-obj -O1"` - * `cmake -DCMAKE_CXX_FLAGS=-Wa,-mbig-obj `, which might overwrite the default settings. - 2. `ld` issue (If building Ginkgo as static library, this is not needed) - * `cmake -DGINKGO_COMPILER_FLAGS="-Wpedantic -O1" ` (`GINKGO_COMPILER_FLAGS` is `-Wpedantic` by default) - * add `-O1` in the environement variable `CXX_FLAGS` or `CMAKE_CXX_FLAGS` - * _MinGW_: - 1. 
`bigobj` issue - * add `-Wa,-mbig-obj -O1` into the environment variable `CXXFLAGS` (only available in the first cmake configuration) - * cmd: `set CXXFLAGS="-Wa,-mbig-obj"` - * powershell: `$env:CXXFLAGS="-Wa,-mbig-obj"` - * `cmake -DCMAKE_CXX_FLAGS=-Wa,-mbig-obj `, which might overwrite the default settings. - 2. `ld` issue (If building Ginkgo as static library, this is not needed) - * `cmake -DGINKGO_COMPILER_FLAGS="-Wpedantic -O1" ` (`GINKGO_COMPILER_FLAGS` is `-Wpedantic` by default) - * add `-O1` in the environement variable `CXX_FLAGS` or `CMAKE_CXX_FLAGS` -* Build Ginkgo in _MinGW_: + Some Debug build specific issues can appear depending on the machine and environment: + When you encounter the error message `ld: error: export ordinal too large`, add the compilation flag `-O1` + by adding `-DCMAKE_CXX_FLAGS=-O1` to the CMake invocation. +* Build Ginkgo in _MinGW_:\ If encountering the issue `cc1plus.exe: out of memory allocating 65536 bytes`, please follow the workaround in [reference](https://www.intel.com/content/www/us/en/programmable/support/support-resources/knowledge-base/embedded/2016/cc1plus-exe--out-of-memory-allocating-65536-bytes.html), or trying to compile ginkgo again might work. @@ -173,92 +152,46 @@ of HIP either at `/opt/rocm/hip` or at the path specified by `HIP_PATH` as a CMake parameter (`-DHIP_PATH=`) or environment variable (`export HIP_PATH=`), unless `-DGINKGO_BUILD_HIP=ON/OFF` is set explicitly. -#### Correctly installing HIP toolkits and dependencies for Ginkgo -In general, Ginkgo's HIP backend requires the following packages: -+ HIP, -+ hipBLAS, -+ hipSPARSE, -+ Thrust. - -It is necessary to provide some details about the different ways to -procure and install these packages, in particular for NVIDIA systems since -getting a correct, non bloated setup is not straightforward. 
- -For AMD systems, the simplest way is to follow the [instructions provided -here](https://github.com/ROCm-Developer-Tools/HIP/blob/master/INSTALL.md) which -provide package installers for most Linux distributions. Ginkgo also needs the -installation of the [hipBLAS](https://github.com/ROCmSoftwarePlatform/hipBLAS) -and [hipSPARSE](https://github.com/ROCmSoftwarePlatform/hipSPARSE) interfaces. -Optionally if you do not already have a thrust installation, [the ROCm provided -rocThrust package can be -used](https://github.com/ROCmSoftwarePlatform/rocThrust). - -For NVIDIA systems, the traditional installation (package `hip_nvcc`), albeit -working properly is currently odd: it depends on all the `hcc` related packages, -although the `nvcc` backend seems to entirely rely on the CUDA suite. [See this -issue for more -details](https://github.com/ROCmSoftwarePlatform/hipBLAS/issues/53). It is -advised in this case to compile everything manually, including using forks of -`hipBLAS` and `hipSPARSE` specifically made to not depend on the `hcc` specific -packages. `Thrust` is often provided by CUDA and this Thrust version should work -with `HIP`. Here is a sample procedure for installing `HIP`, `hipBLAS` and -`hipSPARSE`. - - -```bash -# HIP -git clone https://github.com/ROCm-Developer-Tools/HIP.git -pushd HIP && mkdir build && pushd build -cmake .. && make install -popd && popd - -# hipBLAS -git clone https://github.com/tcojean/hipBLAS.git -pushd hipBLAS && mkdir build && pushd build -cmake .. && make install -popd && popd - -# hipSPARSE -git clone https://github.com/tcojean/hipSPARSE.git -pushd hipSPARSE && mkdir build && pushd build -cmake -DBUILD_CUDA=ON .. && make install -popd && popd -``` - - #### Changing the paths to search for HIP and other packages All HIP installation paths can be configured through the use of environment variables or CMake variables. This way of configuring the paths is currently imposed by the `HIP` tool suite. 
The variables are the following: -+ CMake `-DHIP_PATH=` or environment `export HIP_PATH=`: sets the `HIP` - installation path. The default value is `/opt/rocm/hip`. -+ CMake `-DHIPBLAS_PATH=` or environment `export HIPBLAS_PATH=`: sets the - `hipBLAS` installation path. The default value is `/opt/rocm/hipblas`. -+ CMake `-DHIPSPARSE_PATH=` or environment `export HIPSPARSE_PATH=`: sets the - `hipSPARSE` installation path. The default value is `/opt/rocm/hipsparse`. -+ CMake `-DHCC_PATH=` or environment `export HCC_PATH=`: sets the `HCC` - installation path, for AMD backends. The default value is `/opt/rocm/hcc`. ++ CMake `-DROCM_PATH=` or environment `export ROCM_PATH=`: sets the `ROCM` + installation path. The default value is `/opt/rocm/`. ++ CMake `-DHIP_CLANG_PATH` or environment `export HIP_CLANG_PATH=`: sets the + `HIP` compatible `clang` binary path. The default value is + `${ROCM_PATH}/llvm/bin`. ++ CMake `-DHIP_PATH=` or environment `export HIP_PATH=`: sets the `HIP` + installation path. The default value is `${ROCM_PATH}/hip`. ++ CMake `-DHIPBLAS_PATH=` or environment `export HIPBLAS_PATH=`: sets the + `hipBLAS` installation path. The default value is `${ROCM_PATH}/hipblas`. ++ CMake `-DHIPSPARSE_PATH=` or environment `export HIPSPARSE_PATH=`: sets the + `hipSPARSE` installation path. The default value is `${ROCM_PATH}/hipsparse`. ++ CMake `-DROCRAND_PATH=` or environment `export ROCRAND_PATH=`: sets the + `rocRAND` installation path. The default value is `${ROCM_PATH}/rocrand`. ++ CMake `-DHIPRAND_PATH=` or environment `export HIPRAND_PATH=`: sets the + `hipRAND` installation path. The default value is `${ROCM_PATH}/hiprand`. + environment `export CUDA_PATH=`: where `hipcc` can find `CUDA` if it is not in the default `/usr/local/cuda` path. #### HIP platform detection of AMD and NVIDIA By default, Ginkgo uses the output of `/opt/rocm/hip/bin/hipconfig --platform` -to select the backend. The accepted values are either `hcc` (AMD) or `nvcc` -(NVIDIA). 
When on an AMD or NVIDIA system, this should output the correct -platform by default. When on a system without GPUs, this should output `hcc` by -default. To change this value, export the environment variable `HIP_PLATFORM` -like so: +to select the backend. The accepted values are either `hcc` (`amd` with ROCM >= +4.1) or `nvcc` (`nvidia` with ROCM >= 4.1). When on an AMD or NVIDIA system, +this should output the correct platform by default. When on a system without +GPUs, this should output `hcc` by default. To change this value, export the +environment variable `HIP_PLATFORM` like so: ```bash -export HIP_PLATFORM=nvcc +export HIP_PLATFORM=nvcc # or nvidia for ROCM >= 4.1 ``` #### Setting platform specific compilation flags -Platform specific compilation flags can be given through the following -CMake variables: +Platform specific compilation flags can be given through the following CMake +variables: + `-DGINKGO_HIP_COMPILER_FLAGS=`: compilation flags given to all platforms. -+ `-DGINKGO_HIP_HCC_COMPILER_FLAGS=`: compilation flags given to AMD platforms. + `-DGINKGO_HIP_NVCC_COMPILER_FLAGS=`: compilation flags given to NVIDIA platforms. ++ `-DGINKGO_HIP_CLANG_COMPILER_FLAGS=`: compilation flags given to AMD clang compiler. ### Third party libraries and packages @@ -266,9 +199,6 @@ CMake variables: Ginkgo relies on third party packages in different cases. These third party packages can be turned off by disabling the relevant options. -+ GINKGO_BUILD_CUDA=ON: - [CudaArchitectureSelector](https://github.com/ginkgo-project/CudaArchitectureSelector) - (CAS) is a CMake helper to manage CUDA architecture settings; + GINKGO_BUILD_TESTS=ON: Our tests are implemented with [Google Test](https://github.com/google/googletest); + GINKGO_BUILD_BENCHMARKS=ON: For argument management we use @@ -277,29 +207,24 @@ packages can be turned off by disabling the relevant options. 
+ GINKGO_DEVEL_TOOLS=ON: [git-cmake-format](https://github.com/gflegar/git-cmake-format) is our CMake helper for code formatting. - -By default, Ginkgo uses the internal version of each package. For each of the -packages `GTEST`, `GFLAGS`, `RAPIDJSON` and `CAS`, it is possible to force -Ginkgo to try to use an external version of a package. For this, Ginkgo provides -two ways to find packages. To rely on the CMake `find_package` command, use the -CMake option `-DGINKGO_USE_EXTERNAL_=ON`. Note that, if the external -packages were not installed to the default location, the CMake option -`-DCMAKE_PREFIX_PATH=` needs to be set to the semicolon (`;`) -separated list of install paths of these external packages. For more -Information, see the [CMake documentation for ++ GINKGO_BUILD_HWLOC=ON: + [hwloc](https://www.open-mpi.org/projects/hwloc) to detect and control cores + and devices. + +Ginkgo attempts to use pre-installed versions of these packages if they match +version requirements using `find_package`. Otherwise, the configuration step +will download the files for each of the packages `GTest`, `gflags`, +`RapidJSON` and `hwloc` and build them internally. + +Note that, if the external packages were not installed to the default location, +the CMake option `-DCMAKE_PREFIX_PATH=` needs to be set to the +semicolon (`;`) separated list of install paths of these external packages. For +more information, see the [CMake documentation for CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.9/variable/CMAKE_PREFIX_PATH.html) for details. -To manually configure the paths, Ginkgo relies on the [standard xSDK Installation -policies](https://xsdk.info/policies/) for all packages except `CAS` (as it is -neither a library nor a header, it cannot be expressed through the `TPL` -format): -+ `-DTPL_ENABLE_=ON` -+ `-DTPL__LIBRARIES=/path/to/libraries.{so|a}` -+ `-DTPL__INCLUDE_DIRS=/path/to/header/directory` - -When applicable (e.g. 
for `GTest` libraries), a `;` separated list can be given -to the `TPL__{LIBRARIES|INCLUDE_DIRS}` variables. +For convenience, the options `GINKGO_INSTALL_RPATH[_.*]` can be used +to bind the installed Ginkgo shared libraries to the path of its dependencies. ### Installing Ginkgo diff --git a/LICENSE b/LICENSE index 48867b57a87..b193691a64e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index a545ef04def..de30f8d31b5 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ [![Documentation](https://img.shields.io/badge/Documentation-latest-blue.svg)](https://ginkgo-project.github.io/ginkgo/doc/master/) [![License](https://img.shields.io/github/license/ginkgo-project/ginkgo.svg)](./LICENSE) [![c++ standard](https://img.shields.io/badge/c%2B%2B-14-blue.svg)](https://en.wikipedia.org/wiki/C%2B%2B#Standardization) +[![DOI](https://joss.theoj.org/papers/10.21105/joss.02260/status.svg)](https://doi.org/10.21105/joss.02260) Ginkgo is a high-performance linear algebra library for manycore systems, with a focus on sparse solution of linear systems. 
It is implemented using modern C++ @@ -35,12 +36,12 @@ Prerequisites For Ginkgo core library: -* _cmake 3.9+_ +* _cmake 3.13+_ * C++14 compliant compiler, one of: - * _gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_ + * _gcc 5.5+_ * _clang 3.9+_ - * _Intel compiler 2017+_ - * _Apple LLVM 8.0+_ (__TODO__: verify) + * _Intel compiler 2018+_ + * _Apple LLVM 8.0+_ The Ginkgo CUDA module has the following __additional__ requirements: @@ -50,29 +51,36 @@ The Ginkgo CUDA module has the following __additional__ requirements: [CUDA installation guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) or [CUDA installation guide for Mac Os X](https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html) +The Ginkgo HIP module has the following __additional__ requirements: + +* _ROCm 3.5+_ +* the HIP, hipBLAS, hipSPARSE, hip/rocRAND and rocThrust packages compiled with either: + * _AMD_ backend (using the `clang` compiler) + * _9.2 <= CUDA < 11_ backend + +The Ginkgo DPC++ module has the following __additional__ requirements: + +* _OneAPI 2021.3+_ +* Set `dpcpp` as the `CMAKE_CXX_COMPILER` +* `c++17` is used to compile this module, while the rest of Ginkgo is compiled using `c++14`. +* The following oneAPI packages should be available: + * oneMKL + * oneDPL In addition, if you want to contribute code to Ginkgo, you will also need the following: -* _clang-format 5.0.0+_ (ships as part of _clang_) +* _clang-format 8.0.0+_ (ships as part of _clang_) * _clang-tidy_ (optional, when setting the flag `-DGINKGO_WITH_CLANG_TIDY=ON`) * _iwyu_ (Include What You Use, optional, when setting the flag `-DGINKGO_WITH_IWYU=ON`) -The Ginkgo HIP module has the following __additional__ requirements: - -* _ROCm 2.8+_ -* the HIP, hipBLAS and hipSPARSE packages compiled with either: - * _AMD_ backend - * _CUDA 9.0+_ backend. When using CUDA 10+, _cmake 3.12.2+_ is required. 
- ### Windows -The prequirement needs to be verified -* _cmake 3.9+_ +* _cmake 3.13+_ * C++14 compliant 64-bit compiler: - * _MinGW : gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_ - * _Cygwin : gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_ - * _Microsoft Visual Studio : VS 2017 15.7+_ + * _MinGW : gcc 5.5+_ + * _Cygwin : gcc 5.5+_ + * _Microsoft Visual Studio : VS 2019+_ __NOTE:__ Need to add `--autocrlf=input` after `git clone` in _Cygwin_. @@ -87,20 +95,10 @@ The Ginkgo CUDA module has the following __additional__ requirements: The Ginkgo OMP module has the following __additional__ requirements: * _MinGW_ or _Cygwin_ -Depending on the configuration settings, some manual work might be required. More details are availble in [windows section in INSTALL.md](INSTALL.md#building-ginkgo-in-windows): -* Build Ginkgo as shared library: - Add `PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` into the environment variable `PATH`. - `GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` is `windows_shared_library` by default. -* Build Ginkgo with Debug mode: - Some Debug build specific issues can appear depending on the machine and environment. The known issues are the following: - 1. `bigobj` issue: encountering `too many sections` needs the compilation flags `\bigobj` or `-Wa,-mbig-obj` - 2. `ld` issue: encountering `ld: error: export ordinal too large` needs the compilation flag `-O1` -* Build Ginkgo in _MinGW_: - If encountering the issue `cc1plus.exe: out of memory allocating 65536 bytes`, please follow the workaround in - [reference](https://www.intel.com/content/www/us/en/programmable/support/support-resources/knowledge-base/embedded/2016/cc1plus-exe--out-of-memory-allocating-65536-bytes.html), - or compile ginkgo again might work. - -__NOTE:__ _Microsoft Visual Studio_ only supports OpenMP 2.0, so it can not compile the ginkgo OMP module. 
+In these environments, two problems can be encountered, the solution for which is described in the +[windows section in INSTALL.md](INSTALL.md#building-ginkgo-in-windows): +* `ld: error: export ordinal too large` needs the compilation flag `-O1` +* `cc1plus.exe: out of memory allocating 65536 bytes` requires a modification of the environment __NOTE:__ Some restrictions will also apply on the version of C and C++ standard libraries installed on the system. This needs further investigation. @@ -117,9 +115,12 @@ mkdir build; cd build cmake -G "Unix Makefiles" .. && make ``` -By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run examples with this -executor. You would need to explicitly compile with the OpenMP and CUDA modules enabled -to run with these executors. Please refer to the [Installation page](./INSTALL.md). +By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run +examples with this executor. By default, Ginkgo tries to enable the relevant +modules depending on your machine environment (presence of CUDA, ...). You can +also explicitly compile with the OpenMP, CUDA, HIP or DPC++ modules enabled to +run the examples with these executors. Please refer to the [Installation +page](./INSTALL.md) for more details. After the installation, CMake can find ginkgo with `find_package(Ginkgo)`. An example can be found in the [`test_install`](test_install/CMakeLists.txt). diff --git a/TESTING.md b/TESTING.md index 482f8721792..c8e7ec38c89 100644 --- a/TESTING.md +++ b/TESTING.md @@ -27,6 +27,17 @@ run the following from the build folder: where `path/to/test` is the path returned by `make test`. 
+#### Using make quick_test +After compiling Ginkgo, use the following command inside the build folder to run +a small subset of tests that should execute quickly: + +```sh +make quick_test +``` + +These tests do not use GPU features except for a few device property queries, so +they may still fail if Ginkgo was compiled with GPU support, but no such GPU is +available. The output is equivalent to `make test`. #### Using CTest The tests can also be ran through CTest from the command line, for example when diff --git a/accessor/accessor_helper.hpp b/accessor/accessor_helper.hpp new file mode 100644 index 00000000000..1d90004d8f8 --- /dev/null +++ b/accessor/accessor_helper.hpp @@ -0,0 +1,794 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_ACCESSOR_HELPER_HPP_ +#define GKO_ACCESSOR_ACCESSOR_HELPER_HPP_ + + +#include +#include +#include +#include +#include + + +#include "index_span.hpp" +#include "utils.hpp" + + +namespace gko { +namespace acc { +/** + * This namespace contains helper functionality for the accessors. + * + * @note This namespace is not part of the public interface and can change + * without notice. + */ +namespace helper { +namespace detail { + + +/** + * This helper runs from first to last dimension in order to compute the index. + * The index is computed like this: + * indices: x1, x2, x3, ... + * compute(stride, x1, x2, x3) -> x1 * stride[0] + x2 * stride[1] + x3 + */ +template +struct row_major_helper_s { + static_assert(total_dim >= 1, "Dimensionality must be >= 1"); + static_assert(current_iter < total_dim, "Iteration must be < total_dim!"); + + static constexpr size_type dim_idx{current_iter - 1}; + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute(const std::array &size, + const std::array 1 ? total_dim - 1 : 0)> + &stride, + FirstType first, Indices &&... idxs) + { + // The ASSERT size check must NOT be indexed with `dim_idx` directly, + // otherwise, it leads to a linker error. The reason is likely that + // `std::array::operator[](const size_type &)` uses a + // reference. 
Since `dim_idx` is constexpr (and not defined in a + // namespace scope), it can't be odr-used. + return GKO_ACC_ASSERT(first < size[static_cast(dim_idx)]), + first * stride[dim_idx] + + row_major_helper_s::compute(size, stride, + std::forward( + idxs)...); + } +}; + +template +struct row_major_helper_s { + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute(const std::array &size, + const std::array 1 ? total_dim - 1 : 0)>, + FirstType first) + { + return GKO_ACC_ASSERT(first < size[total_dim - 1]), first; + } +}; + + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t +mult_dim_upwards(const std::array &) +{ + return 1; +} + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N), ValueType> +mult_dim_upwards(const std::array &size) +{ + return size[iter] * mult_dim_upwards(size); +} + + +template +constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t> + compute_default_row_major_stride_array(const std::array &, + Args &&... args) +{ + return {{std::forward(args)...}}; +} + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t< + (iter < N) && (iter == sizeof...(Args) + 1), std::array> +compute_default_row_major_stride_array(const std::array &size, + Args &&... args) +{ + return compute_default_row_major_stride_array( + size, std::forward(args)..., + mult_dim_upwards(size)); +} + + +} // namespace detail + + +/** + * Computes the storage index for the given indices with respect to the given + * stride array + */ +template +constexpr GKO_ACC_ATTRIBUTES ValueType compute_row_major_index( + const std::array &size, + const std::array 1 ? total_dim - 1 : 0)> &stride, + Indices &&... idxs) +{ + return detail::row_major_helper_s< + ValueType, total_dim, DimensionType>::compute(size, stride, + std::forward( + idxs)...); +} + + +/** + * Computes the default stride array from a given size, assuming there is no + * padding. 
+ * + * Example: std::array size={2, 3, 5, 7} results in a return value + * of: std::array = {3*5*7, 5*7, 7} + * + * @tparam ValueType value type of the values in the returned array + * + * @tparam dimensions number of dimensions in the given size + * + * @tparam DimensionType value type of the stored size + * + * @returns an std::array with the stride + * information. + */ +template +constexpr GKO_ACC_ATTRIBUTES + std::array 0 ? dimensions - 1 : 0)> + compute_default_row_major_stride_array( + const std::array &size) +{ + return detail::compute_default_row_major_stride_array(size); +} + + +namespace detail { + + +/** + * This helper walks through the index arguments from left to right (lowest to + * highest dimension) in order to properly use the stride for the scalar + * indices. The mask indicates which indices are actually used. The least + * significant bit set means using the last index, second bit corresponds to the + * second last dimension, and so on. + * + * Basically, this computes indices in a similar fashion as `row_major_helper_s` + * while having a mask signaling which indices to skip. + * + * Example: Mask = 0b1101 + * compute(stride, tuple(x1, x2, x3, x4)) + * -> x1 * stride[0] + x2 * stride[1] + x4 (x3 skipped since bit not set) + * The stride array is indexed by the number of set bits already processed, + * not by the dimension index, and the last used index needs no stride. + */ +template (mask &( + size_type{1} << (total_dim - 1 - dim_idx))) + : false> +struct row_major_masked_helper_s {}; + + +// bit for current dimensionality is set +template +struct row_major_masked_helper_s { + static_assert(mask & + (size_type{1} + << (dim_idx < total_dim ? total_dim - 1 - dim_idx : 0)), + "Do not touch the `mask_set` template parameter!"); + static_assert(dim_idx < total_dim, + "The iteration count must be smaller than total_dim here!!!"); + static_assert(set_bits_processed <= stride_size, + "The processed bits must be < total number of set bits!"); + + template + static constexpr GKO_ACC_ATTRIBUTES std::array + build_stride(const std::array &size, + Args &&... 
args) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, + total_dim, DimensionType>::build_stride(size, + std::forward(args)..., + mult_size_upwards(size)); + } + + static constexpr GKO_ACC_ATTRIBUTES ValueType + mult_size_upwards(const std::array &size) + { + return size[dim_idx] * row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, + stride_size, dim_idx + 1, total_dim, + DimensionType>::mult_size_upwards(size); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_mask_idx(const std::array &size, + const std::array &stride, + First first, Indices &&... idxs) + { + static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, + "Mismatching number of Idxs!"); + // If it is the last set dimension, there is no need for a stride + return GKO_ACC_ASSERT(first < size[dim_idx]), + first * (set_bits_processed == stride_size + ? 1 + : stride[set_bits_processed]) + + row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, + dim_idx + 1, total_dim, + DimensionType>::compute_mask_idx(size, stride, + std::forward( + idxs)...); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_direct_idx(const std::array &size, + const std::array &stride, + First first, Indices &&... idxs) + { + static_assert(sizeof...(Indices) == stride_size - set_bits_processed, + "Mismatching number of Idxs!"); + // If it is the last set dimension, there is no need for a stride + return GKO_ACC_ASSERT(first < size[dim_idx]), + first * (set_bits_processed == stride_size + ? 1 + : stride[set_bits_processed]) + + row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, + dim_idx + 1, total_dim, + DimensionType>::compute_direct_idx(size, stride, + std::forward( + idxs)...); + } +}; + +// first set bit (from the left) for current dimensionality encountered: +// Do not calculate stride value for it (since no lower dimension needs it)! 
+template +struct row_major_masked_helper_s { + static constexpr size_type set_bits_processed{0}; + static_assert(mask & + (size_type{1} + << (dim_idx < total_dim ? total_dim - 1 - dim_idx : 0)), + "Do not touch the `mask_set` template parameter!"); + static_assert(dim_idx < total_dim, + "The iteration count must be smaller than total_dim here!!!"); + + template + static constexpr GKO_ACC_ATTRIBUTES std::array + build_stride(const std::array &size, + Args &&... args) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, + total_dim, DimensionType>::build_stride(size, + std::forward( + args)...); + } + + static constexpr GKO_ACC_ATTRIBUTES ValueType + mult_size_upwards(const std::array &size) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, + total_dim, DimensionType>::mult_size_upwards(size); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_mask_idx(const std::array &size, + const std::array &stride, + First first, Indices &&... idxs) + { + static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, + "Mismatching number of Idxs!"); + // If it is the last set dimension, there is no need for a stride + return GKO_ACC_ASSERT(first < size[dim_idx]), + first * (set_bits_processed == stride_size + ? 1 + : stride[set_bits_processed]) + + row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, + dim_idx + 1, total_dim, + DimensionType>::compute_mask_idx(size, stride, + std::forward( + idxs)...); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_direct_idx(const std::array &size, + const std::array &stride, + First first, Indices &&... 
idxs) + { + static_assert(sizeof...(Indices) == stride_size - set_bits_processed, + "Mismatching number of Idxs!"); + // If it is the last set dimension, there is no need for a stride + return GKO_ACC_ASSERT(first < size[dim_idx]), + first * (set_bits_processed == stride_size + ? 1 + : stride[set_bits_processed]) + + row_major_masked_helper_s< + ValueType, mask, set_bits_processed + 1, stride_size, + dim_idx + 1, total_dim, + DimensionType>::compute_direct_idx(size, stride, + std::forward( + idxs)...); + } +}; + +// bit for current dimensionality is not set +template +struct row_major_masked_helper_s { + static_assert((mask & (size_type{1} + << (dim_idx < total_dim ? total_dim - 1 - dim_idx + : 0))) == 0, + "Do not touch the `mask_set` template parameter!"); + static_assert(dim_idx < total_dim, + "The iteration count must be smaller than total_dim here!!!"); + static_assert(set_bits_processed <= stride_size + 1, + "The processed bits must be < total number of set bits!"); + template + static constexpr GKO_ACC_ATTRIBUTES std::array + build_stride(const std::array &size, + Args &&... args) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, + total_dim, DimensionType>::build_stride(size, + std::forward( + args)...); + } + + static constexpr GKO_ACC_ATTRIBUTES ValueType + mult_size_upwards(const std::array &size) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, + total_dim, DimensionType>::mult_size_upwards(size); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_mask_idx(const std::array &size, + const std::array &stride, First, + Indices &&... 
idxs) + { + static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, + "Mismatching number of Idxs!"); + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, + total_dim, DimensionType>::compute_mask_idx(size, stride, + std::forward( + idxs)...); + } + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType compute_direct_idx( + const std::array &size, + const std::array &stride, Indices &&... idxs) + { + return row_major_masked_helper_s< + ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, + total_dim, DimensionType>::compute_direct_idx(size, stride, + std::forward( + idxs)...); + } +}; + +// Specialization for the end of recursion: build_stride array from created +// arguments +template +struct row_major_masked_helper_s { + static_assert(set_bits_processed <= stride_size + 1, + "The processed bits must be smaller than the total number of " + "set bits!"); + template + static constexpr GKO_ACC_ATTRIBUTES std::array + build_stride(const std::array &, Args &&... args) + { + return {{std::forward(args)...}}; + } + static constexpr GKO_ACC_ATTRIBUTES ValueType + mult_size_upwards(const std::array &) + { + return 1; + } + + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_mask_idx(const std::array &, + const std::array &) + { + return 0; + } + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute_direct_idx(const std::array &, + const std::array &) + { + return 0; + } +}; + + +} // namespace detail + + +/** + * Computes the memory index for the given indices considering the stride. + * Only indices are considered where the corresponding mask bit is set. + */ +template +constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index( + const std::array &size, + const std::array &stride, Indices &&... 
idxs) +{ + return detail::row_major_masked_helper_s< + ValueType, mask, 0, stride_size, 0, total_dim, + DimensionType>::compute_mask_idx(size, stride, + std::forward(idxs)...); +} + + +/** + * Computes the memory index for the given indices considering the stride. + */ +template +constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index_direct( + const std::array &size, + const std::array &stride, Indices &&... idxs) +{ + return detail::row_major_masked_helper_s< + ValueType, mask, 0, stride_size, 0, total_dim, + DimensionType>::compute_direct_idx(size, stride, + std::forward(idxs)...); +} + + +/** + * Computes the default stride array from a size and a given mask which + * indicates which array indices to consider. It is assumed that there is no + * padding + */ +template +constexpr GKO_ACC_ATTRIBUTES auto compute_default_masked_row_major_stride_array( + const std::array &size) +{ + return detail::row_major_masked_helper_s::build_stride(size); +} + + +namespace detail { + + +template +struct are_index_span_compatible_impl + : public std::integral_constant {}; + +template +struct are_index_span_compatible_impl + : public std::conditional_t< + std::is_integral>::value || + std::is_same, index_span>::value, + are_index_span_compatible_impl< + has_span || std::is_same, index_span>::value, + Args...>, + std::false_type> {}; + + +} // namespace detail + + +/** + * Evaluates if at least one type of Args is a gko::acc::index_span and the + * others either also gko::acc::index_span or fulfill std::is_integral + */ +template +using are_index_span_compatible = + detail::are_index_span_compatible_impl; + + +namespace detail { + + +template +GKO_ACC_ATTRIBUTES std::enable_if_t multidim_for_each_impl( + const std::array &, Callable callable, + Indices &&... 
indices) +{ + static_assert(iter == sizeof...(Indices), + "Number arguments must match current iteration!"); + callable(std::forward(indices)...); +} + +template +GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N)> multidim_for_each_impl( + const std::array &size, Callable callable, + Indices... indices) +{ + static_assert(iter == sizeof...(Indices), + "Number arguments must match current iteration!"); + for (size_type i = 0; i < size[iter]; ++i) { + multidim_for_each_impl(size, callable, indices..., i); + } +} + + +} // namespace detail + + +/** + * Creates a recursive for-loop for each dimension of `size` and calls + * `callable(indices...)` once for every index combination, iterating the + * first dimension in the outermost loop and the last one in the innermost. + */ +template +GKO_ACC_ATTRIBUTES void multidim_for_each( + const std::array &size, Callable &&callable) +{ + detail::multidim_for_each_impl<0>(size, std::forward(callable)); +} + + +namespace detail { + + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t +index_spans_in_size(const std::array &) +{ + return 0; +} + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N), int> +index_spans_in_size(const std::array &size, First first, + Remaining &&... remaining) +{ + static_assert(sizeof...(Remaining) + 1 == N - iter, + "Number of remaining spans must be equal to N - iter"); + return GKO_ACC_ASSERT(index_span{first}.is_valid()), + GKO_ACC_ASSERT(index_span{first} <= index_span{size[iter]}), + index_spans_in_size(size, + std::forward(remaining)...); +} + + +} // namespace detail + + +template +constexpr GKO_ACC_ATTRIBUTES int validate_index_spans( + const std::array &size, Spans &&... 
spans) +{ + return detail::index_spans_in_size<0>(size, std::forward(spans)...); +} + + +namespace detail { + + +template +constexpr std::enable_if_t +count_mask_dimensionality_impl() +{ + return 0; +} + +template +constexpr std::enable_if_t<(iter < N), size_type> +count_mask_dimensionality_impl() +{ + return (mask & size_type{1}) + + count_mask_dimensionality_impl<(mask >> 1), N, iter + 1>(); +} + + +} // namespace detail + + +template +constexpr size_type count_mask_dimensionality() +{ + return detail::count_mask_dimensionality_impl(); +} + + +/** + * Namespace for helper functions and structs for + * the block column major accessor. + */ +namespace blk_col_major { + + +/** + * Runs from first to last dimension in order to compute the index. + * + * The index is computed like this: + * indices: x1, x2, x3, ..., xn + * compute(stride, x1, x2, x3, ..., x(n-1), xn) -> + * x1 * stride[0] + x2 * stride[1] + ... + * + x(n-2) * stride[n-3] + x(n-1) + xn * stride[n-2] + * Note the swap of the last two strides, which makes this 'block column major'. + */ +template +struct index_helper_s { + static_assert(total_dim >= 1, "Dimensionality must be >= 1"); + static_assert(current_iter <= total_dim, "Iteration must be <= total_dim!"); + + static constexpr size_type dim_idx{current_iter - 1}; + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute(const std::array &size, + const std::array 0 ? total_dim - 1 : 0)> + &stride, + FirstType first, Indices &&... 
idxs) + { + if (current_iter == total_dim - 1) { + return GKO_ACC_ASSERT(first < size[dim_idx]), + first + + index_helper_s:: + compute(size, stride, + std::forward(idxs)...); + } + + return GKO_ACC_ASSERT(first < size[dim_idx]), + first * stride[dim_idx] + + index_helper_s:: + compute(size, stride, std::forward(idxs)...); + } +}; + +template +struct index_helper_s { + static_assert(total_dim >= 2, "Dimensionality must be >= 2"); + + static constexpr size_type dim_idx{total_dim - 1}; + + template + static constexpr GKO_ACC_ATTRIBUTES ValueType + compute(const std::array &size, + const std::array 1 ? total_dim - 1 : 0)> + &stride, + FirstType first) + { + return GKO_ACC_ASSERT(first < size[total_dim - 1]), + first * stride[dim_idx - 1]; + } +}; + +/** + * Computes the flat storage index for block-column-major access. + * + * @param size the multi-dimensional sizes of the range of values + * @param stride the stride array + * @param idxs the multi-dimensional indices of the desired entry + */ +template +constexpr GKO_ACC_ATTRIBUTES ValueType compute_index( + const std::array &size, + const std::array 0 ? total_dim - 1 : 0)> &stride, + Indices &&... idxs) +{ + return index_helper_s::compute( + size, stride, std::forward(idxs)...); +} + + +template +constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t<(iter == N - 1) && (iter == sizeof...(Args) + 1), + std::array> + default_stride_array_impl(const std::array &size, + Args &&... args) +{ + return {{std::forward(args)..., size[N - 2]}}; +} + +template +constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N - 1 || iter == N) && + (iter == sizeof...(Args) + 1), + std::array> +default_stride_array_impl(const std::array &size, Args &&... args) +{ + return default_stride_array_impl( + size, std::forward(args)..., + detail::mult_dim_upwards(size)); +} + +template +constexpr GKO_ACC_ATTRIBUTES + std::array 0 ? 
dimensions - 1 : 0)> + default_stride_array(const std::array &size) +{ + return default_stride_array_impl(size); +} + + +} // namespace blk_col_major + + +} // namespace helper +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_ACCESSOR_HELPER_HPP_ diff --git a/accessor/accessor_references.hpp b/accessor/accessor_references.hpp new file mode 100644 index 00000000000..19c2ddbc49a --- /dev/null +++ b/accessor/accessor_references.hpp @@ -0,0 +1,467 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_ACCESSOR_REFERENCES_HPP_ +#define GKO_ACCESSOR_ACCESSOR_REFERENCES_HPP_ + + +#include +#include + + +#include "utils.hpp" + + +// CUDA TOOLKIT < 11 does not support constexpr in combination with +// thrust::complex, which is why constexpr is only present in later versions +#if defined(__CUDA_ARCH__) && defined(__CUDACC_VER_MAJOR__) && \ + (__CUDACC_VER_MAJOR__ < 11) + +#define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR + +#else + +#define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR constexpr + +#endif // __CUDA_ARCH__ && __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__ < 11 + + +namespace gko { +namespace acc { +/** + * This namespace is not part of the public interface and can change without + * notice. + */ +namespace detail { + + +// tests if the cast operator to `ValueType` is present +template > +struct has_cast_operator : std::false_type {}; + +template +struct has_cast_operator< + Ref, ValueType, + xstd::void_t().Ref::operator ValueType())>> + : std::true_type {}; + + +/** + * @internal + * converts `ref` to ValueType while preferring the cast operator overload + * from class `Ref` before falling back to a simple + * `static_cast`. + * + * This function is only needed for CUDA TOOLKIT < 11 because + * thrust::complex has a constructor call: `template complex(const T + * &other) : real(other), imag()`, which is always preferred over the + * overloaded `operator value_type()`. 
+ */ +template +constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, ValueType> + to_value_type(const Ref &ref) +{ + return ref.Ref::operator ValueType(); +} + +template +constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, ValueType> + to_value_type(const Ref &ref) +{ + return static_cast(ref); +} + + +/** + * This is a mixin which defines the binary operators for *, /, +, - for the + * Reference class, the unary operator -, and the assignment operators + * *=, /=, +=, -= + * All assignment operators expect an rvalue reference (Reference &&) for + * the Reference class in order to prevent copying the Reference object. + * + * @tparam Reference The reference class this mixin provides operator overloads + * for. The reference class needs to overload the cast + * operator to ValueType + * + * @tparam ArithmeticType arithmetic type the Reference class is supposed + * to represent. + * + * @warning This struct should only be used by reference classes. + */ +template +struct enable_reference_operators { + using arithmetic_type = std::remove_cv_t; + +#define GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(_op) \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES arithmetic_type \ + operator _op(const Reference &ref1, const Reference &ref2) \ + { \ + return to_value_type(ref1) \ + _op to_value_type(ref2); \ + } \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES arithmetic_type \ + operator _op(const Reference &ref, const arithmetic_type &a) \ + { \ + return to_value_type(ref) _op a; \ + } \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES arithmetic_type \ + operator _op(const arithmetic_type &a, const Reference &ref) \ + { \ + return a _op to_value_type(ref); \ + } + + GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(*) + GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(/) + GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(+) + GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(-) +#undef 
GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD + +#define GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(_oper, _op) \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES arithmetic_type \ + _oper(Reference &&ref1, const Reference &ref2) \ + { \ + return std::move(ref1) = to_value_type(ref1) \ + _op to_value_type(ref2); \ + } \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES arithmetic_type \ + _oper(Reference &&ref, const arithmetic_type &a) \ + { \ + return std::move(ref) = to_value_type(ref) _op a; \ + } + + GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(operator*=, *) + GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(operator/=, /) + GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(operator+=, +) + GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(operator-=, -) +#undef GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD + +#define GKO_ACC_REFERENCE_COMPARISON_OPERATOR_OVERLOAD(_op) \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES bool \ + operator _op(const Reference &ref1, const Reference &ref2) \ + { \ + return to_value_type(ref1) \ + _op to_value_type(ref2); \ + } \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES bool \ + operator _op(const Reference &ref, const arithmetic_type &a) \ + { \ + return to_value_type(ref) _op a; \ + } \ + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ + GKO_ACC_ATTRIBUTES bool \ + operator _op(const arithmetic_type &a, const Reference &ref) \ + { \ + return a _op to_value_type(ref); \ + } + + GKO_ACC_REFERENCE_COMPARISON_OPERATOR_OVERLOAD(==) +#undef GKO_ACC_REFERENCE_COMPARISON_OPERATOR_OVERLOAD + + friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE GKO_ACC_ATTRIBUTES + arithmetic_type + operator-(const Reference &ref) + { + return -to_value_type(ref); + } +}; + +// There is no more need for this macro in this file +#undef GKO_ACC_ENABLE_REFERENCE_CONSTEXPR + + +} // namespace detail + + +/** + 
* This namespace contains reference classes used inside accessors. + * + * @warning These classes should only be used by accessors. + */ +namespace reference_class { + + +/** + * Reference class for a different storage than arithmetic type. The + * conversion between both formats is done with a simple static_cast. + * + * Copying this reference is disabled, but move construction is possible to + * allow for an additional layer (like gko::acc::range). + * The assignment operator only works for an rvalue reference (&&) to + * prevent accidentally copying the reference and working on a reference. + * + * @tparam ArithmeticType Type used for arithmetic operations, therefore, + * the type which is used for input and output of this + * class. + * + * @tparam StorageType Type actually used as a storage, which is converted + * to ArithmeticType before usage + */ +template +class reduced_storage + : public detail::enable_reference_operators< + reduced_storage, ArithmeticType> { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = StorageType; + + // Allow move construction, so perfect forwarding is possible (required + // for `range` support) + reduced_storage(reduced_storage &&) = default; + + reduced_storage() = delete; + + ~reduced_storage() = default; + + // Forbid copy construction + reduced_storage(const reduced_storage &) = delete; + + constexpr explicit GKO_ACC_ATTRIBUTES reduced_storage( + storage_type *const GKO_ACC_RESTRICT ptr) + : ptr_{ptr} + {} + + constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const + { + const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + return static_cast(*r_ptr); + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(arithmetic_type val) &&noexcept + { + storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + *r_ptr = static_cast(val); + return val; + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(const reduced_storage &ref) && + { + std::move(*this) = static_cast(ref); + 
return static_cast(*this); + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(reduced_storage &&ref) &&noexcept + { + std::move(*this) = static_cast(ref); + return static_cast(*this); + } + +private: + storage_type *const GKO_ACC_RESTRICT ptr_; +}; + +// Specialization for const storage_type to prevent `operator=` +template +class reduced_storage + : public detail::enable_reference_operators< + reduced_storage, ArithmeticType> { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = const StorageType; + + // Allow move construction, so perfect forwarding is possible + reduced_storage(reduced_storage &&) = default; + + reduced_storage() = delete; + + ~reduced_storage() = default; + + // Forbid copy construction and move assignment + reduced_storage(const reduced_storage &) = delete; + + reduced_storage &operator=(reduced_storage &&) = delete; + + constexpr explicit GKO_ACC_ATTRIBUTES reduced_storage( + storage_type *const GKO_ACC_RESTRICT ptr) + : ptr_{ptr} + {} + + constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const + { + const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + return static_cast(*r_ptr); + } + +private: + storage_type *const GKO_ACC_RESTRICT ptr_; +}; + + +template +constexpr remove_complex_t abs( + const reduced_storage &ref) +{ + using std::abs; + return abs(static_cast(ref)); +} + + +/** + * Reference class for a different storage than arithmetic type with the + * addition of a scaling factor. The conversion between both formats is done + * with a static_cast to the ArithmeticType, followed by a multiplication + * of the scalar (when reading; for writing, the new value is divided by the + * scalar before casting to the StorageType). + * + * Copying this reference is disabled, but move construction is possible to + * allow for an additional layer (like gko::acc::range). 
+ * The assignment operator only works for an rvalue reference (&&) to + * prevent accidentally copying and working on the reference. + * + * @tparam ArithmeticType Type used for arithmetic operations, therefore, + * the type which is used for input and output of this + * class. + * + * @tparam StorageType Type actually used as a storage, which is converted + * to ArithmeticType before usage + */ +template +class scaled_reduced_storage + : public detail::enable_reference_operators< + scaled_reduced_storage, ArithmeticType> { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = StorageType; + + // Allow move construction, so perfect forwarding is possible + scaled_reduced_storage(scaled_reduced_storage &&) = default; + + scaled_reduced_storage() = delete; + + ~scaled_reduced_storage() = default; + + // Forbid copy construction + scaled_reduced_storage(const scaled_reduced_storage &) = delete; + + constexpr explicit GKO_ACC_ATTRIBUTES scaled_reduced_storage( + storage_type *const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) + : ptr_{ptr}, scalar_{scalar} + {} + + constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const + { + const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + return static_cast(*r_ptr) * scalar_; + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(arithmetic_type val) &&noexcept + { + storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + *r_ptr = static_cast(val / scalar_); + return val; + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(const scaled_reduced_storage &ref) && + { + std::move(*this) = static_cast(ref); + return static_cast(*this); + } + + constexpr GKO_ACC_ATTRIBUTES arithmetic_type + operator=(scaled_reduced_storage &&ref) &&noexcept + { + std::move(*this) = static_cast(ref); + return static_cast(*this); + } + +private: + storage_type *const GKO_ACC_RESTRICT ptr_; + const arithmetic_type scalar_; +}; + +// Specialization for constant storage_type (no `operator=`) +template 
+class scaled_reduced_storage + : public detail::enable_reference_operators< + scaled_reduced_storage, + ArithmeticType> { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = const StorageType; + + // Allow move construction, so perfect forwarding is possible + scaled_reduced_storage(scaled_reduced_storage &&) = default; + + scaled_reduced_storage() = delete; + + ~scaled_reduced_storage() = default; + + // Forbid copy construction and move assignment + scaled_reduced_storage(const scaled_reduced_storage &) = delete; + + scaled_reduced_storage &operator=(scaled_reduced_storage &&) = delete; + + constexpr explicit GKO_ACC_ATTRIBUTES scaled_reduced_storage( + storage_type *const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) + : ptr_{ptr}, scalar_{scalar} + {} + + constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const + { + const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + return static_cast(*r_ptr) * scalar_; + } + +private: + storage_type *const GKO_ACC_RESTRICT ptr_; + const arithmetic_type scalar_; +}; + + +template +constexpr remove_complex_t abs( + const scaled_reduced_storage &ref) +{ + using std::abs; + return abs(static_cast(ref)); +} + + +} // namespace reference_class +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_ACCESSOR_REFERENCES_HPP_ diff --git a/accessor/block_col_major.hpp b/accessor/block_col_major.hpp new file mode 100644 index 00000000000..6495180ba2f --- /dev/null +++ b/accessor/block_col_major.hpp @@ -0,0 +1,208 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_BLOCK_COL_MAJOR_HPP_ +#define GKO_ACCESSOR_BLOCK_COL_MAJOR_HPP_ + +#include + +#include "accessor_helper.hpp" +#include "range.hpp" +#include "utils.hpp" + + +namespace gko { +namespace acc { + + +/** + * A bridge between a range and a block-column-major memory layout. + * + * Only the innermost two dimensions are regarded as defining + * a column-major matrix, and the rest of the dimensions are treated + * identically to \ref row_major. + * + * You should not try to explicitly create an instance of this accessor. + * Instead, supply it as a template parameter to a range, and pass the + * constructor parameters for this class to the range (it will forward it to + * this class). 
+ * + * @tparam ValueType type of values this accessor returns + * @tparam Dimensionality number of dimensions of this accessor + */ +template +class block_col_major { +public: + friend class range; + + static_assert(Dimensionality != 0, + "This accessor does not support a dimensionality of 0!"); + static_assert(Dimensionality != 1, + "Please use row_major accessor for 1D ranges."); + + /** + * Number of dimensions of the accessor. + */ + static constexpr size_type dimensionality = Dimensionality; + + /** + * Type of values returned by the accessor. + */ + using value_type = ValueType; + + /** + * Type of underlying data storage. + */ + using data_type = value_type *; + + using const_accessor = block_col_major; + using stride_type = std::array; + using length_type = std::array; + +protected: + /** + * Creates a block_col_major accessor. + * + * @param data pointer to the block of memory containing the data + * @param lengths size / length of the accesses of each dimension + * @param stride distance (in elements) between starting positions of + * the dimensions (i.e. + * `x_1 * stride_1 + x_2 * stride_2 * ... + x_(n-1) + x_n * stride_(n-1)` + * points to the element at (x_1, x_2, ..., x_n)) + */ + constexpr GKO_ACC_ATTRIBUTES explicit block_col_major(length_type size, + data_type data, + stride_type stride) + : lengths(size), data{data}, stride(stride) + {} + + /** + * Creates a block_col_major accessor with a default stride + * (assumes no padding) + * + * @param data pointer to the block of memory containing the data + * @param lengths size / length of the accesses of each dimension + */ + constexpr GKO_ACC_ATTRIBUTES explicit block_col_major(length_type size, + data_type data) + : lengths(size), + data{data}, + stride(helper::blk_col_major::default_stride_array(lengths)) + {} + +public: + /** + * Creates a block_col_major range which contains a read-only version of + * the current accessor. + * + * @returns a block column major range which is read-only. 
+ */ + constexpr GKO_ACC_ATTRIBUTES range to_const() const + { + // TODO Remove this functionality all together (if requested) + return range(lengths, data, stride); + } + + /** + * Returns the data element at the specified indices + * + * @param row row index + * @param col column index + * + * @return data element at (indices...) + */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, value_type &> + operator()(Indices &&... indices) const + { + return data[helper::blk_col_major::compute_index( + lengths, stride, std::forward(indices)...)]; + } + + /** + * Returns the sub-range spanning the range (x1_span, x2_span, ...) + * + * @param rows row span + * @param cols column span + * + * @return sub-range spanning the given spans + */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, + range> + operator()(SpanTypes... spans) const + { + return helper::validate_index_spans(lengths, spans...), + range{ + length_type{ + (index_span{spans}.end - index_span{spans}.begin)...}, + data + helper::blk_col_major::compute_index( + lengths, stride, (index_span{spans}.begin)...), + stride}; + } + + /** + * Returns the length in dimension `dimension`. + * + * @param dimension a dimension index + * + * @return length in dimension `dimension` + */ + constexpr GKO_ACC_ATTRIBUTES size_type length(size_type dimension) const + { + return lengths[dimension]; + } + + /** + * An array of dimension sizes. + */ + const length_type lengths; + + /** + * Reference to the underlying data. + */ + const data_type data; + + /** + * Distance between consecutive 'layers' for each dimension + * (except the second, for which it is 1). 
+ */ + const stride_type stride; +}; + + +} // namespace acc +} // namespace gko + +#endif // GKO_ACCESSOR_BLOCK_COL_MAJOR_HPP_ diff --git a/accessor/index_span.hpp b/accessor/index_span.hpp new file mode 100644 index 00000000000..6feb6007dad --- /dev/null +++ b/accessor/index_span.hpp @@ -0,0 +1,137 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_ACCESSOR_INDEX_SPAN_HPP_ +#define GKO_ACCESSOR_INDEX_SPAN_HPP_ + +#include "utils.hpp" + + +namespace gko { +namespace acc { + + +/** + * An index_span is a lightweight structure used to describe a contiguous span + * of indices of one dimension. + * + * The main purpose of the index_span is to create sub-ranges from other ranges. + * An index_span `is` represents a contiguous set of indexes in one dimension of + * the range, starting from `is.begin` (inclusive) and ending at index `is.end` + * (exclusive). An index_span is only valid if its end is larger than its + * beginning. + * + * index_spans can be compared using `==` and `!=` operators. Two spans are + * equal iff both their `begin` and `end` values are identical. + * + * index_spans also have two distinct partial orders defined: + * 1. `x < y` (`y > x`) iff `x.end < y.begin` + * 2. `x <= y` (`y >= x`) iff `x.end <= y.begin` + * Note: `x < y || x == y` is not equivalent to `x <= y`. + */ +struct index_span { + /** + * Creates an index_span. + * + * @param begin the beginning (inclusive) of the index_span + * @param end the end (exclusive) of the index_span + * + */ + GKO_ACC_ATTRIBUTES constexpr index_span(size_type begin, + size_type end) noexcept + : begin{begin}, end{end} + {} + + /** + * Creates an index_span representing the point `point`. + * + * The begin is set to `point`, and the end to `point + 1` + * + * @param point the point which the index_span represents + */ + GKO_ACC_ATTRIBUTES constexpr index_span(size_type point) noexcept + : index_span{point, point + 1} + {} + + /** + * Checks if an index_span is valid.
+ * + * @returns true iff `this->begin < this->end` + */ + GKO_ACC_ATTRIBUTES constexpr bool is_valid() const { return begin < end; } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator<(const index_span &first, + const index_span &second) + { + return first.end < second.begin; + } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator<=( + const index_span &first, const index_span &second) + { + return first.end <= second.begin; + } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator>(const index_span &first, + const index_span &second) + { + return second < first; + } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator>=( + const index_span &first, const index_span &second) + { + return second <= first; + } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator==( + const index_span &first, const index_span &second) + { + return first.begin == second.begin && first.end == second.end; + } + + friend GKO_ACC_ATTRIBUTES constexpr bool operator!=( + const index_span &first, const index_span &second) + { + return !(first == second); + } + + const size_type begin; + const size_type end; +}; + + +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_INDEX_SPAN_HPP_ diff --git a/accessor/range.hpp b/accessor/range.hpp new file mode 100644 index 00000000000..ee3eb59c763 --- /dev/null +++ b/accessor/range.hpp @@ -0,0 +1,172 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_RANGE_HPP_ +#define GKO_ACCESSOR_RANGE_HPP_ + + +#include + + +#include "utils.hpp" + + +namespace gko { +namespace acc { + + +template +class range { +private: + /** + * the default check_if_same gives false. + * + * @tparam Ref the reference type + * @tparam Args the input type + */ + template + struct check_if_same : public std::false_type {}; + + /** + * check_if_same gives true if the decay type of input is the same type as + * Ref. + * + * @tparam Ref the reference type + */ + template + struct check_if_same : public std::true_type {}; + +public: + /** + * The type of the underlying accessor. + */ + using accessor = Accessor; + + /** + * The number of dimensions of the range. + */ + static constexpr size_type dimensionality = accessor::dimensionality; + + /** + * Use the default destructor. + */ + ~range() = default; + + /** + * Creates a new range. 
+ * + * @tparam AccessorParams types of parameters forwarded to the accessor + * constructor. + * + * @param args parameters forwarded to Accessor constructor. + * + * @note We use SFINAE to allow for a default copy and move constructor to + * be generated, so a `range` is trivially copyable if the `Accessor` + * is trivially copyable. + */ + template ...>::value, + int> = 0> + GKO_ACC_ATTRIBUTES constexpr explicit range(AccessorParams &&... args) + : accessor_{std::forward(args)...} + {} + + /** + * Returns a value (or a sub-range) with the specified indexes. + * + * @tparam DimensionTypes The types of indexes. Supported types depend on + * the underlying accessor, but are usually either + * integer types or index_spans. If at least one + * index is a span, the returned value will be a + * sub-range (if that is supported by the accessor). + * + * @param dimensions the indexes of the values or index_spans for the new + * range. + * + * @return a value on position `(dimensions...)` or a sub-range with the + * given index_spans. + */ + template + GKO_ACC_ATTRIBUTES constexpr auto operator()( + DimensionTypes &&... dimensions) const + -> decltype(std::declval()( + std::forward(dimensions)...)) + { + static_assert(sizeof...(dimensions) <= dimensionality, + "Too many dimensions in range call"); + return accessor_(std::forward(dimensions)...); + } + + /** + * Returns the length of the specified dimension of the range. + * + * @param dimension the dimension whose length is returned + * + * @return the length of the `dimension`-th dimension of the range + */ + GKO_ACC_ATTRIBUTES constexpr size_type length(size_type dimension) const + { + return accessor_.length(dimension); + } + + /** + * Returns a pointer to the accessor. + * + * Can be used to access data and functions of a specific accessor.
+ * + * @return pointer to the accessor + */ + GKO_ACC_ATTRIBUTES constexpr const accessor *operator->() const noexcept + { + return &accessor_; + } + + /** + * Returns a reference to the accessor. + * + * @return reference to the accessor + */ + GKO_ACC_ATTRIBUTES constexpr const accessor &get_accessor() const noexcept + { + return accessor_; + } + +private: + accessor accessor_; +}; + + +} // namespace acc +} // namespace gko + +#endif // GKO_ACCESSOR_RANGE_HPP_ diff --git a/accessor/reduced_row_major.hpp b/accessor/reduced_row_major.hpp new file mode 100644 index 00000000000..2d271fbd340 --- /dev/null +++ b/accessor/reduced_row_major.hpp @@ -0,0 +1,276 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_REDUCED_ROW_MAJOR_HPP_ +#define GKO_ACCESSOR_REDUCED_ROW_MAJOR_HPP_ + + +#include +#include +#include +#include + + +#include "accessor_helper.hpp" +#include "accessor_references.hpp" +#include "index_span.hpp" +#include "range.hpp" +#include "utils.hpp" + + +namespace gko { +/** + * @brief The accessor namespace. + * + * @ingroup accessor + */ +namespace acc { + + +/** + * The reduced_row_major class allows a storage format that is different from + * the arithmetic format (which is returned from the brace operator). + * As storage, the StorageType is used. + * + * This accessor uses row-major access. For example for three dimensions, + * neighboring z coordinates are next to each other in memory, followed by y + * coordinates and then x coordinates. + * + * @tparam Dimensionality The number of dimensions managed by this accessor + * + * @tparam ArithmeticType Value type used for arithmetic operations and + * for in- and output + * + * @tparam StorageType Value type used for storing the actual value to memory + * + * @note This class only manages the accesses and not the memory itself. 
+ */ +template +class reduced_row_major { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = StorageType; + static constexpr size_type dimensionality{Dimensionality}; + static constexpr bool is_const{std::is_const::value}; + using const_accessor = + reduced_row_major; + + static_assert(Dimensionality >= 1, + "Dimensionality must be a positive number!"); + + friend class range; + +protected: + using dim_type = std::array; + using storage_stride_type = std::array; + using reference_type = + reference_class::reduced_storage; + + /** + * Creates the accessor for an already allocated storage space with a + * stride. The first stride is used for computing the index for the first + * index, the second stride for the second index, and so on. + * + * @param size multidimensional size of the memory + * @param storage pointer to the block of memory containing the storage + * @param stride stride array used for memory accesses + */ + constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, + storage_type *storage, + storage_stride_type stride) + : size_(size), storage_{storage}, stride_(stride) + {} + + /** + * Creates the accessor for an already allocated storage space with a + * stride. The first stride is used for computing the index for the first + * index, the second stride for the second index, and so on. + * + * @param storage pointer to the block of memory containing the storage + * @param size multidimensional size of the memory + * @param strides strides used for memory accesses + */ + template + constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, + storage_type *storage, + Strides &&... strides) + : reduced_row_major{ + size, storage, + storage_stride_type{{std::forward(strides)...}}} + { + static_assert(sizeof...(Strides) + 1 == dimensionality, + "Number of provided Strides must be dimensionality - 1!"); + } + + /** + * Creates the accessor for an already allocated storage space. 
+ * It is assumed that all accesses are without padding. + * + * @param storage pointer to the block of memory containing the storage + * @param size multidimensional size of the memory + */ + constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, + storage_type *storage) + : reduced_row_major{ + size, storage, + helper::compute_default_row_major_stride_array(size)} + {} + + /** + * Creates an empty accessor (pointing nowhere with an empty size) + */ + constexpr GKO_ACC_ATTRIBUTES reduced_row_major() + : reduced_row_major{{0, 0, 0}, nullptr} + {} + +public: + /** + * Creates a reduced_row_major range which contains a read-only version of + * the current accessor. + * + * @returns a reduced_row_major range which is read-only. + */ + constexpr GKO_ACC_ATTRIBUTES range to_const() const + { + return range{size_, storage_, stride_}; + } + + /** + * Returns the length in dimension `dimension`. + * + * @param dimension a dimension index + * + * @returns length in dimension `dimension` + */ + constexpr GKO_ACC_ATTRIBUTES size_type length(size_type dimension) const + { + return dimension < dimensionality ? size_[dimension] : 1; + } + + /** + * Returns the stored value for the given indices. If the storage is const, + * a value is returned, otherwise, a reference is returned. + * + * @param indices indices which value is supposed to access + * + * @returns the stored value if the accessor is const (if the storage type + * is const), or a reference if the accessor is non-const + */ + template + constexpr GKO_ACC_ATTRIBUTES std::enable_if_t< + are_all_integral::value, + std::conditional_t> + operator()(Indices &&... indices) const + { + return reference_type{storage_ + + compute_index(std::forward(indices)...)}; + } + + /** + * Returns a sub-range spanning the current range (x1_span, x2_span, ...) + * + * @param spans span for the indices + * + * @returns a sub-range for the given spans.
+ */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, + range> + operator()(SpanTypes... spans) const + { + return helper::validate_index_spans(size_, spans...), + range{ + dim_type{ + (index_span{spans}.end - index_span{spans}.begin)...}, + storage_ + compute_index((index_span{spans}.begin)...), + stride_}; + } + + /** + * Returns the size of the accessor + * + * @returns the size of the accessor + */ + constexpr GKO_ACC_ATTRIBUTES dim_type get_size() const { return size_; } + + /** + * Returns a pointer to a stride array of size dimensionality - 1 + * + * @returns returns a pointer to a stride array of size dimensionality - 1 + */ + GKO_ACC_ATTRIBUTES + constexpr const storage_stride_type &get_stride() const { return stride_; } + + /** + * Returns the pointer to the storage data + * + * @returns the pointer to the storage data + */ + constexpr GKO_ACC_ATTRIBUTES storage_type *get_stored_data() const + { + return storage_; + } + + /** + * Returns a const pointer to the storage data + * + * @returns a const pointer to the storage data + */ + constexpr GKO_ACC_ATTRIBUTES const storage_type *get_const_storage() const + { + return storage_; + } + +protected: + template + constexpr GKO_ACC_ATTRIBUTES size_type + compute_index(Indices &&... indices) const + { + static_assert(sizeof...(Indices) == dimensionality, + "Number of indices must match dimensionality!"); + return helper::compute_row_major_index( + size_, stride_, std::forward(indices)...); + } + +private: + const dim_type size_; + storage_type *const storage_; + const storage_stride_type stride_; +}; + + +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_REDUCED_ROW_MAJOR_HPP_ diff --git a/accessor/row_major.hpp b/accessor/row_major.hpp new file mode 100644 index 00000000000..3aed9fe685f --- /dev/null +++ b/accessor/row_major.hpp @@ -0,0 +1,206 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_ROW_MAJOR_HPP_ +#define GKO_ACCESSOR_ROW_MAJOR_HPP_ + +#include + +#include "accessor_helper.hpp" +#include "range.hpp" +#include "utils.hpp" + + +namespace gko { +namespace acc { + + +/** + * A row_major accessor is a bridge between a range and the row-major memory + * layout. + * + * You should never try to explicitly create an instance of this accessor. 
+ * Instead, supply it as a template parameter to a range, and pass the + * constructor parameters for this class to the range (it will forward it to + * this class). + * + * @warning For backward compatibility reasons, a specialization is provided + * for dimensionality == 2. + * + * @tparam ValueType type of values this accessor returns + * @tparam Dimensionality number of dimensions of this accessor + */ +template +class row_major { +public: + friend class range; + + static_assert(Dimensionality != 0, + "This accessor does not support a dimensionality of 0!"); + + /** + * Number of dimensions of the accessor. + */ + static constexpr size_type dimensionality = Dimensionality; + + /** + * Type of values returned by the accessor. + */ + using value_type = ValueType; + + /** + * Type of underlying data storage. + */ + using data_type = value_type *; + + using const_accessor = row_major; + using length_type = std::array; + using stride_type = std::array; + +protected: + /** + * Creates a row_major accessor. + * + * @param size size / length of the accesses of each dimension + * @param data pointer to the block of memory containing the data + * @param stride distance (in elements) between starting positions of + * the dimensions (i.e. + * `x_1 * stride_1 + x_2 * stride_2 + ...
+ x_n` + * points to the element at (x_1, x_2, ..., x_n)) + */ + constexpr GKO_ACC_ATTRIBUTES explicit row_major(length_type size, + data_type data, + stride_type stride) + : lengths(size), data{data}, stride(stride) + {} + + /** + * Creates a row_major accessor with a default stride (assumes no + * padding) + * + * @param size size / length of the accesses of each dimension + * @param data pointer to the block of memory containing the data + */ + constexpr GKO_ACC_ATTRIBUTES explicit row_major(length_type size, + data_type data) + : row_major{size, data, + helper::compute_default_row_major_stride_array< + typename stride_type::value_type>(size)} + {} + +public: + /** + * Creates a row_major range which contains a read-only version of the + * current accessor. + * + * @returns a row major range which is read-only. + */ + constexpr GKO_ACC_ATTRIBUTES range to_const() const + { + // TODO Remove this functionality altogether (if requested) + return range(lengths, data, stride); + } + + /** + * Returns the data element at the specified indices + * + * @param indices pack of indices identifying the data element + * to access + * + * @return data element at (indices...) + */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, value_type &> + operator()(Indices &&... indices) const + { + return data[helper::compute_row_major_index( + lengths, stride, std::forward(indices)...)]; + } + + /** + * Returns the sub-range spanning the range (x1_span, x2_span, ...) + * + * @param spans pack of spans, each selecting the sub-range + * along one dimension + * + * @return sub-range spanning the given spans + */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, + range> + operator()(SpanTypes... 
spans) const + { + return helper::validate_index_spans(lengths, spans...), + range{ + length_type{ + (index_span{spans}.end - index_span{spans}.begin)...}, + data + helper::compute_row_major_index( + lengths, stride, (index_span{spans}.begin)...), + stride}; + } + + /** + * Returns the length in dimension `dimension`. + * + * @param dimension a dimension index + * + * @return length in dimension `dimension` + */ + constexpr GKO_ACC_ATTRIBUTES size_type length(size_type dimension) const + { + return lengths[dimension]; + } + + /** + * An array of dimension sizes. + */ + const length_type lengths; + + /** + * Reference to the underlying data. + */ + const data_type data; + + /** + * Distance between consecutive rows for each dimension (except the + * first). + */ + const stride_type stride; +}; + + +} // namespace acc +} // namespace gko + +#endif // GKO_ACCESSOR_ROW_MAJOR_HPP_ diff --git a/accessor/scaled_reduced_row_major.hpp b/accessor/scaled_reduced_row_major.hpp new file mode 100644 index 00000000000..356a116fb0a --- /dev/null +++ b/accessor/scaled_reduced_row_major.hpp @@ -0,0 +1,497 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_SCALED_REDUCED_ROW_MAJOR_HPP_ +#define GKO_ACCESSOR_SCALED_REDUCED_ROW_MAJOR_HPP_ + + +#include +#include +#include + + +#include "accessor_helper.hpp" +#include "accessor_references.hpp" +#include "index_span.hpp" +#include "range.hpp" +#include "utils.hpp" + + +namespace gko { +/** + * @brief The accessor namespace. + * + * @ingroup accessor + */ +namespace acc { + + +namespace detail { + + +// In case of a const type, do not provide a write function +template ::value> +struct enable_write_scalar { + using scalar_type = ScalarType; +}; + +// In case of a non-const type, enable the write function +template +struct enable_write_scalar { + static_assert(Dimensionality >= 1, + "Dimensionality must be a positive number!"); + + using scalar_type = ScalarType; + + /** + * Writes the scalar value at the given indices. + * The number of indices must be equal to the number of dimensions, even + * if some of the indices are ignored (depending on the scalar mask). + * + * @param value value to write + * @param indices indices where to write the value + * + * @returns the written value. 
+ */ + template + constexpr GKO_ACC_ATTRIBUTES scalar_type + write_scalar_masked(scalar_type value, Indices &&... indices) const + { + static_assert(sizeof...(Indices) == Dimensionality, + "Number of indices must match dimensionality!"); + scalar_type *GKO_ACC_RESTRICT rest_scalar = self()->scalar_; + return rest_scalar[self()->compute_mask_scalar_index( + std::forward(indices)...)] = value; + } + + /** + * Writes the scalar value at the given indices. + * Only the actually used indices must be provided, meaning the number of + * specified indices must be equal to the number of set bits in the + * scalar mask. + * + * @param value value to write + * @param indices indices where to write the value + * + * @returns the written value. + */ + template + constexpr GKO_ACC_ATTRIBUTES scalar_type + write_scalar_direct(scalar_type value, Indices &&... indices) const + { + scalar_type *GKO_ACC_RESTRICT rest_scalar = self()->scalar_; + return rest_scalar[self()->compute_direct_scalar_index( + std::forward(indices)...)] = value; + } + + +private: + constexpr GKO_ACC_ATTRIBUTES const Accessor *self() const + { + return static_cast(this); + } +}; + + +} // namespace detail + + +/** + * The scaled_reduced_row_major class allows a storage format that is different + * from the arithmetic format (which is returned from the brace operator). + * Additionally, a scalar is used when reading and writing data to allow for + * a shift in range. + * As storage, the StorageType is used. + * + * This accessor uses row-major access. For example, for three dimensions, + * neighboring z coordinates are next to each other in memory, followed by y + * coordinates and then x coordinates. 
+ * + * @tparam Dimensionality The number of dimensions managed by this accessor + * + * @tparam ArithmeticType Value type used for arithmetic operations and + * for in- and output + * + * @tparam StorageType Value type used for storing the actual value to memory + * + * @tparam ScalarMask Binary mask that marks which indices matter for the + * scalar selection (set bit means the corresponding index + * needs to be considered, 0 means it is not). The least + * significant bit corresponds to the last index dimension, + * the second least to the second last index dimension, and + * so on. + * For example, the mask = 0b011101 means that for the 5d + * indices (x1, x2, x3, x4, x5), (x1, x2, x3, x5) are + * considered for the scalar, making the scalar itself 4d. + * + * @note This class only manages the accesses and not the memory itself. + */ +template +class scaled_reduced_row_major + : public detail::enable_write_scalar< + Dimensionality, + scaled_reduced_row_major, + ArithmeticType, std::is_const::value> { +public: + using arithmetic_type = std::remove_cv_t; + using storage_type = StorageType; + static constexpr size_type dimensionality{Dimensionality}; + static constexpr size_type scalar_mask{ScalarMask}; + static constexpr bool is_const{std::is_const::value}; + using scalar_type = + std::conditional_t; + + using const_accessor = + scaled_reduced_row_major; + + static_assert(!is_complex::value && + !is_complex::value, + "Both arithmetic and storage type must not be complex!"); + static_assert(Dimensionality >= 1, + "Dimensionality must be a positive number!"); + static_assert(dimensionality <= 32, + "Only Dimensionality <= 32 is currently supported"); + + // Allow access to both `scalar_` and `compute_mask_scalar_index()` + friend class detail::enable_write_scalar< + dimensionality, scaled_reduced_row_major, scalar_type>; + friend class range; + +protected: + static constexpr size_type scalar_dim{ + helper::count_mask_dimensionality()}; + static constexpr size_type 
scalar_stride_dim{ + scalar_dim == 0 ? 0 : (scalar_dim - 1)}; + + using dim_type = std::array; + using storage_stride_type = std::array; + using scalar_stride_type = std::array; + using reference_type = + reference_class::scaled_reduced_storage; + + /** + * Creates the accessor for an already allocated storage space with a + * stride. The first stride is used for computing the index for the first + * index, the second stride for the second index, and so on. + * + * @param size multidimensional size of the memory + * @param storage pointer to the block of memory containing the storage + * @param storage_stride stride array used for memory accesses to storage + * @param scalar pointer to the block of memory containing the scalar + * values. + * @param scalar_stride stride array used for memory accesses to scalar + */ + constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major( + dim_type size, storage_type *storage, + storage_stride_type storage_stride, scalar_type *scalar, + scalar_stride_type scalar_stride) + : size_(size), + storage_{storage}, + storage_stride_(storage_stride), + scalar_{scalar}, + scalar_stride_(scalar_stride) + {} + + /** + * Creates the accessor for an already allocated storage space with a + * stride. The first stride is used for computing the index for the first + * index, the second stride for the second index, and so on. + * + * @param size multidimensional size of the memory + * @param storage pointer to the block of memory containing the storage + * @param stride stride array used for memory accesses to storage + * @param scalar pointer to the block of memory containing the scalar + * values. 
+ */ + constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major( + dim_type size, storage_type *storage, storage_stride_type stride, + scalar_type *scalar) + : scaled_reduced_row_major{ + size, storage, stride, scalar, + helper::compute_default_masked_row_major_stride_array< + typename scalar_stride_type::value_type, scalar_mask, + scalar_stride_dim, dimensionality>(size)} + {} + + /** + * Creates the accessor for an already allocated storage space. + * It is assumed that all accesses are without padding. + * + * @param size multidimensional size of the memory + * @param storage pointer to the block of memory containing the storage + * @param scalar pointer to the block of memory containing the scalar + * values. + */ + constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major(dim_type size, + storage_type *storage, + scalar_type *scalar) + : scaled_reduced_row_major{ + size, storage, + helper::compute_default_row_major_stride_array< + typename storage_stride_type::value_type>(size), + scalar} + {} + + /** + * Creates an empty accessor (pointing nowhere with an empty size) + */ + constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major() + : scaled_reduced_row_major{{0, 0, 0}, nullptr, nullptr} + {} + +public: + /** + * Creates a scaled_reduced_row_major range which contains a read-only + * version of the current accessor. + * + * @returns a scaled_reduced_row_major range which is read-only. + */ + constexpr GKO_ACC_ATTRIBUTES range to_const() const + { + return range{size_, storage_, storage_stride_, scalar_, + scalar_stride_}; + } + + /** + * Reads the scalar value at the given indices. Only indices where the + * scalar mask bit is set are considered, the others are ignored. + * + * @param indices indices which data to access + * + * @returns the scalar value at the given indices. + */ + template + constexpr GKO_ACC_ATTRIBUTES scalar_type + read_scalar_masked(Indices &&... 
indices) const + { + const arithmetic_type *GKO_ACC_RESTRICT rest_scalar = scalar_; + return rest_scalar[compute_mask_scalar_index( + std::forward(indices)...)]; + } + + /** + * Reads the scalar value at the given indices. Only the actually used + * indices must be provided, meaning the number of specified indices must + * be equal to the number of set bits in the scalar mask. + * + * @param indices indices of the data to access + * + * @returns the scalar value at the given indices. + */ + template + constexpr GKO_ACC_ATTRIBUTES scalar_type + read_scalar_direct(Indices &&... indices) const + { + const arithmetic_type *GKO_ACC_RESTRICT rest_scalar = scalar_; + return rest_scalar[compute_direct_scalar_index( + std::forward(indices)...)]; + } + + /** + * Returns the length in dimension `dimension`. + * + * @param dimension a dimension index + * + * @returns length in dimension `dimension` + */ + constexpr GKO_ACC_ATTRIBUTES size_type length(size_type dimension) const + { + return dimension < dimensionality ? size_[dimension] : 1; + } + + /** + * Returns the stored value for the given indices. If the storage is const, + * a value is returned, otherwise, a reference is returned. + * + * @param indices indices of the value to access + * + * @returns the stored value if the accessor is const (if the storage type + * is const), or a reference if the accessor is non-const + */ + template + constexpr GKO_ACC_ATTRIBUTES std::enable_if_t< + are_all_integral::value, + std::conditional_t> + operator()(Indices... indices) const + { + return reference_type{storage_ + compute_index(indices...), + read_scalar_masked(indices...)}; + } + + /** + * Returns a sub-range spanning the current range (x1_span, x2_span, ...) + * + * @param spans span for the indices + * + * @returns a sub-range for the given spans. + */ + template + constexpr GKO_ACC_ATTRIBUTES + std::enable_if_t::value, + range> + operator()(SpanTypes... 
spans) const + { + return helper::validate_index_spans(size_, spans...), + range{ + dim_type{ + (index_span{spans}.end - index_span{spans}.begin)...}, + storage_ + compute_index((index_span{spans}.begin)...), + storage_stride_, + scalar_ + + compute_mask_scalar_index(index_span{spans}.begin...), + scalar_stride_}; + } + + /** + * Returns the size of the accessor + * + * @returns the size of the accessor + */ + constexpr GKO_ACC_ATTRIBUTES dim_type get_size() const { return size_; } + + /** + * Returns a const reference to the storage stride array of size + * dimensionality - 1 + * + * @returns a const reference to the storage stride array of size + * dimensionality - 1 + */ + constexpr GKO_ACC_ATTRIBUTES const storage_stride_type &get_storage_stride() + const + { + return storage_stride_; + } + + /** + * Returns a const reference to the scalar stride array + * + * @returns a const reference to the scalar stride array + */ + constexpr GKO_ACC_ATTRIBUTES const scalar_stride_type &get_scalar_stride() + const + { + return scalar_stride_; + } + + /** + * Returns the pointer to the storage data + * + * @returns the pointer to the storage data + */ + constexpr GKO_ACC_ATTRIBUTES storage_type *get_stored_data() const + { + return storage_; + } + + /** + * Returns a const pointer to the storage data + * + * @returns a const pointer to the storage data + */ + constexpr GKO_ACC_ATTRIBUTES const storage_type *get_const_storage() const + { + return storage_; + } + + /** + * Returns the pointer to the scalar data + * + * @returns the pointer to the scalar data + */ + constexpr GKO_ACC_ATTRIBUTES scalar_type *get_scalar() const + { + return scalar_; + } + + /** + * Returns a const pointer to the scalar data + * + * @returns a const pointer to the scalar data + */ + constexpr GKO_ACC_ATTRIBUTES const scalar_type *get_const_scalar() const + { + return scalar_; + } + +protected: + template + constexpr GKO_ACC_ATTRIBUTES size_type + compute_index(Indices &&... 
indices) const + { + static_assert(sizeof...(Indices) == dimensionality, + "Number of indices must match dimensionality!"); + return helper::compute_row_major_index( + size_, storage_stride_, std::forward(indices)...); + } + + template + constexpr GKO_ACC_ATTRIBUTES size_type + compute_mask_scalar_index(Indices &&... indices) const + { + static_assert(sizeof...(Indices) == dimensionality, + "Number of indices must match dimensionality!"); + return helper::compute_masked_index( + size_, scalar_stride_, std::forward(indices)...); + } + + template + constexpr GKO_ACC_ATTRIBUTES size_type + compute_direct_scalar_index(Indices &&... indices) const + { + static_assert( + sizeof...(Indices) == scalar_dim, + "Number of indices must match number of set bits in scalar mask!"); + return helper::compute_masked_index_direct( + size_, scalar_stride_, std::forward(indices)...); + } + + +private: + const dim_type size_; + storage_type *const storage_; + const storage_stride_type storage_stride_; + scalar_type *const scalar_; + const scalar_stride_type scalar_stride_; +}; + + +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_SCALED_REDUCED_ROW_MAJOR_HPP_ diff --git a/accessor/utils.hpp b/accessor/utils.hpp new file mode 100644 index 00000000000..79386536ca2 --- /dev/null +++ b/accessor/utils.hpp @@ -0,0 +1,173 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_ACCESSOR_UTILS_HPP_ +#define GKO_ACCESSOR_UTILS_HPP_ + +#include +#include +#include // for std::size_t + + +#if defined(__CUDACC__) || defined(__HIPCC__) +#include +#endif + + +#if defined(__CUDACC__) || defined(__HIPCC__) +#define GKO_ACC_ATTRIBUTES __host__ __device__ +#define GKO_ACC_INLINE __forceinline__ +#define GKO_ACC_RESTRICT __restrict__ +#else +#define GKO_ACC_ATTRIBUTES +#define GKO_ACC_INLINE inline +#define GKO_ACC_RESTRICT +#endif // defined(__CUDACC__) || defined(__HIPCC__) + + +#if (defined(__CUDA_ARCH__) && defined(__APPLE__)) || \ + defined(__HIP_DEVICE_COMPILE__) + +#ifdef NDEBUG +#define GKO_ACC_ASSERT(condition) ((void)0) +#else // NDEBUG +// Poor man's assertions on GPUs for MACs. They won't terminate the program +// but will at least print something on the screen +#define GKO_ACC_ASSERT(condition) \ + ((condition) \ + ? 
((void)0) \ + : ((void)printf("%s: %d: %s: Assertion `" #condition "' failed\n", \ + __FILE__, __LINE__, __func__))) +#endif // NDEBUG + +#else // (defined(__CUDA_ARCH__) && defined(__APPLE__)) || + // defined(__HIP_DEVICE_COMPILE__) + +// Handle assertions normally on other systems +#define GKO_ACC_ASSERT(condition) assert(condition) + +#endif // (defined(__CUDA_ARCH__) && defined(__APPLE__)) || + // defined(__HIP_DEVICE_COMPILE__) + + +namespace gko { +namespace acc { + +namespace xstd { + + +template +using void_t = void; + + +} + + +using size_type = std::size_t; + + +namespace detail { + + +template +struct remove_complex_impl { + using type = T; +}; + + +template +struct remove_complex_impl> { + using type = T; +}; + + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +struct remove_complex_impl> { + using type = T; +}; +#endif + + +template +struct is_complex_impl { + static constexpr bool value{false}; +}; + + +template +struct is_complex_impl> { + static constexpr bool value{true}; +}; + + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +struct is_complex_impl> { + static constexpr bool value{true}; +}; +#endif + + +} // namespace detail + + +template +using remove_complex_t = typename detail::remove_complex_impl::type; + + +template +using is_complex = typename detail::is_complex_impl; + + +/** + * Evaluates if all template arguments Args fulfill std::is_integral. If that is + * the case, this class inherits from `std::true_type`, otherwise, it inherits + * from `std::false_type`. + * If no values are passed in, `std::true_type` is inherited from. + * + * @tparam Args... 
Arguments to test for std::is_integral + */ +template +struct are_all_integral : public std::true_type {}; + +template +struct are_all_integral + : public std::conditional_t>::value, + are_all_integral, std::false_type> {}; + + +} // namespace acc +} // namespace gko + + +#endif // GKO_ACCESSOR_UTILS_HPP_ diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index f3a05ab5c9e..da1c80b31e1 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -4,13 +4,21 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Release") "will be affected") endif() -if (GINKGO_BUILD_CUDA AND GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "hcc") +if (GINKGO_BUILD_CUDA AND GINKGO_BUILD_HIP AND + GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") message(FATAL_ERROR "Building the benchmarks for both HIP AMD and CUDA " "at the same time is currently not supported. " "Disable the benchmark build using `-DGINKGO_BUILD_BENCHMARKS=OFF` " - "or use `export HIP_PLATFORM=nvcc` in your build environment instead.") + "or use `export HIP_PLATFORM=nvcc` (ROCM <=4.0) or " + "`export HIP_PLATFORM=nvidia` (ROCM >= 4.1) in your build environment instead.") endif() +function(ginkgo_benchmark_add_tuning_maybe name) + if(GINKGO_BENCHMARK_ENABLE_TUNING) + target_sources(${name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../utils/tuning_variables.cpp) + endif() +endfunction() + function(ginkgo_benchmark_cusp_linops name) target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) target_link_libraries("${name}" ginkgo ${CUDA_RUNTIME_LIBS} @@ -25,31 +33,71 @@ function(ginkgo_benchmark_hipsp_linops name) target_compile_definitions("${name}" PRIVATE HAS_HIP=1) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) set_target_properties("${name}" PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) + # use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks + target_compile_definitions("${name}" PUBLIC 
-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) # for some reason, HIP creates a dependency on Threads::Threads here, so we # need to find it find_package(Threads REQUIRED) find_package(HIP REQUIRED) find_package(hipblas REQUIRED) + find_package(hiprand REQUIRED) find_package(hipsparse REQUIRED) + find_package(rocrand REQUIRED) target_include_directories("${name}" SYSTEM PRIVATE ${HSA_HEADER} ${HIP_INCLUDE_DIRS} ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS}) - if(GINKGO_HIP_PLATFORM MATCHES "hcc") - ginkgo_hip_ban_link_hcflag(hcc::hccrt) - ginkgo_hip_ban_link_hcflag(hcc::hc_am) - ginkgo_hip_ban_link_hcflag(hcc::mcwamp) - ginkgo_hip_ban_compile_hcflag(hcc::hccrt) - ginkgo_hip_ban_compile_hcflag(hcc::hc_am) - ginkgo_hip_ban_compile_hcflag(hcc::mcwamp) - ginkgo_hip_clang_ban_hip_device_flags() - target_link_libraries("${name}" hip::device) - else() - target_link_libraries("${name}" ${HIP_CUDA_LIBRARIES}) - endif() target_link_libraries("${name}" ${HIPSPARSE_LIBRARIES}) endfunction() + +# Generates an executable for one precision. Each executable will be linked to +# `ginkgo`, `gflags` and `rapidjson`. 
+# Note: This should only be used by `ginkgo_add_typed_benchmark_executables` +# +# \param name name for the executable to create (including type suffix) +# \param use_lib_linops Boolean indicating if linking against hipsparse/cusparse +# is necessary +# \param macro_def preprocessor macro name that will be defined during +# building (to compile for a specific type) +# All remaining arguments will be treated as source files +function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def) + add_executable("${name}" ${ARGN}) + target_link_libraries("${name}" ginkgo gflags rapidjson) + target_compile_definitions("${name}" PRIVATE "${macro_def}") + target_compile_options("${name}" PRIVATE ${GINKGO_COMPILER_FLAGS}) + ginkgo_benchmark_add_tuning_maybe("${name}") + if("${use_lib_linops}") + if (GINKGO_BUILD_CUDA) + ginkgo_benchmark_cusp_linops("${name}") + endif() + if (GINKGO_BUILD_HIP) + ginkgo_benchmark_hipsp_linops("${name}") + endif() + endif() +endfunction(ginkgo_add_single_benchmark_executable) + + +# Generates an executable for each supported precision. Each executable will be +# linked to `ginkgo`, `gflags` and `rapidjson`. 
+# +# \param name base-name for the executable to create +# \param use_lib_linops Boolean indicating if linking against hipsparse/cusparse +# is necessary +# All remaining arguments will be treated as source files +function(ginkgo_add_typed_benchmark_executables name use_lib_linops) + ginkgo_add_single_benchmark_executable( + "${name}" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_PRECISION" ${ARGN}) + ginkgo_add_single_benchmark_executable( + "${name}_single" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_PRECISION" ${ARGN}) + ginkgo_add_single_benchmark_executable( + "${name}_dcomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION" ${ARGN}) + ginkgo_add_single_benchmark_executable( + "${name}_scomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION" ${ARGN}) +endfunction(ginkgo_add_typed_benchmark_executables) + + +add_subdirectory(blas) add_subdirectory(conversions) add_subdirectory(matrix_generator) add_subdirectory(matrix_statistics) diff --git a/benchmark/blas/CMakeLists.txt b/benchmark/blas/CMakeLists.txt new file mode 100644 index 00000000000..c3e40e80bc2 --- /dev/null +++ b/benchmark/blas/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_add_typed_benchmark_executables(blas "NO" blas.cpp) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp new file mode 100644 index 00000000000..bf52992a4fa --- /dev/null +++ b/benchmark/blas/blas.cpp @@ -0,0 +1,557 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" + + +// Command-line arguments +DEFINE_string( + operations, "copy,axpy,scal", + "A comma-separated list of BLAS operations to benchmark.\nCandidates are" + " copy (y = x),\n" + " axpy (y = y + a * x),\n" + " multiaxpy (like axpy, but a has one entry per column),\n" + " scal (y = a * y),\n" + " multiscal (like scal, but a has one entry per column),\n" + " dot (a = x' * y)," + " norm (a = sqrt(x' * x)),\n" + " mm (C = A * B),\n" + " gemm (C = a * A * B + b * C)\n" + "where A has dimensions n x k, B has dimensions k x m,\n" + "C has dimensions n x m and x and y have dimensions n x r"); + + +class BenchmarkOperation { +public: + virtual ~BenchmarkOperation() = default; + + virtual gko::size_type get_flops() const = 0; + virtual gko::size_type get_memory() const = 0; + virtual void prepare(){}; + virtual void run() = 0; +}; + + +class CopyOperation : public BenchmarkOperation { +public: + CopyOperation(std::shared_ptr exec, + gko::size_type rows, gko::size_type cols, + gko::size_type istride, gko::size_type ostride) + { + in_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + istride); + out_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + ostride); + in_->fill(1); + } + + gko::size_type get_flops() const override + { + return in_->get_size()[0] * in_->get_size()[1]; + } + + gko::size_type get_memory() const override + { + return in_->get_size()[0] * in_->get_size()[1] * sizeof(etype) * 2; + } + + void run() override { in_->convert_to(lend(out_)); } + +private: + std::unique_ptr> in_; + std::unique_ptr> out_; +}; + + +class AxpyOperation : public BenchmarkOperation { +public: + AxpyOperation(std::shared_ptr exec, + gko::size_type rows, 
gko::size_type cols, + gko::size_type stride_in, gko::size_type stride_out, + bool multi) + { + alpha_ = gko::matrix::Dense::create( + exec, gko::dim<2>{1, multi ? cols : 1}); + x_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride_in); + y_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride_out); + alpha_->fill(1); + x_->fill(1); + } + + gko::size_type get_flops() const override + { + return y_->get_size()[0] * y_->get_size()[1] * 2; + } + + gko::size_type get_memory() const override + { + return y_->get_size()[0] * y_->get_size()[1] * sizeof(etype) * 3; + } + + void prepare() override { y_->fill(1); } + + void run() override { y_->add_scaled(lend(alpha_), lend(x_)); } + +private: + std::unique_ptr> alpha_; + std::unique_ptr> x_; + std::unique_ptr> y_; +}; + + +class ScalOperation : public BenchmarkOperation { +public: + ScalOperation(std::shared_ptr exec, + gko::size_type rows, gko::size_type cols, + gko::size_type stride, bool multi) + { + alpha_ = gko::matrix::Dense::create( + exec, gko::dim<2>{1, multi ? 
cols : 1}); + y_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride); + alpha_->fill(1); + } + + gko::size_type get_flops() const override + { + return y_->get_size()[0] * y_->get_size()[1]; + } + + gko::size_type get_memory() const override + { + return y_->get_size()[0] * y_->get_size()[1] * sizeof(etype) * 2; + } + + void prepare() override { y_->fill(1); } + + void run() override { y_->scale(lend(alpha_)); } + +private: + std::unique_ptr> alpha_; + std::unique_ptr> y_; +}; + + +class DotOperation : public BenchmarkOperation { +public: + DotOperation(std::shared_ptr exec, gko::size_type rows, + gko::size_type cols, gko::size_type stride_x, + gko::size_type stride_y) + { + alpha_ = gko::matrix::Dense::create(exec, gko::dim<2>{1, cols}); + x_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride_x); + y_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride_y); + x_->fill(1); + y_->fill(1); + } + + gko::size_type get_flops() const override + { + return y_->get_size()[0] * y_->get_size()[1] * 2; + } + + gko::size_type get_memory() const override + { + return y_->get_size()[0] * y_->get_size()[1] * sizeof(etype) * 2; + } + + void run() override { x_->compute_dot(lend(y_), lend(alpha_)); } + +private: + std::unique_ptr> alpha_; + std::unique_ptr> x_; + std::unique_ptr> y_; +}; + + +class NormOperation : public BenchmarkOperation { +public: + NormOperation(std::shared_ptr exec, + gko::size_type rows, gko::size_type cols, + gko::size_type stride) + { + alpha_ = gko::matrix::Dense::create(exec, gko::dim<2>{1, cols}); + y_ = gko::matrix::Dense::create(exec, gko::dim<2>{rows, cols}, + stride); + y_->fill(1); + } + + gko::size_type get_flops() const override + { + return y_->get_size()[0] * y_->get_size()[1] * 2; + } + + gko::size_type get_memory() const override + { + return y_->get_size()[0] * y_->get_size()[1] * sizeof(etype); + } + + void run() override { y_->compute_norm2(lend(alpha_)); } + +private: + std::unique_ptr> 
alpha_; + std::unique_ptr> y_; +}; + + +class ApplyOperation : public BenchmarkOperation { +public: + ApplyOperation(std::shared_ptr exec, gko::size_type n, + gko::size_type k, gko::size_type m, gko::size_type stride_A, + gko::size_type stride_B, gko::size_type stride_C) + { + A_ = gko::matrix::Dense::create(exec, gko::dim<2>{n, k}, + stride_A); + B_ = gko::matrix::Dense::create(exec, gko::dim<2>{k, m}, + stride_B); + C_ = gko::matrix::Dense::create(exec, gko::dim<2>{n, m}, + stride_C); + A_->fill(1); + B_->fill(1); + } + + gko::size_type get_flops() const override + { + return A_->get_size()[0] * A_->get_size()[1] * B_->get_size()[1] * 2; + } + + gko::size_type get_memory() const override + { + return (A_->get_size()[0] * A_->get_size()[1] + + B_->get_size()[0] * B_->get_size()[1] + + C_->get_size()[0] * C_->get_size()[1]) * + sizeof(etype); + } + + void run() override { A_->apply(lend(B_), lend(C_)); } + +private: + std::unique_ptr> A_; + std::unique_ptr> B_; + std::unique_ptr> C_; +}; + + +class AdvancedApplyOperation : public BenchmarkOperation { +public: + AdvancedApplyOperation(std::shared_ptr exec, + gko::size_type n, gko::size_type k, gko::size_type m, + gko::size_type stride_A, gko::size_type stride_B, + gko::size_type stride_C) + { + A_ = gko::matrix::Dense::create(exec, gko::dim<2>{n, k}, + stride_A); + B_ = gko::matrix::Dense::create(exec, gko::dim<2>{k, m}, + stride_B); + C_ = gko::matrix::Dense::create(exec, gko::dim<2>{n, m}, + stride_C); + alpha_ = gko::matrix::Dense::create(exec, gko::dim<2>{1, 1}); + beta_ = gko::matrix::Dense::create(exec, gko::dim<2>{1, 1}); + A_->fill(1); + B_->fill(1); + alpha_->fill(1); + beta_->fill(1); + } + + gko::size_type get_flops() const override + { + return A_->get_size()[0] * A_->get_size()[1] * B_->get_size()[1] * 2 + + C_->get_size()[0] * C_->get_size()[1] * 3; + } + + gko::size_type get_memory() const override + { + return (A_->get_size()[0] * A_->get_size()[1] + + B_->get_size()[0] * B_->get_size()[1] + + 
C_->get_size()[0] * C_->get_size()[1] * 2) * + sizeof(etype); + } + + void run() override + { + A_->apply(lend(alpha_), lend(B_), lend(beta_), lend(C_)); + } + +private: + std::unique_ptr> alpha_; + std::unique_ptr> beta_; + std::unique_ptr> A_; + std::unique_ptr> B_; + std::unique_ptr> C_; +}; + + +struct dimensions { + gko::size_type n; + gko::size_type k; + gko::size_type m; + gko::size_type r; + gko::size_type stride_x; + gko::size_type stride_y; + gko::size_type stride_A; + gko::size_type stride_B; + gko::size_type stride_C; +}; + + +gko::size_type get_optional(rapidjson::Value &obj, const char *name, + gko::size_type default_value) +{ + if (obj.HasMember(name)) { + return obj[name].GetUint64(); + } else { + return default_value; + } +} + + +dimensions parse_dims(rapidjson::Value &test_case) +{ + dimensions result; + result.n = test_case["n"].GetInt64(); + result.k = get_optional(test_case, "k", result.n); + result.m = get_optional(test_case, "m", result.n); + result.r = get_optional(test_case, "r", 1); + if (test_case.HasMember("stride")) { + result.stride_x = test_case["stride"].GetInt64(); + result.stride_y = result.stride_x; + } else { + result.stride_x = get_optional(test_case, "stride_x", result.r); + result.stride_y = get_optional(test_case, "stride_y", result.r); + } + result.stride_A = get_optional(test_case, "stride_A", result.k); + result.stride_B = get_optional(test_case, "stride_B", result.m); + result.stride_C = get_optional(test_case, "stride_C", result.m); + return result; +} + + +std::map( + std::shared_ptr, dimensions)>> + operation_map{ + {"copy", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.r, dims.stride_x, dims.stride_y); + }}, + {"axpy", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.r, dims.stride_x, dims.stride_y, false); + }}, + {"multiaxpy", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.r, 
dims.stride_x, dims.stride_y, true); + }}, + {"scal", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique(exec, dims.n, dims.r, + dims.stride_y, false); + }}, + {"multiscal", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique(exec, dims.n, dims.r, + dims.stride_y, true); + }}, + {"dot", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.r, dims.stride_x, dims.stride_y); + }}, + {"norm", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique(exec, dims.n, dims.r, + dims.stride_y); + }}, + {"mm", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.k, dims.m, dims.stride_A, dims.stride_B, + dims.stride_C); + }}, + {"gemm", + [](std::shared_ptr exec, dimensions dims) { + return std::make_unique( + exec, dims.n, dims.k, dims.m, dims.stride_A, dims.stride_B, + dims.stride_C); + }}}; + + +void apply_blas(const char *operation_name, std::shared_ptr exec, + rapidjson::Value &test_case, + rapidjson::MemoryPoolAllocator<> &allocator) +{ + try { + auto &blas_case = test_case["blas"]; + add_or_set_member(blas_case, operation_name, + rapidjson::Value(rapidjson::kObjectType), allocator); + + auto op = operation_map[operation_name](exec, parse_dims(test_case)); + + auto timer = get_timer(exec, FLAGS_gpu_timer); + IterationControl ic(timer); + + // warm run + for (auto _ : ic.warmup_run()) { + op->prepare(); + exec->synchronize(); + op->run(); + exec->synchronize(); + } + + // timed run + op->prepare(); + for (auto _ : ic.run()) { + op->run(); + } + const auto runtime = ic.compute_average_time(); + const auto flops = static_cast(op->get_flops()); + const auto mem = static_cast(op->get_memory()); + const auto repetitions = ic.get_num_repetitions(); + add_or_set_member(blas_case[operation_name], "time", runtime, + allocator); + add_or_set_member(blas_case[operation_name], "flops", flops / runtime, + allocator); + 
add_or_set_member(blas_case[operation_name], "bandwidth", mem / runtime, + allocator); + add_or_set_member(blas_case[operation_name], "repetitions", repetitions, + allocator); + + // compute and write benchmark data + add_or_set_member(blas_case[operation_name], "completed", true, + allocator); + } catch (const std::exception &e) { + add_or_set_member(test_case["blas"][operation_name], "completed", false, + allocator); + if (FLAGS_keep_errors) { + rapidjson::Value msg_value; + msg_value.SetString(e.what(), allocator); + add_or_set_member(test_case["blas"][operation_name], "error", + msg_value, allocator); + } + std::cerr << "Error when processing test case " << test_case << "\n" + << "what(): " << e.what() << std::endl; + } +} + + +int main(int argc, char *argv[]) +{ + std::string header = + "A benchmark for measuring performance of Ginkgo's BLAS-like " + "operations.\nParameters for a benchmark case are:\n" + " n: number of rows for vectors and gemm output (required)\n" + " r: number of columns for vectors (optional, default 1)\n" + " m: number of columns for gemm output (optional, default n)\n" + " k: inner dimension of the gemm (optional, default n)\n" + " stride: storage stride for both vectors (optional, default r)\n" + " stride_x: stride for input vector x (optional, default r)\n" + " stride_y: stride for in/out vector y (optional, default r)\n" + " stride_A: stride for A matrix in gemm (optional, default k)\n" + " stride_B: stride for B matrix in gemm (optional, default m)\n" + " stride_C: stride for C matrix in gemm (optional, default m)\n"; + std::string format = std::string() + " [\n { \"n\": 100 },\n" + + " { \"n\": 200, \"m\": 200, \"k\": 200 }\n" + + " ]\n\n"; + initialize_argument_parsing(&argc, &argv, header, format); + + std::string extra_information = "The operations are " + FLAGS_operations; + print_general_information(extra_information); + + auto exec = executor_factory.at(FLAGS_executor)(); + auto engine = get_engine(); + auto operations = 
split(FLAGS_operations, ','); + + rapidjson::IStreamWrapper jcin(std::cin); + rapidjson::Document test_cases; + test_cases.ParseStream(jcin); + if (!test_cases.IsArray()) { + std::cerr + << "Input has to be a JSON array of benchmark configurations:\n" + << format; + std::exit(1); + } + + auto &allocator = test_cases.GetAllocator(); + + for (auto &test_case : test_cases.GetArray()) { + try { + // set up benchmark + if (!test_case.HasMember("blas")) { + test_case.AddMember("blas", + rapidjson::Value(rapidjson::kObjectType), + allocator); + } + auto &blas_case = test_case["blas"]; + if (!FLAGS_overwrite && + all_of(begin(operations), end(operations), + [&blas_case](const std::string &s) { + return blas_case.HasMember(s.c_str()); + })) { + continue; + } + std::clog << "Running test case: " << test_case << std::endl; + + for (const auto &operation_name : operations) { + apply_blas(operation_name.c_str(), exec, test_case, allocator); + std::clog << "Current state:" << std::endl + << test_cases << std::endl; + backup_results(test_cases); + } + } catch (const std::exception &e) { + std::cerr << "Error setting up benchmark, what(): " << e.what() + << std::endl; + } + } + + std::cout << test_cases << std::endl; +} diff --git a/benchmark/conversions/CMakeLists.txt b/benchmark/conversions/CMakeLists.txt index 7f4a43f73c1..0e0893c3aec 100644 --- a/benchmark/conversions/CMakeLists.txt +++ b/benchmark/conversions/CMakeLists.txt @@ -1,2 +1 @@ -add_executable(conversions conversions.cpp) -target_link_libraries(conversions ginkgo gflags rapidjson) +ginkgo_add_typed_benchmark_executables(conversions "NO" conversions.cpp) diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index d2cc6c147e9..111470a6f0f 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, 
the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,9 +47,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" -using etype = double; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING // This function supposes that management of `FLAGS_overwrite` is done before @@ -65,31 +69,28 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, add_or_set_member(conversion_case, conversion_name, rapidjson::Value(rapidjson::kObjectType), allocator); - gko::matrix_data<> data{gko::dim<2>{1, 1}, 1}; + gko::matrix_data data{gko::dim<2>{1, 1}, 1}; auto matrix_to = share(formats::matrix_factory.at(format_to)(exec, data)); + + auto timer = get_timer(exec, FLAGS_gpu_timer); + IterationControl ic{timer}; + // warm run - for (unsigned int i = 0; i < FLAGS_warmup; i++) { + for (auto _ : ic.warmup_run()) { exec->synchronize(); matrix_to->copy_from(matrix_from); exec->synchronize(); matrix_to->clear(); } - std::chrono::nanoseconds time(0); // timed run - for (unsigned int i = 0; i < FLAGS_repetitions; i++) { - exec->synchronize(); - auto tic = std::chrono::steady_clock::now(); + for (auto _ : ic.run()) { matrix_to->copy_from(matrix_from); - exec->synchronize(); - auto toc = std::chrono::steady_clock::now(); - time += - std::chrono::duration_cast(toc - tic); - matrix_to->clear(); } add_or_set_member(conversion_case[conversion_name], "time", - static_cast(time.count()) / FLAGS_repetitions, - allocator); + ic.compute_average_time(), allocator); + add_or_set_member(conversion_case[conversion_name], "repetitions", + ic.get_num_repetitions(), allocator); // compute and write benchmark data 
add_or_set_member(conversion_case[conversion_name], "completed", true, @@ -97,6 +98,12 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, } catch (const std::exception &e) { add_or_set_member(test_case["conversions"][conversion_name], "completed", false, allocator); + if (FLAGS_keep_errors) { + rapidjson::Value msg_value; + msg_value.SetString(e.what(), allocator); + add_or_set_member(test_case["conversions"][conversion_name], + "error", msg_value, allocator); + } std::cerr << "Error when processing test case " << test_case << "\n" << "what(): " << e.what() << std::endl; } @@ -141,9 +148,9 @@ int main(int argc, char *argv[]) std::clog << "Running test case: " << test_case << std::endl; std::ifstream mtx_fd(test_case["filename"].GetString()); - gko::matrix_data<> data; + gko::matrix_data data; try { - data = gko::read_raw(mtx_fd); + data = gko::read_raw(mtx_fd); } catch (std::exception &e) { std::cerr << "Error setting up matrix data, what(): " << e.what() << std::endl; @@ -155,8 +162,7 @@ int main(int argc, char *argv[]) try { auto matrix_from = share(formats::matrix_factory.at(format_from)(exec, data)); - for (const auto &format : formats::matrix_factory) { - const auto format_to = std::get<0>(format); + for (const auto &format_to : formats) { if (format_from == format_to) { continue; } diff --git a/benchmark/matrix_generator/CMakeLists.txt b/benchmark/matrix_generator/CMakeLists.txt index 95d4e00ac51..9ae32e71fae 100644 --- a/benchmark/matrix_generator/CMakeLists.txt +++ b/benchmark/matrix_generator/CMakeLists.txt @@ -1,2 +1 @@ -add_executable(matrix_generator matrix_generator.cpp) -target_link_libraries(matrix_generator ginkgo gflags rapidjson) +ginkgo_add_typed_benchmark_executables(matrix_generator "NO" matrix_generator.cpp) diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 7622d2cd4ed..241678118f4 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ 
b/benchmark/matrix_generator/matrix_generator.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,10 +40,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" +#include "benchmark/utils/types.hpp" -// some Ginkgo shortcuts -using etype = double; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING namespace { @@ -94,13 +96,13 @@ void validate_option_object(const rapidjson::Value &value) } -using generator_function = - std::function(rapidjson::Value &, std::ranlux24 &)>; +using generator_function = std::function( + rapidjson::Value &, std::ranlux24 &)>; // matrix generators -gko::matrix_data generate_block_diagonal(rapidjson::Value &config, - std::ranlux24 &engine) +gko::matrix_data generate_block_diagonal(rapidjson::Value &config, + std::ranlux24 &engine) { if (!config.HasMember("num_blocks") || !config["num_blocks"].IsUint() || !config.HasMember("block_size") || !config["block_size"].IsUint()) { @@ -108,10 +110,10 @@ gko::matrix_data generate_block_diagonal(rapidjson::Value &config, } auto num_blocks = config["num_blocks"].GetUint(); auto block_size = config["block_size"].GetUint(); - auto block = gko::matrix_data( - gko::dim<2>(block_size), std::uniform_real_distribution<>(-1.0, 1.0), - engine); - return gko::matrix_data::diag(num_blocks, block); + auto block = gko::matrix_data( + gko::dim<2>(block_size), + std::uniform_real_distribution(-1.0, 1.0), engine); + return gko::matrix_data::diag(num_blocks, block); } diff --git a/benchmark/matrix_statistics/CMakeLists.txt b/benchmark/matrix_statistics/CMakeLists.txt index 2de10097f36..e347ede0e16 100644 --- a/benchmark/matrix_statistics/CMakeLists.txt +++ 
b/benchmark/matrix_statistics/CMakeLists.txt @@ -1,2 +1 @@ -add_executable(matrix_statistics matrix_statistics.cpp) -target_link_libraries(matrix_statistics ginkgo gflags rapidjson) +ginkgo_add_typed_benchmark_executables(matrix_statistics "NO" matrix_statistics.cpp) diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 72e899407d7..d8a3ce8fc1b 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,10 +42,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" #include "benchmark/utils/spmv_common.hpp" +#include "benchmark/utils/types.hpp" -// some Ginkgo shortcuts -using etype = double; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING // See en.wikipedia.org/wiki/Five-number_summary @@ -148,6 +150,10 @@ void extract_matrix_statistics(gko::matrix_data &data, ++col_dist[v.column]; } + add_or_set_member(problem, "rows", data.size[0], allocator); + add_or_set_member(problem, "columns", data.size[1], allocator); + add_or_set_member(problem, "nonzeros", data.nonzeros.size(), allocator); + std::sort(begin(row_dist), end(row_dist)); add_or_set_member(problem, "row_distribution", rapidjson::Value(rapidjson::kObjectType), allocator); diff --git a/benchmark/preconditioner/CMakeLists.txt b/benchmark/preconditioner/CMakeLists.txt index 962831fc1d4..7b666000b04 100644 --- a/benchmark/preconditioner/CMakeLists.txt +++ b/benchmark/preconditioner/CMakeLists.txt @@ -1,2 +1 @@ -add_executable(preconditioner preconditioner.cpp) 
-target_link_libraries(preconditioner ginkgo gflags rapidjson) +ginkgo_add_typed_benchmark_executables(preconditioner "NO" preconditioner.cpp) diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 11979fd6ba1..2225af0ee82 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,145 +45,86 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/preconditioners.hpp" #include "benchmark/utils/spmv_common.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" -// Command-line arguments -DEFINE_uint32(max_block_size, 32, - "Maximal block size of the block-Jacobi preconditioner"); - -DEFINE_uint32(num_iterations, 5, - "Number of iterations for the ParICT/ParILU(T) preconditioner"); - -DEFINE_bool( - approx_select, true, - "Use approximate selection for the threshold filtering in ParICT/ParILUT"); - -DEFINE_double(fill_limit, 2.0, "The fill-in limit used in ParICT/ParILUT"); - -DEFINE_string(preconditioners, "jacobi,parilu,parilut,ilu", - "A comma-separated list of preconditioners to run." - "Supported values are: jacobi, parict, parilu, parilut, ilu"); - -DEFINE_string(storage_optimization, "0,0", - "Defines the kind of storage optimization to perform on " - "preconditioners that support it. 
Supported values are: " - "autodetect and , where and are the input " - "parameters used to construct a precision_reduction object."); - -DEFINE_double(accuracy, 1e-1, - "This value is used as the accuracy flag of the adaptive Jacobi " - "preconditioner."); - - -// some shortcuts -using etype = double; - - -// parses the storage optimization command line argument -gko::precision_reduction parse_storage_optimization(const std::string &flag) -{ - if (flag == "autodetect") { - return gko::precision_reduction::autodetect(); - } - const auto parts = split(flag, ','); - if (parts.size() != 2) { - throw std::runtime_error( - "storage_optimization has to be a list of two integers"); - } - return gko::precision_reduction(std::stoi(parts[0]), std::stoi(parts[1])); -} - - -// preconditioner mapping -const std::map( - std::shared_ptr exec)>> - precond_factory{ - {"jacobi", - [](std::shared_ptr exec) { - return gko::preconditioner::Jacobi::build() - .with_max_block_size(FLAGS_max_block_size) - .with_storage_optimization( - parse_storage_optimization(FLAGS_storage_optimization)) - .with_accuracy(FLAGS_accuracy) - .on(exec); - }}, - {"parict", - [](std::shared_ptr exec) { - auto ict_fact = std::shared_ptr( - gko::factorization::ParIct::build() - .with_iterations(FLAGS_num_iterations) - .with_approximate_select(FLAGS_approx_select) - .with_fill_in_limit(FLAGS_fill_limit) - .on(exec)); - return gko::preconditioner::Ilu<>::build() - .with_factorization_factory(ict_fact) - .on(exec); - }}, - {"parilu", - [](std::shared_ptr exec) { - auto ilu_fact = std::shared_ptr( - gko::factorization::ParIlu::build() - .with_iterations(FLAGS_num_iterations) - .on(exec)); - return gko::preconditioner::Ilu<>::build() - .with_factorization_factory(ilu_fact) - .on(exec); - }}, - {"parilut", - [](std::shared_ptr exec) { - auto ilut_fact = std::shared_ptr( - gko::factorization::ParIlut::build() - .with_iterations(FLAGS_num_iterations) - .with_approximate_select(FLAGS_approx_select) - 
.with_fill_in_limit(FLAGS_fill_limit) - .on(exec)); - return gko::preconditioner::Ilu<>::build() - .with_factorization_factory(ilut_fact) - .on(exec); - }}, - {"ilu", [](std::shared_ptr exec) { - auto ilu_fact = std::shared_ptr( - gko::factorization::Ilu::build().on(exec)); - return gko::preconditioner::Ilu<>::build() - .with_factorization_factory(ilu_fact) - .on(exec); - }}}; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING // preconditioner generation and application - std::string encode_parameters(const char *precond_name) { static std::map encoder{ {"jacobi", [] { std::ostringstream oss; - oss << "jacobi-" << FLAGS_max_block_size << "-" - << FLAGS_storage_optimization; + oss << "jacobi-" << FLAGS_jacobi_max_block_size << "-" + << FLAGS_jacobi_storage; return oss.str(); }}, {"parict", [] { std::ostringstream oss; - oss << "parict-" << FLAGS_num_iterations << '-' - << FLAGS_approx_select << '-' << FLAGS_fill_limit; + oss << "parict-" << FLAGS_parilu_iterations << '-' + << FLAGS_parilut_approx_select << '-' << FLAGS_parilut_limit; return oss.str(); }}, {"parilu", [] { std::ostringstream oss; - oss << "parilu-" << FLAGS_num_iterations; + oss << "parilu-" << FLAGS_parilu_iterations; return oss.str(); }}, {"parilut", [] { std::ostringstream oss; - oss << "parilut-" << FLAGS_num_iterations << '-' - << FLAGS_approx_select << '-' << FLAGS_fill_limit; + oss << "parilut-" << FLAGS_parilu_iterations << '-' + << FLAGS_parilut_approx_select << '-' << FLAGS_parilut_limit; return oss.str(); }}, - {"ilu", [] { return std::string{"ilu"}; }}}; + {"parict-isai", + [] { + std::ostringstream oss; + oss << "parict-isai-" << FLAGS_parilu_iterations << '-' + << FLAGS_parilut_approx_select << '-' << FLAGS_parilut_limit + << '-' << FLAGS_isai_power; + return oss.str(); + }}, + {"parilu-isai", + [] { + std::ostringstream oss; + oss << "parilu-isai-" << FLAGS_parilu_iterations << '-' + << FLAGS_isai_power; + 
return oss.str(); + }}, + {"parilut-isai", + [] { + std::ostringstream oss; + oss << "parilut-isai-" << FLAGS_parilu_iterations << '-' + << FLAGS_parilut_approx_select << '-' << FLAGS_parilut_limit + << '-' << FLAGS_isai_power; + return oss.str(); + }}, + {"ilu-isai", + [] { + return std::string{"ilu-isai-"} + std::to_string(FLAGS_isai_power); + }}, + {"general-isai", + [] { + return std::string{"general-isai-"} + + std::to_string(FLAGS_isai_power); + }}, + {"spd-isai", [] { + return std::string{"spd-isai-"} + std::to_string(FLAGS_isai_power); + }}}; + if (encoder.find(precond_name) == encoder.end()) { + return precond_name; + } return encoder[precond_name](); } @@ -218,50 +159,38 @@ void run_preconditioner(const char *precond_name, allocator); } + IterationControl ic_gen{get_timer(exec, FLAGS_gpu_timer)}; + IterationControl ic_apply{get_timer(exec, FLAGS_gpu_timer)}; + { // fast run, gets total time auto x_clone = clone(x); auto precond = precond_factory.at(precond_name)(exec); - for (auto i = 0u; i < FLAGS_warmup; ++i) { + + for (auto _ : ic_apply.warmup_run()) { precond->generate(system_matrix)->apply(lend(b), lend(x_clone)); } - exec->synchronize(); - auto g_tic = std::chrono::steady_clock::now(); - std::unique_ptr precond_op; - for (auto i = 0u; i < FLAGS_repetitions; ++i) { + for (auto _ : ic_gen.run()) { precond_op = precond->generate(system_matrix); } - exec->synchronize(); - auto g_tac = std::chrono::steady_clock::now(); - - auto generate_time = - std::chrono::duration_cast(g_tac - - g_tic) / - FLAGS_repetitions; add_or_set_member(this_precond_data["generate"], "time", - generate_time.count(), allocator); + ic_gen.compute_average_time(), allocator); + add_or_set_member(this_precond_data["generate"], "repetitions", + ic_gen.get_num_repetitions(), allocator); - exec->synchronize(); - auto a_tic = std::chrono::steady_clock::now(); - - for (auto i = 0u; i < FLAGS_repetitions; ++i) { + for (auto _ : ic_apply.run()) { precond_op->apply(lend(b), lend(x_clone)); } 
- exec->synchronize(); - auto a_tac = std::chrono::steady_clock::now(); - - auto apply_time = - std::chrono::duration_cast(a_tac - - a_tic) / - FLAGS_repetitions; add_or_set_member(this_precond_data["apply"], "time", - apply_time.count(), allocator); + ic_apply.compute_average_time(), allocator); + add_or_set_member(this_precond_data["apply"], "repetitions", + ic_apply.get_num_repetitions(), allocator); } if (FLAGS_detailed) { @@ -273,24 +202,24 @@ void run_preconditioner(const char *precond_name, std::make_shared(exec, FLAGS_nested_names); exec->add_logger(gen_logger); std::unique_ptr precond_op; - for (auto i = 0u; i < FLAGS_repetitions; ++i) { + for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) { precond_op = precond->generate(system_matrix); } exec->remove_logger(gko::lend(gen_logger)); gen_logger->write_data(this_precond_data["generate"]["components"], - allocator, FLAGS_repetitions); + allocator, ic_gen.get_num_repetitions()); auto apply_logger = std::make_shared(exec, FLAGS_nested_names); exec->add_logger(apply_logger); - for (auto i = 0u; i < FLAGS_repetitions; ++i) { + for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) { precond_op->apply(lend(b), lend(x_clone)); } exec->remove_logger(gko::lend(apply_logger)); apply_logger->write_data(this_precond_data["apply"]["components"], - allocator, FLAGS_repetitions); + allocator, ic_apply.get_num_repetitions()); } add_or_set_member(this_precond_data, "completed", true, allocator); @@ -300,6 +229,12 @@ void run_preconditioner(const char *precond_name, rapidjson::Value(rapidjson::kObjectType), allocator); add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], "completed", false, allocator); + if (FLAGS_keep_errors) { + rapidjson::Value msg_value; + msg_value.SetString(e.what(), allocator); + add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], + "error", msg_value, allocator); + } std::cerr << "Error when processing test case " << test_case << "\n" << "what(): " << 
e.what() << std::endl; } @@ -361,7 +296,7 @@ int main(int argc, char *argv[]) std::clog << "Running test case: " << test_case << std::endl; std::ifstream mtx_fd(test_case["filename"].GetString()); - auto data = gko::read_raw(mtx_fd); + auto data = gko::read_raw(mtx_fd); auto system_matrix = share(formats::matrix_factory.at(FLAGS_formats)(exec, data)); diff --git a/benchmark/run_all_benchmarks.sh b/benchmark/run_all_benchmarks.sh index 6a782bac322..db7fa5bb5ca 100644 --- a/benchmark/run_all_benchmarks.sh +++ b/benchmark/run_all_benchmarks.sh @@ -2,18 +2,28 @@ # Environment variable detection if [ ! "${BENCHMARK}" ]; then - echo "BENCHMARK environment variable not set - assuming \"spmv\"" 1>&2 BENCHMARK="spmv" + echo "BENCHMARK environment variable not set - assuming \"${BENCHMARK}\"" 1>&2 fi if [ ! "${DRY_RUN}" ]; then - echo "DRY_RUN environment variable not set - assuming \"false\"" 1>&2 DRY_RUN="false" + echo "DRY_RUN environment variable not set - assuming \"${DRY_RUN}\"" 1>&2 fi if [ ! "${EXECUTOR}" ]; then - echo "EXECUTOR environment variable not set - assuming \"cuda\"" 1>&2 EXECUTOR="cuda" + echo "EXECUTOR environment variable not set - assuming \"${EXECUTOR}\"" 1>&2 +fi + +if [ ! "${REPETITIONS}" ]; then + REPETITIONS=10 + echo "REPETITIONS environment variable not set - assuming ${REPETITIONS}" 1>&2 +fi + +if [ ! "${SOLVER_REPETITIONS}" ]; then + SOLVER_REPETITIONS=1 + echo "SOLVER_REPETITIONS environment variable not set - assuming ${SOLVER_REPETITIONS}" 1>&2 fi if [ ! "${SEGMENTS}" ]; then @@ -26,8 +36,8 @@ elif [ ! "${SEGMENT_ID}" ]; then fi if [ ! "${PRECONDS}" ]; then - echo "PRECONDS environment variable not set - assuming \"none\"" 1>&2 PRECONDS="none" + echo "PRECONDS environment variable not set - assuming \"${PRECONDS}\"" 1>&2 fi if [ ! "${FORMATS}" ]; then @@ -35,29 +45,102 @@ if [ ! "${FORMATS}" ]; then FORMATS="csr,coo,ell,hybrid,sellp" fi +if [ ! 
"${ELL_IMBALANCE_LIMIT}" ]; then + echo "ELL_IMBALANCE_LIMIT environment variable not set - assuming 100" 1>&2 + ELL_IMBALANCE_LIMIT=100 +fi + if [ ! "${SOLVERS}" ]; then - echo "SOLVERS environment variable not set - assuming \"bicgstab,cg,cgs,fcg,gmres\"" 1>&2 - SOLVERS="bicgstab,cg,cgs,fcg,gmres" + SOLVERS="bicgstab,cg,cgs,fcg,gmres,cb_gmres_reduce1,idr" + echo "SOLVERS environment variable not set - assuming \"${SOLVERS}\"" 1>&2 fi if [ ! "${SOLVERS_PRECISION}" ]; then - echo "SOLVERS_PRECISION environment variable not set - assuming \"1e-6\"" 1>&2 SOLVERS_PRECISION=1e-6 + echo "SOLVERS_PRECISION environment variable not set - assuming \"${SOLVERS_PRECISION}\"" 1>&2 fi if [ ! "${SOLVERS_MAX_ITERATIONS}" ]; then - echo "SOLVERS_MAX_ITERATIONS environment variable not set - assuming \"10000\"" 1>&2 SOLVERS_MAX_ITERATIONS=10000 + echo "SOLVERS_MAX_ITERATIONS environment variable not set - assuming \"${SOLVERS_MAX_ITERATIONS}\"" 1>&2 +fi + +if [ ! "${SOLVERS_GMRES_RESTART}" ]; then + SOLVERS_GMRES_RESTART=100 + echo "SOLVERS_GMRES_RESTART environment variable not set - assuming \"${SOLVERS_GMRES_RESTART}\"" 1>&2 fi if [ ! "${SYSTEM_NAME}" ]; then - echo "SYSTEM_MANE environment variable not set - assuming \"unknown\"" 1>&2 SYSTEM_NAME="unknown" + echo "SYSTEM_MANE environment variable not set - assuming \"${SYSTEM_NAME}\"" 1>&2 fi if [ ! "${DEVICE_ID}" ]; then - echo "DEVICE_ID environment variable not set - assuming \"0\"" 1>&2 DEVICE_ID="0" + echo "DEVICE_ID environment variable not set - assuming \"${DEVICE_ID}\"" 1>&2 +fi + +if [ ! "${SOLVERS_JACOBI_MAX_BS}" ]; then + SOLVERS_JACOBI_MAX_BS="32" + echo "SOLVERS_JACOBI_MAX_BS environment variable not set - assuming \"${SOLVERS_JACOBI_MAX_BS}\"" 1>&2 +fi + +if [ ! 
"${BENCHMARK_PRECISION}" ]; then + BENCHMARK_PRECISION="double" + echo "BENCHMARK_PRECISION not set - assuming \"${BENCHMARK_PRECISION}\"" 1>&2 +fi + +if [ "${BENCHMARK_PRECISION}" == "double" ]; then + BENCH_SUFFIX="" +elif [ "${BENCHMARK_PRECISION}" == "single" ]; then + BENCH_SUFFIX="_single" +elif [ "${BENCHMARK_PRECISION}" == "dcomplex" ]; then + BENCH_SUFFIX="_dcomplex" +elif [ "${BENCHMARK_PRECISION}" == "scomplex" ]; then + BENCH_SUFFIX="_scomplex" +else + echo "BENCHMARK_PRECISION is set to the not supported \"${BENCHMARK_PRECISION}\"." 1>&2 + echo "Currently supported values: \"double\", \"single\", \"dcomplex\" and \"scomplex\"" 1>&2 + exit 1 +fi + +if [ ! "${SOLVERS_RHS}" ]; then + SOLVERS_RHS="1" + echo "SOLVERS_RHS environment variable not set - assuming \"${SOLVERS_RHS}\"" 1>&2 +fi + +if [ "${SOLVERS_RHS}" == "random" ]; then + SOLVERS_RHS_FLAG="--rhs_generation=random" +elif [ "${SOLVERS_RHS}" == "1" ]; then + SOLVERS_RHS_FLAG="--rhs_generation=1" +elif [ "${SOLVERS_RHS}" == "sinus" ]; then + SOLVERS_RHS_FLAG="--rhs_generation=sinus" +else + echo "SOLVERS_RHS does not support the value \"${SOLVERS_RHS}\"." 1>&2 + echo "The following values are supported: \"1\", \"random\" and \"sinus\"" 1>&2 + exit 1 +fi + +if [ ! "${SOLVERS_INITIAL_GUESS}" ]; then + SOLVERS_INITIAL_GUESS="rhs" + echo "SOLVERS_RHS environment variable not set - assuming \"${SOLVERS_INITIAL_GUESS}\"" 1>&2 +fi + +if [ "${SOLVERS_INITIAL_GUESS}" == "random" ]; then + SOLVERS_INITIAL_GUESS_FLAG="--initial_guess_generation=random" +elif [ "${SOLVERS_INITIAL_GUESS}" == "0" ]; then + SOLVERS_INITIAL_GUESS_FLAG="--initial_guess_generation=0" +elif [ "${SOLVERS_INITIAL_GUESS}" == "rhs" ]; then + SOLVERS_INITIAL_GUESS_FLAG="--initial_guess_generation=rhs" +else + echo "SOLVERS_RHS does not support the value \"${SOLVERS_RHS}\"." 1>&2 + echo "The following values are supported: \"0\", \"random\" and \"rhs\"" 1>&2 + exit 1 +fi + +if [ ! 
"${GPU_TIMER}" ]; then + GPU_TIMER="false" + echo "GPU_TIMER environment variable not set - assuming \"${GPU_TIMER}\"" 1>&2 fi # Control whether to run detailed benchmarks or not. @@ -116,7 +199,7 @@ keep_latest() { compute_matrix_statistics() { [ "${DRY_RUN}" == "true" ] && return cp "$1" "$1.imd" # make sure we're not loosing the original input - ./matrix_statistics/matrix_statistics \ + ./matrix_statistics/matrix_statistics${BENCH_SUFFIX} \ --backup="$1.bkp" --double_buffer="$1.bkp2" \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" @@ -131,9 +214,11 @@ compute_matrix_statistics() { run_conversion_benchmarks() { [ "${DRY_RUN}" == "true" ] && return cp "$1" "$1.imd" # make sure we're not loosing the original input - ./conversions/conversions --backup="$1.bkp" --double_buffer="$1.bkp2" \ + ./conversions/conversions${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --formats="${FORMATS}" \ - --device_id="${DEVICE_ID}" \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ + --repetitions="${REPETITIONS}" \ + --ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -147,9 +232,11 @@ run_conversion_benchmarks() { run_spmv_benchmarks() { [ "${DRY_RUN}" == "true" ] && return cp "$1" "$1.imd" # make sure we're not loosing the original input - ./spmv/spmv --backup="$1.bkp" --double_buffer="$1.bkp2" \ + ./spmv/spmv${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --formats="${FORMATS}" \ - --device_id="${DEVICE_ID}" \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ + --repetitions="${REPETITIONS}" \ + --ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -163,11 +250,15 @@ run_spmv_benchmarks() { run_solver_benchmarks() { [ "${DRY_RUN}" == "true" ] && return cp "$1" "$1.imd" # make sure we're not loosing the original input - ./solver/solver 
--backup="$1.bkp" --double_buffer="$1.bkp2" \ + ./solver/solver${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --solvers="${SOLVERS}" \ --preconditioners="${PRECONDS}" \ --max_iters=${SOLVERS_MAX_ITERATIONS} --rel_res_goal=${SOLVERS_PRECISION} \ - ${DETAILED_STR} --device_id="${DEVICE_ID}" \ + ${SOLVERS_RHS_FLAG} ${DETAILED_STR} ${SOLVERS_INITIAL_GUESS_FLAG} \ + --gpu_timer=${GPU_TIMER} \ + --jacobi_max_block_size=${SOLVERS_JACOBI_MAX_BS} --device_id="${DEVICE_ID}" \ + --gmres_restart="${SOLVERS_GMRES_RESTART}" \ + --repetitions="${SOLVER_REPETITIONS}" \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -188,12 +279,13 @@ run_preconditioner_benchmarks() { for prec in ${PRECISIONS}; do echo -e "\t\t running jacobi ($prec) for block size ${bsize}" 1>&2 cp "$1" "$1.imd" # make sure we're not loosing the original input - ./preconditioner/preconditioner \ + ./preconditioner/preconditioner${BENCH_SUFFIX} \ --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --preconditioners="jacobi" \ - --max_block_size="${bsize}" \ - --storage_optimization="${prec}" \ - --device_id="${DEVICE_ID}" \ + --jacobi_max_block_size="${bsize}" \ + --jacobi_storage="${prec}" \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ + --repetitions="${REPETITIONS}" \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" done @@ -342,7 +434,7 @@ EOT generate_problem() { [ "${DRY_RUN}" == "true" ] && return cp "$1" "$1.tmp" - ./matrix_generator/matrix_generator <"$1.tmp" 2>&1 >"$1" + ./matrix_generator/matrix_generator${BENCH_SUFFIX} <"$1.tmp" 2>&1 >"$1" keep_latest "$1" "$1.tmp" } diff --git a/benchmark/solver/CMakeLists.txt b/benchmark/solver/CMakeLists.txt index 1faae042b24..11c08bdf6d2 100644 --- a/benchmark/solver/CMakeLists.txt +++ b/benchmark/solver/CMakeLists.txt @@ -1,8 +1 @@ -add_executable(solver solver.cpp) -target_link_libraries(solver ginkgo gflags rapidjson) -if (GINKGO_BUILD_CUDA) - 
ginkgo_benchmark_cusp_linops(solver) -endif() -if (GINKGO_BUILD_HIP) - ginkgo_benchmark_hipsp_linops(solver) -endif() \ No newline at end of file +ginkgo_add_typed_benchmark_executables(solver "YES" solver.cpp) diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index f043977ab9a..77d72c94bdf 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -41,16 +41,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include +#include #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/overhead_linop.hpp" +#include "benchmark/utils/preconditioners.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" -// some Ginkgo shortcuts -using etype = double; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING // Command-line arguments @@ -59,29 +66,46 @@ DEFINE_uint32(max_iters, 1000, DEFINE_double(rel_res_goal, 1e-6, "The relative residual goal of the solver"); -DEFINE_string( - solvers, "cg", - "A comma-separated list of solvers to run. " - "Supported values are: bicgstab, bicg, cg, cgs, fcg, gmres, overhead"); - -DEFINE_string( - preconditioners, "none", - "A comma-separated list of preconditioners to use. 
" - "Supported values are: none, jacobi, adaptive-jacobi, parict, parilu, " - "parilut, ilu, overhead"); - -DEFINE_uint32(parilu_iterations, 5, - "The number of iterations for ParICT/ParILU(T)"); +DEFINE_bool( + rel_residual, false, + "Use relative residual instead of residual reduction stopping criterion"); -DEFINE_bool(parilut_approx_select, true, - "Use approximate selection for ParICT/ParILUT"); - -DEFINE_double(parilut_limit, 2.0, "The fill-in limit for ParICT/ParILUT"); +DEFINE_string(solvers, "cg", + "A comma-separated list of solvers to run. " + "Supported values are: bicgstab, bicg, cb_gmres_keep, " + "cb_gmres_reduce1, cb_gmres_reduce2, cb_gmres_integer, " + "cb_gmres_ireduce1, cb_gmres_ireduce2, cg, cgs, fcg, gmres, idr, " + "lower_trs, upper_trs, overhead"); DEFINE_uint32( nrhs, 1, "The number of right hand sides. Record the residual only when nrhs == 1."); +DEFINE_uint32(gmres_restart, 100, + "What maximum dimension of the Krylov space to use in GMRES"); + +DEFINE_uint32(idr_subspace_dim, 2, + "What dimension of the subspace to use in IDR"); + +DEFINE_double( + idr_kappa, 0.7, + "the number to check whether Av_n and v_n are too close or not in IDR"); + +DEFINE_string( + rhs_generation, "1", + "Method used to generate the right hand side. Supported values are:" + "`1`, `random`, `sinus`. `1` sets all values of the right hand side to 1, " + "`random` assigns the values to a uniformly distributed random number " + "in [-1, 1), and `sinus` assigns b = A * (s / |s|) with A := system matrix," + " s := vector with s(idx) = sin(idx) for non-complex types, and " + "s(idx) = sin(2*idx) + i * sin(2*idx+1)."); + +DEFINE_string( + initial_guess_generation, "rhs", + "Method used to generate the initial guess. Supported values are: " + "`random`, `rhs`, `0`. 
`random` uses a random vector, `rhs` uses the right " + "hand side, and `0 uses a zero vector as the initial guess."); + // This allows to benchmark the overhead of a solver by using the following // data: A=[1.0], x=[0.0], b=[nan]. This data can be used to benchmark normal // solvers or using the argument --solvers=overhead, a minimal solver will be @@ -96,7 +120,8 @@ DEFINE_bool(overhead, false, std::cerr << "Input has to be a JSON array of matrix configurations:\n" << " [\n" << " { \"filename\": \"my_file.mtx\", \"optimal\": { " - "\"spmv\": \"\" } },\n" + "\"spmv\": \"\" },\n" + " \"rhs\": \"my_file_rhs.mtx\" },\n" << " { \"filename\": \"my_file2.mtx\", \"optimal\": { " "\"spmv\": \"\" } }\n" << " ]" << std::endl; @@ -104,160 +129,189 @@ DEFINE_bool(overhead, false, } +template +std::unique_ptr> generate_rhs( + std::shared_ptr exec, + std::shared_ptr system_matrix, Engine engine) +{ + gko::dim<2> vec_size{system_matrix->get_size()[0], FLAGS_nrhs}; + if (FLAGS_rhs_generation == "1") { + return create_matrix(exec, vec_size, gko::one()); + } else if (FLAGS_rhs_generation == "random") { + return create_matrix(exec, vec_size, engine); + } else if (FLAGS_rhs_generation == "sinus") { + auto rhs = vec::create(exec, vec_size); + + auto tmp = create_matrix_sin(exec, vec_size); + auto scalar = gko::matrix::Dense::create( + exec->get_master(), gko::dim<2>{1, vec_size[1]}); + tmp->compute_norm2(scalar.get()); + for (gko::size_type i = 0; i < vec_size[1]; ++i) { + scalar->at(0, i) = gko::one() / scalar->at(0, i); + } + // normalize sin-vector + if (gko::is_complex_s::value) { + tmp->scale(scalar->make_complex().get()); + } else { + tmp->scale(scalar.get()); + } + system_matrix->apply(tmp.get(), rhs.get()); + return rhs; + } + throw std::invalid_argument(std::string("\"rhs_generation\" = ") + + FLAGS_rhs_generation + " is not supported!"); +} + + +template +std::unique_ptr> generate_initial_guess( + std::shared_ptr exec, + std::shared_ptr system_matrix, const vec *rhs, + 
Engine engine) +{ + gko::dim<2> vec_size{system_matrix->get_size()[1], FLAGS_nrhs}; + if (FLAGS_initial_guess_generation == "0") { + return create_matrix(exec, vec_size, gko::zero()); + } else if (FLAGS_initial_guess_generation == "random") { + return create_matrix(exec, vec_size, engine); + } else if (FLAGS_initial_guess_generation == "rhs") { + return rhs->clone(); + } + throw std::invalid_argument(std::string("\"initial_guess_generation\" = ") + + FLAGS_initial_guess_generation + + " is not supported!"); +} + + void validate_option_object(const rapidjson::Value &value) { if (!value.IsObject() || !value.HasMember("optimal") || !value["optimal"].HasMember("spmv") || !value["optimal"]["spmv"].IsString() || !value.HasMember("filename") || - !value["filename"].IsString()) { + !value["filename"].IsString() || + (value.HasMember("rhs") && !value["rhs"].IsString())) { print_config_error_and_exit(); } } -// solver mapping -template -std::unique_ptr create_solver( - std::shared_ptr exec, +std::shared_ptr create_criterion( + std::shared_ptr exec) +{ + std::shared_ptr residual_stop; + if (FLAGS_rel_residual) { + residual_stop = gko::share( + gko::stop::RelativeResidualNorm::build() + .with_tolerance(static_cast(FLAGS_rel_res_goal)) + .on(exec)); + } else { + residual_stop = + gko::share(gko::stop::ResidualNorm::build() + .with_reduction_factor( + static_cast(FLAGS_rel_res_goal)) + .on(exec)); + } + auto iteration_stop = gko::share( + gko::stop::Iteration::build().with_max_iters(FLAGS_max_iters).on(exec)); + std::vector> + criterion_vector{residual_stop, iteration_stop}; + return gko::stop::combine(criterion_vector); +} + + +template +std::unique_ptr add_criteria_precond_finalize( + SolverIntermediate inter, const std::shared_ptr &exec, std::shared_ptr precond) { - return SolverType::build() - .with_criteria(gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(FLAGS_rel_res_goal) - .on(exec), - gko::stop::Iteration::build() - .with_max_iters(FLAGS_max_iters) 
- .on(exec)) + return inter.with_criteria(create_criterion(exec)) .with_preconditioner(give(precond)) .on(exec); } -const std::map( - std::shared_ptr, - std::shared_ptr)>> - solver_factory{{"bicgstab", create_solver>}, - {"bicg", create_solver>}, - {"cg", create_solver>}, - {"cgs", create_solver>}, - {"fcg", create_solver>}, - {"gmres", create_solver>}, - {"overhead", create_solver>}}; - - -// TODO: Workaround until GPU matrix conversions are implemented -// The factory will wrap another factory, and make sure that the -// input operator is copied to the reference executor, and then sent -// through the generate function -struct ReferenceFactoryWrapper - : gko::EnablePolymorphicObject { - ReferenceFactoryWrapper(std::shared_ptr exec) - : gko::EnablePolymorphicObject(exec) - {} - - ReferenceFactoryWrapper(std::shared_ptr f) - : gko::EnablePolymorphicObject(f->get_executor()), - base_factory{f} - {} - - std::shared_ptr exec{gko::ReferenceExecutor::create()}; - std::shared_ptr base_factory; - -protected: - std::unique_ptr generate_impl( - std::shared_ptr op) const override - { - return base_factory->generate(gko::clone(exec, op)); +template +std::unique_ptr add_criteria_precond_finalize( + const std::shared_ptr &exec, + std::shared_ptr precond) +{ + return add_criteria_precond_finalize(Solver::build(), exec, precond); +} + + +std::unique_ptr generate_solver( + const std::shared_ptr &exec, + std::shared_ptr precond, + const std::string &description) +{ + std::string cb_gmres_prefix("cb_gmres_"); + if (description.find(cb_gmres_prefix) == 0) { + auto s_prec = gko::solver::cb_gmres::storage_precision::keep; + const auto spec = description.substr(cb_gmres_prefix.length()); + if (spec == "keep") { + s_prec = gko::solver::cb_gmres::storage_precision::keep; + } else if (spec == "reduce1") { + s_prec = gko::solver::cb_gmres::storage_precision::reduce1; + } else if (spec == "reduce2") { + s_prec = gko::solver::cb_gmres::storage_precision::reduce2; + } else if (spec == 
"integer") { + s_prec = gko::solver::cb_gmres::storage_precision::integer; + } else if (spec == "ireduce1") { + s_prec = gko::solver::cb_gmres::storage_precision::ireduce1; + } else if (spec == "ireduce2") { + s_prec = gko::solver::cb_gmres::storage_precision::ireduce2; + } else { + throw std::range_error( + std::string( + "CB-GMRES does not have a corresponding solver to <") + + description + ">!"); + } + return add_criteria_precond_finalize( + gko::solver::CbGmres::build() + .with_krylov_dim(FLAGS_gmres_restart) + .with_storage_precision(s_prec), + exec, precond); + } else if (description == "bicgstab") { + return add_criteria_precond_finalize>( + exec, precond); + } else if (description == "bicg") { + return add_criteria_precond_finalize>(exec, + precond); + } else if (description == "cg") { + return add_criteria_precond_finalize>(exec, + precond); + } else if (description == "cgs") { + return add_criteria_precond_finalize>(exec, + precond); + } else if (description == "fcg") { + return add_criteria_precond_finalize>(exec, + precond); + } else if (description == "idr") { + return add_criteria_precond_finalize( + gko::solver::Idr::build() + .with_subspace_dim(FLAGS_idr_subspace_dim) + .with_kappa(static_cast(FLAGS_idr_kappa)), + exec, precond); + } else if (description == "gmres") { + return add_criteria_precond_finalize( + gko::solver::Gmres::build().with_krylov_dim( + FLAGS_gmres_restart), + exec, precond); + } else if (description == "lower_trs") { + return gko::solver::LowerTrs::build() + .with_num_rhs(FLAGS_nrhs) + .on(exec); + } else if (description == "upper_trs") { + return gko::solver::UpperTrs::build() + .with_num_rhs(FLAGS_nrhs) + .on(exec); + } else if (description == "overhead") { + return add_criteria_precond_finalize>(exec, + precond); } -}; - - -const std::map( - std::shared_ptr)>> - precond_factory{ - {"none", - [](std::shared_ptr exec) { - return gko::matrix::IdentityFactory<>::create(exec); - }}, - {"jacobi", - [](std::shared_ptr exec) { - 
std::shared_ptr f = - gko::preconditioner::Jacobi<>::build().on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"adaptive-jacobi", - [](std::shared_ptr exec) { - std::shared_ptr f = - gko::preconditioner::Jacobi<>::build() - .with_storage_optimization( - gko::precision_reduction::autodetect()) - .on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"parict", - [](std::shared_ptr exec) { - auto fact = std::shared_ptr( - gko::factorization::ParIct<>::build() - .with_iterations(FLAGS_parilu_iterations) - .with_approximate_select(FLAGS_parilut_approx_select) - .with_fill_in_limit(FLAGS_parilut_limit) - .on(exec)); - std::shared_ptr f = - gko::preconditioner::Ilu<>::build() - .with_factorization_factory(fact) - .on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"parilu", - [](std::shared_ptr exec) { - auto fact = std::shared_ptr( - gko::factorization::ParIlu<>::build() - .with_iterations(FLAGS_parilu_iterations) - .on(exec)); - std::shared_ptr f = - gko::preconditioner::Ilu<>::build() - .with_factorization_factory(fact) - .on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"parilut", - [](std::shared_ptr exec) { - auto fact = std::shared_ptr( - gko::factorization::ParIlut<>::build() - .with_iterations(FLAGS_parilu_iterations) - .with_approximate_select(FLAGS_parilut_approx_select) - .with_fill_in_limit(FLAGS_parilut_limit) - .on(exec)); - std::shared_ptr f = - gko::preconditioner::Ilu<>::build() - .with_factorization_factory(fact) - .on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"ilu", - [](std::shared_ptr exec) { - auto fact = std::shared_ptr( - gko::factorization::Ilu<>::build().on(exec)); - std::shared_ptr f = - gko::preconditioner::Ilu<>::build() - .with_factorization_factory(fact) - .on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}, - {"overhead", [](std::shared_ptr exec) { - std::shared_ptr 
f = - gko::Overhead<>::build().on(exec); - return std::unique_ptr( - new ReferenceFactoryWrapper(f)); - }}}; + throw std::range_error(std::string("The provided string <") + description + + "> does not match any solver!"); +} void write_precond_info(const gko::LinOp *precond, @@ -328,9 +382,11 @@ void solve_system(const std::string &solver_name, rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(solver_json, "true_residuals", rapidjson::Value(rapidjson::kArrayType), allocator); + add_or_set_member(solver_json, "implicit_residuals", + rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(solver_json, "iteration_timestamps", rapidjson::Value(rapidjson::kArrayType), allocator); - if (FLAGS_nrhs == 1 && !FLAGS_overhead) { + if (b->get_size()[1] == 1 && !FLAGS_overhead) { auto rhs_norm = compute_norm2(lend(b)); add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); } @@ -343,12 +399,14 @@ void solve_system(const std::string &solver_name, allocator); } + IterationControl ic{get_timer(exec, FLAGS_gpu_timer)}; + // warm run auto it_logger = std::make_shared(exec); - for (unsigned int i = 0; i < FLAGS_warmup; i++) { + for (auto _ : ic.warmup_run()) { auto x_clone = clone(x); auto precond = precond_factory.at(precond_name)(exec); - auto solver = solver_factory.at(solver_name)(exec, give(precond)) + auto solver = generate_solver(exec, give(precond), solver_name) ->generate(system_matrix); solver->add_logger(it_logger); solver->apply(lend(b), lend(x_clone)); @@ -369,7 +427,7 @@ void solve_system(const std::string &solver_name, exec->add_logger(gen_logger); auto precond = precond_factory.at(precond_name)(exec); - auto solver = solver_factory.at(solver_name)(exec, give(precond)) + auto solver = generate_solver(exec, give(precond), solver_name) ->generate(system_matrix); exec->remove_logger(gko::lend(gen_logger)); @@ -397,70 +455,66 @@ void solve_system(const std::string &solver_name, allocator, 1); // slow run, gets the recurrent and 
true residuals of each iteration - if (FLAGS_nrhs == 1) { + if (b->get_size()[1] == 1) { x_clone = clone(x); auto res_logger = std::make_shared>( exec, lend(system_matrix), b, solver_json["recurrent_residuals"], solver_json["true_residuals"], + solver_json["implicit_residuals"], solver_json["iteration_timestamps"], allocator); solver->add_logger(res_logger); solver->apply(lend(b), lend(x_clone)); + if (!res_logger->has_implicit_res_norms()) { + solver_json.RemoveMember("implicit_residuals"); + } } exec->synchronize(); } // timed run - std::chrono::nanoseconds apply_time(0); - std::chrono::nanoseconds generate_time(0); - for (unsigned int i = 0; i < FLAGS_repetitions; i++) { - auto x_clone = clone(x); + auto generate_timer = get_timer(exec, FLAGS_gpu_timer); + auto apply_timer = ic.get_timer(); + auto x_clone = clone(x); + for (auto status : ic.run(false)) { + x_clone = clone(x); exec->synchronize(); - auto g_tic = std::chrono::steady_clock::now(); - + generate_timer->tic(); auto precond = precond_factory.at(precond_name)(exec); - auto solver = solver_factory.at(solver_name)(exec, give(precond)) + auto solver = generate_solver(exec, give(precond), solver_name) ->generate(system_matrix); + generate_timer->toc(); exec->synchronize(); - auto g_tac = std::chrono::steady_clock::now(); - generate_time += - std::chrono::duration_cast(g_tac - - g_tic); - - exec->synchronize(); - auto a_tic = std::chrono::steady_clock::now(); - + apply_timer->tic(); solver->apply(lend(b), lend(x_clone)); - - exec->synchronize(); - auto a_tac = std::chrono::steady_clock::now(); - apply_time += std::chrono::duration_cast( - a_tac - a_tic); - - if (FLAGS_nrhs == 1 && i == FLAGS_repetitions - 1 && - !FLAGS_overhead) { - auto residual = compute_residual_norm(lend(system_matrix), - lend(b), lend(x_clone)); - add_or_set_member(solver_json, "residual_norm", residual, - allocator); - } + apply_timer->toc(); + } + if (b->get_size()[1] == 1 && !FLAGS_overhead) { + auto residual = 
compute_residual_norm(lend(system_matrix), lend(b), + lend(x_clone)); + add_or_set_member(solver_json, "residual_norm", residual, + allocator); } - add_or_set_member( - solver_json["generate"], "time", - static_cast(generate_time.count()) / FLAGS_repetitions, - allocator); - add_or_set_member( - solver_json["apply"], "time", - static_cast(apply_time.count()) / FLAGS_repetitions, - allocator); + add_or_set_member(solver_json["generate"], "time", + generate_timer->compute_average_time(), allocator); + add_or_set_member(solver_json["apply"], "time", + apply_timer->compute_average_time(), allocator); + add_or_set_member(solver_json, "repetitions", + apply_timer->get_num_repetitions(), allocator); // compute and write benchmark data add_or_set_member(solver_json, "completed", true, allocator); } catch (const std::exception &e) { add_or_set_member(test_case["solver"][precond_solver_name], "completed", false, allocator); + if (FLAGS_keep_errors) { + rapidjson::Value msg_value; + msg_value.SetString(e.what(), allocator); + add_or_set_member(test_case["solver"][precond_solver_name], "error", + msg_value, allocator); + } std::cerr << "Error when processing test case " << test_case << "\n" << "what(): " << e.what() << std::endl; } @@ -470,13 +524,15 @@ void solve_system(const std::string &solver_name, int main(int argc, char *argv[]) { // Set the default repetitions = 1. 
- FLAGS_repetitions = 1; + FLAGS_repetitions = "1"; + FLAGS_min_repetitions = 1; std::string header = "A benchmark for measuring performance of Ginkgo's solvers.\n"; std::string format = std::string() + " [\n" + " { \"filename\": \"my_file.mtx\", \"optimal\": { " - "\"spmv\": \"\" } },\n" + + "\"spmv\": \"\" },\n" + " \"rhs\": \"my_file_rhs.mtx\" },\n" + " { \"filename\": \"my_file2.mtx\", \"optimal\": { " "\"spmv\": \"\" } }\n" + " ]\n\n" + @@ -484,12 +540,14 @@ int main(int argc, char *argv[]) "format\n\n"; initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = "Running " + FLAGS_solvers + " with " + - std::to_string(FLAGS_max_iters) + - " iterations and residual goal of " + - std::to_string(FLAGS_rel_res_goal) + - "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::stringstream ss_rel_res_goal; + ss_rel_res_goal << std::scientific << FLAGS_rel_res_goal; + + std::string extra_information = + "Running " + FLAGS_solvers + " with " + + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + + ss_rel_res_goal.str() + "\nThe number of right hand sides is " + + std::to_string(FLAGS_nrhs) + "\n"; print_general_information(extra_information); auto exec = get_executor(); @@ -541,24 +599,27 @@ int main(int argc, char *argv[]) std::clog << "Running test case: " << test_case << std::endl; std::ifstream mtx_fd(test_case["filename"].GetString()); - using Vec = gko::matrix::Dense<>; + using Vec = gko::matrix::Dense; std::shared_ptr system_matrix; std::unique_ptr b; std::unique_ptr x; if (FLAGS_overhead) { system_matrix = gko::initialize({1.0}, exec); - b = gko::initialize({std::nan("")}, exec); + b = gko::initialize( + {std::numeric_limits::quiet_NaN()}, exec); x = gko::initialize({0.0}, exec); } else { - auto data = gko::read_raw(mtx_fd); + auto data = gko::read_raw(mtx_fd); system_matrix = share(formats::matrix_factory.at( test_case["optimal"]["spmv"].GetString())(exec, data)); - b = 
create_matrix( - exec, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, - engine); - x = create_matrix( - exec, - gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}); + if (test_case.HasMember("rhs")) { + std::ifstream rhs_fd{test_case["rhs"].GetString()}; + b = gko::read(rhs_fd, exec); + } else { + b = generate_rhs(exec, system_matrix, engine); + } + x = generate_initial_guess(exec, system_matrix, b.get(), + engine); } std::clog << "Matrix is of size (" << system_matrix->get_size()[0] diff --git a/benchmark/spmv/CMakeLists.txt b/benchmark/spmv/CMakeLists.txt index 222d3f750b4..6c5d10517a8 100644 --- a/benchmark/spmv/CMakeLists.txt +++ b/benchmark/spmv/CMakeLists.txt @@ -1,8 +1 @@ -add_executable(spmv spmv.cpp) -target_link_libraries(spmv ginkgo gflags rapidjson) -if (GINKGO_BUILD_CUDA) - ginkgo_benchmark_cusp_linops(spmv) -endif() -if (GINKGO_BUILD_HIP) - ginkgo_benchmark_hipsp_linops(spmv) -endif() +ginkgo_add_typed_benchmark_executables(spmv "YES" spmv.cpp) diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 07debcf9426..ac7562e4ed6 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,20 +47,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/general.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/spmv_common.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" -using etype = double; +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING // Command-line arguments - DEFINE_uint32(nrhs, 1, "The number of right hand sides"); // This function supposes that management of `FLAGS_overwrite` is done before // calling it void apply_spmv(const char *format_name, std::shared_ptr exec, - const gko::matrix_data &data, const vec *b, + const gko::matrix_data &data, const vec *b, const vec *x, const vec *answer, rapidjson::Value &test_case, rapidjson::MemoryPoolAllocator<> &allocator) @@ -83,40 +86,80 @@ void apply_spmv(const char *format_name, std::shared_ptr exec, exec->synchronize(); system_matrix->apply(lend(b), lend(x_clone)); exec->synchronize(); - double max_relative_norm2 = + auto max_relative_norm2 = compute_max_relative_norm2(lend(x_clone), lend(answer)); add_or_set_member(spmv_case[format_name], "max_relative_norm2", max_relative_norm2, allocator); } + + IterationControl ic{get_timer(exec, FLAGS_gpu_timer)}; // warm run - for (unsigned int i = 0; i < FLAGS_warmup; i++) { + for (auto _ : ic.warmup_run()) { auto x_clone = clone(x); exec->synchronize(); system_matrix->apply(lend(b), lend(x_clone)); exec->synchronize(); } - std::chrono::nanoseconds time(0); - // timed run - for (unsigned int i = 0; i < FLAGS_repetitions; i++) { + + // tuning run +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING + auto &format_case = spmv_case[format_name]; + if (!format_case.HasMember("tuning")) { + format_case.AddMember( + "tuning", rapidjson::Value(rapidjson::kObjectType), allocator); + } + auto &tuning_case = format_case["tuning"]; + add_or_set_member(tuning_case, "time", + rapidjson::Value(rapidjson::kArrayType), allocator); + add_or_set_member(tuning_case, "values", + 
rapidjson::Value(rapidjson::kArrayType), allocator); + + // Enable tuning for this portion of code + gko::_tuning_flag = true; + // Select some values we want to tune. + std::vector tuning_values{ + 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + for (auto val : tuning_values) { + // Actually set the value that will be tuned. See + // cuda/components/format_conversion.cuh for an example of how this + // variable is used. + gko::_tuned_value = val; + auto tuning_timer = get_timer(exec, FLAGS_gpu_timer); + IterationControl ic_tuning{tuning_timer}; auto x_clone = clone(x); - exec->synchronize(); - auto tic = std::chrono::steady_clock::now(); - system_matrix->apply(lend(b), lend(x_clone)); + for (auto _ : ic_tuning.run()) { + system_matrix->apply(lend(b), lend(x_clone)); + } + tuning_case["time"].PushBack(ic_tuning.compute_average_time(), + allocator); + tuning_case["values"].PushBack(val, allocator); + } + // We put back the flag to false to use the default (non-tuned) values + // for the following + gko::_tuning_flag = false; +#endif // GINKGO_BENCHMARK_ENABLE_TUNING - exec->synchronize(); - auto toc = std::chrono::steady_clock::now(); - time += - std::chrono::duration_cast(toc - tic); + // timed run + auto x_clone = clone(x); + for (auto _ : ic.run()) { + system_matrix->apply(lend(b), lend(x_clone)); } add_or_set_member(spmv_case[format_name], "time", - static_cast(time.count()) / FLAGS_repetitions, - allocator); + ic.compute_average_time(), allocator); + add_or_set_member(spmv_case[format_name], "repetitions", + ic.get_num_repetitions(), allocator); // compute and write benchmark data add_or_set_member(spmv_case[format_name], "completed", true, allocator); } catch (const std::exception &e) { add_or_set_member(test_case["spmv"][format_name], "completed", false, allocator); + if (FLAGS_keep_errors) { + rapidjson::Value msg_value; + msg_value.SetString(e.what(), allocator); + add_or_set_member(test_case["spmv"][format_name], "error", + msg_value, 
allocator); + } std::cerr << "Error when processing test case " << test_case << "\n" << "what(): " << e.what() << std::endl; } @@ -169,7 +212,7 @@ int main(int argc, char *argv[]) } std::clog << "Running test case: " << test_case << std::endl; std::ifstream mtx_fd(test_case["filename"].GetString()); - auto data = gko::read_raw(mtx_fd); + auto data = gko::read_raw(mtx_fd); auto nrhs = FLAGS_nrhs; auto b = create_matrix(exec, gko::dim<2>{data.size[1], nrhs}, diff --git a/benchmark/utils/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp index 08f5ea7b61d..d64ec8b3ebd 100644 --- a/benchmark/utils/cuda_linops.hpp +++ b/benchmark/utils/cuda_linops.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "benchmark/utils/types.hpp" #include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/device_guard.hpp" #include "cuda/base/pointer_mode_guard.hpp" @@ -147,7 +148,7 @@ class CuspCsrmp protected: void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override { - auto dense_b = gko::as>(b); + auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); auto db = dense_b->get_const_values(); auto dx = dense_x->get_values(); @@ -328,21 +329,6 @@ class CuspCsrEx return csr_->get_num_stored_elements(); } - ~CuspCsrEx() override - { - const auto id = this->get_gpu_exec()->get_device_id(); - if (set_buffer_) { - try { - gko::cuda::device_guard g{id}; - GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(buffer_)); - } catch (const std::exception &e) { - std::cerr - << "Error when unallocating CuspCsrEx temporary buffer: " - << e.what() << std::endl; - } - } - } - CuspCsrEx(const CuspCsrEx &other) = delete; CuspCsrEx &operator=(const CuspCsrEx &other) = default; @@ -370,14 +356,13 @@ class 
CuspCsrEx csr_->get_num_stored_elements(), &alpha, this->get_descr(), csr_->get_const_values(), csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db, &beta, dx, &buffer_size); - GKO_ASSERT_NO_CUDA_ERRORS(cudaMalloc(&buffer_, buffer_size)); - set_buffer_ = true; + buffer_.resize_and_reset(buffer_size); gko::kernels::cuda::cusparse::spmv( handle, algmode_, trans_, this->get_size()[0], this->get_size()[1], csr_->get_num_stored_elements(), &alpha, this->get_descr(), csr_->get_const_values(), csr_->get_const_row_ptrs(), - csr_->get_const_col_idxs(), db, &beta, dx, buffer_); + csr_->get_const_col_idxs(), db, &beta, dx, buffer_.get_data()); // Exiting the scope sets the pointer mode back to the default // DEVICE for Ginkgo @@ -390,7 +375,7 @@ class CuspCsrEx csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE), - set_buffer_(false) + buffer_(exec) { #ifdef ALLOWMP algmode_ = CUSPARSE_ALG_MERGE_PATH; @@ -401,8 +386,7 @@ class CuspCsrEx std::shared_ptr csr_; cusparseOperation_t trans_; cusparseAlgMode_t algmode_; - mutable void *buffer_; - mutable bool set_buffer_; + mutable gko::Array buffer_; }; @@ -498,7 +482,7 @@ class CuspHybrid #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) template @@ -694,42 +678,41 @@ class CuspGenericCoo #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) } // namespace detail // Some shortcuts -using cusp_csrex = detail::CuspCsrEx<>; +using cusp_csrex = detail::CuspCsrEx; #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -using cusp_csr = detail::CuspCsr<>; -using cusp_csrmp = detail::CuspCsrmp<>; -using cusp_csrmm = detail::CuspCsrmm<>; +using cusp_csr = detail::CuspCsr; +using cusp_csrmp = 
detail::CuspCsrmp; +using cusp_csrmm = detail::CuspCsrmm; #endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) -using cusp_gcsr = detail::CuspGenericCsr<>; -using cusp_gcsr2 = - detail::CuspGenericCsr; -using cusp_gcoo = detail::CuspGenericCoo<>; +using cusp_gcsr = detail::CuspGenericCsr; +using cusp_gcsr2 = detail::CuspGenericCsr; +using cusp_gcoo = detail::CuspGenericCoo; #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) using cusp_coo = - detail::CuspHybrid; + detail::CuspHybrid; using cusp_ell = - detail::CuspHybrid; -using cusp_hybrid = detail::CuspHybrid<>; + detail::CuspHybrid; +using cusp_hybrid = detail::CuspHybrid; #endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index ca5ad5f58be..8ae4ad45e7a 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include @@ -52,11 +53,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif // HAS_HIP +#include "benchmark/utils/types.hpp" + + namespace formats { std::string available_format = - "coo, csr, ell, sellp, hybrid, hybrid0, hybrid25, hybrid33, hybrid40, " + "coo, csr, ell, ell-mixed, sellp, hybrid, hybrid0, hybrid25, hybrid33, " + "hybrid40, " "hybrid60, hybrid80, hybridlimit0, hybridlimit25, hybridlimit33, " "hybridminstorage" #ifdef HAS_CUDA @@ -66,10 +71,10 @@ std::string available_format = #endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) ", cusp_gcsr, cusp_gcsr2, cusp_gcoo" #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) #endif // HAS_CUDA #ifdef HAS_HIP ", hipsp_csr, hipsp_csrmm, hipsp_coo, hipsp_ell, hipsp_hybrid" @@ -87,6 +92,8 @@ std::string format_description = "csrm: Ginkgo's CSR implementation with merge_path strategy.\n" "ell: Ellpack format according to Bell and Garland: Efficient Sparse " "Matrix-Vector Multiplication on CUDA.\n" + "ell-mixed: Mixed Precision Ellpack format according to Bell and Garland: " + "Efficient Sparse Matrix-Vector Multiplication on CUDA.\n" "sellp: Sliced Ellpack uses a default block size of 32.\n" "hybrid: Hybrid uses ell and coo to represent the matrix.\n" "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses " @@ -112,7 +119,7 @@ std::string format_description = "cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function." 
#if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) "\n" "cusp_gcsr: benchmark CuSPARSE with the generic csr with default " "algorithm.\n" @@ -121,7 +128,7 @@ std::string format_description = "cusp_gcoo: benchmark CuSPARSE with the generic coo with default " "algorithm.\n" #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) #endif // HAS_CUDA #ifdef HAS_HIP "\n" @@ -146,13 +153,17 @@ std::string format_command = // the formats command-line argument DEFINE_string(formats, "coo", formats::format_command.c_str()); +DEFINE_int64(ell_imbalance_limit, 100, + "Maximal storage overhead above which ELL benchmarks will be " + "skipped. Negative values mean no limit."); + namespace formats { // some shortcuts -using hybrid = gko::matrix::Hybrid<>; -using csr = gko::matrix::Csr<>; +using hybrid = gko::matrix::Hybrid; +using csr = gko::matrix::Csr; /** * Creates a Ginkgo matrix from the intermediate data representation format @@ -167,39 +178,125 @@ using csr = gko::matrix::Csr<>; */ template std::unique_ptr read_matrix_from_data( - std::shared_ptr exec, const gko::matrix_data<> &data) + std::shared_ptr exec, + const gko::matrix_data &data) { auto mat = MatrixType::create(std::move(exec)); mat->read(data); return mat; } + +/** + * Creates a CSR strategy of the given type for the given executor if possible, + * falls back to csr::classical for executors without support for this strategy. 
+ * + * @tparam Strategy one of csr::automatical or csr::load_balance + */ +template +std::shared_ptr create_gpu_strategy( + std::shared_ptr exec) +{ + if (auto cuda = dynamic_cast(exec.get())) { + return std::make_shared(cuda->shared_from_this()); + } else if (auto hip = dynamic_cast(exec.get())) { + return std::make_shared(hip->shared_from_this()); + } else { + return std::make_shared(); + } +} + + +/** + * Checks whether the given matrix data exceeds the ELL imbalance limit set by + * the --ell_imbalance_limit flag + * + * @throws gko::Error if the imbalance limit is exceeded + */ +void check_ell_admissibility(const gko::matrix_data &data) +{ + if (data.size[0] == 0 || FLAGS_ell_imbalance_limit < 0) { + return; + } + std::vector row_lengths(data.size[0]); + for (auto nz : data.nonzeros) { + row_lengths[nz.row]++; + } + auto max_len = *std::max_element(row_lengths.begin(), row_lengths.end()); + auto avg_len = data.nonzeros.size() / std::max(data.size[0], 1); + if (max_len / avg_len > FLAGS_ell_imbalance_limit) { + throw gko::Error(__FILE__, __LINE__, + "Matrix exceeds ELL imbalance limit"); + } +} + + /** * Creates a Ginkgo matrix from the intermediate data representation format * gko::matrix_data with support for variable arguments. * * @param MATRIX_TYPE the Ginkgo matrix type (such as `gko::matrix::Csr<>`) */ -#define READ_MATRIX(MATRIX_TYPE, ...) \ - [](std::shared_ptr exec, \ - const gko::matrix_data<> &data) -> std::unique_ptr { \ - auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \ - mat->read(data); \ - return mat; \ +#define READ_MATRIX(MATRIX_TYPE, ...) 
\ + [](std::shared_ptr exec, \ + const gko::matrix_data &data) \ + -> std::unique_ptr { \ + auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \ + mat->read(data); \ + return mat; \ } // clang-format off const std::map( std::shared_ptr, - const gko::matrix_data<> &)>> + const gko::matrix_data &)>> matrix_factory{ - {"csr", READ_MATRIX(csr, std::make_shared())}, - {"csri", READ_MATRIX(csr, std::make_shared())}, + {"csr", + [](std::shared_ptr exec, + const gko::matrix_data &data) -> std::unique_ptr { + auto mat = + csr::create(exec, create_gpu_strategy(exec)); + mat->read(data); + return mat; + }}, + {"csri", + [](std::shared_ptr exec, + const gko::matrix_data &data) -> std::unique_ptr { + auto mat = csr::create( + exec, create_gpu_strategy(exec)); + mat->read(data); + return mat; + }}, {"csrm", READ_MATRIX(csr, std::make_shared())}, {"csrc", READ_MATRIX(csr, std::make_shared())}, - {"coo", read_matrix_from_data>}, - {"ell", read_matrix_from_data>}, + {"coo", read_matrix_from_data>}, + {"ell", [](std::shared_ptr exec, + const gko::matrix_data &data) { + check_ell_admissibility(data); + auto mat = gko::matrix::Ell::create(exec); + mat->read(data); + return mat; + }}, + {"ell-mixed", + [](std::shared_ptr exec, + const gko::matrix_data &data) { + check_ell_admissibility(data); + gko::matrix_data, itype> conv_data; + conv_data.size = data.size; + conv_data.nonzeros.resize(data.nonzeros.size()); + auto it = conv_data.nonzeros.begin(); + for (auto &el : data.nonzeros) { + it->row = el.row; + it->column = el.column; + it->value = el.value; + ++it; + } + auto mat = gko::matrix::Ell, itype>::create( + std::move(exec)); + mat->read(conv_data); + return mat; + }}, #ifdef HAS_CUDA #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) {"cusp_csr", read_matrix_from_data}, @@ -208,20 +305,20 @@ const std::map( {"cusp_hybrid", read_matrix_from_data}, {"cusp_coo", read_matrix_from_data}, {"cusp_ell", read_matrix_from_data}, -#else // CUDA_VERSION >= 11000 - // cusp_csr, 
cusp_coo use the generic ones from CUDA 11 +#else // CUDA_VERSION >= 11000 + // cusp_csr, cusp_coo use the generic ones from CUDA 11 {"cusp_csr", read_matrix_from_data}, {"cusp_coo", read_matrix_from_data}, #endif {"cusp_csrex", read_matrix_from_data}, #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) {"cusp_gcsr", read_matrix_from_data}, {"cusp_gcsr2", read_matrix_from_data}, {"cusp_gcoo", read_matrix_from_data}, #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) #endif // HAS_CUDA #ifdef HAS_HIP {"hipsp_csr", read_matrix_from_data}, @@ -256,7 +353,8 @@ const std::map( {"hybridminstorage", READ_MATRIX(hybrid, std::make_shared())}, - {"sellp", read_matrix_from_data>}}; + {"sellp", read_matrix_from_data>} +}; // clang-format on diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index f61b92baeaf..d4f6a6853a8 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -58,6 +58,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" + + // Global command-line arguments DEFINE_string(executor, "reference", "The executor used to run the benchmarks, one of: reference, " @@ -81,14 +85,38 @@ DEFINE_string(double_buffer, "", DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); +DEFINE_bool(keep_errors, false, + "If set, writes exception messages during the execution into the " + "JSON output"); + DEFINE_bool(nested_names, false, "If set, separately logs nested operations"); DEFINE_uint32(seed, 42, "Seed used for the random number generator"); DEFINE_uint32(warmup, 2, "Warm-up repetitions"); -DEFINE_uint32(repetitions, 10, - "Number of runs used to obtain an averaged result."); +DEFINE_string(repetitions, "10", + "The number of runs used to obtain an averaged result, if 'auto' " + "is used the number is adaptively chosen." + " In that case, the benchmark runs at least 'min_repetitions'" + " times until either 'max_repetitions' is reached or the total " + "runtime is larger than 'min_runtime'"); + +DEFINE_double(min_runtime, 0.05, + "If 'repetitions = auto' is used, the minimal runtime (seconds) " + "of a single benchmark."); + +DEFINE_uint32(min_repetitions, 10, + "If 'repetitions = auto' is used, the minimal number of" + " repetitions for a single benchmark."); + +DEFINE_uint32(max_repetitions, std::numeric_limits::max(), + "If 'repetitions = auto' is used, the maximal number of" + " repetitions for a single benchmark."); + +DEFINE_double(repetition_growth_factor, 1.5, + "If 'repetitions = auto' is used, the factor with which the" + " repetitions between two timings increase."); /** @@ -134,9 +162,17 @@ void print_general_information(std::string &extra) std::clog << gko::version_info::get() << std::endl << "Running on " << FLAGS_executor << "(" << FLAGS_device_id << ")" << std::endl - << "Running with " << FLAGS_warmup << " warm iterations and " - << FLAGS_repetitions << " 
running iterations" << std::endl - << "The random seed for right hand sides is " << FLAGS_seed + << "Running with " << FLAGS_warmup << " warm iterations and "; + if (FLAGS_repetitions == "auto") { + std::clog << "adaptively determined repetititions with " + << FLAGS_min_repetitions + << " <= rep <= " << FLAGS_max_repetitions + << " and a minimal runtime of " << FLAGS_min_runtime << "s" + << std::endl; + } else { + std::clog << FLAGS_repetitions << " running iterations" << std::endl; + } + std::clog << "The random seed for right hand sides is " << FLAGS_seed << std::endl << extra; } @@ -263,9 +299,14 @@ const std::map()>> return gko::CudaExecutor::create(FLAGS_device_id, gko::OmpExecutor::create(), true); }}, - {"hip", [] { + {"hip", + [] { return gko::HipExecutor::create(FLAGS_device_id, gko::OmpExecutor::create(), true); + }}, + {"dpcpp", [] { + return gko::DpcppExecutor::create(FLAGS_device_id, + gko::OmpExecutor::create()); }}}; @@ -282,22 +323,51 @@ template using vec = gko::matrix::Dense; -// creates a zero vector +// Create a matrix with value indices s[i, j] = sin(i) template -std::unique_ptr> create_vector( - std::shared_ptr exec, gko::size_type size) +std::enable_if_t::value, + std::unique_ptr>> +create_matrix_sin(std::shared_ptr exec, gko::dim<2> size) { + auto h_res = vec::create(exec->get_master(), size); + for (gko::size_type i = 0; i < size[0]; ++i) { + for (gko::size_type j = 0; j < size[1]; ++j) { + h_res->at(i, j) = std::sin(static_cast(i)); + } + } auto res = vec::create(exec); - res->read(gko::matrix_data(gko::dim<2>{size, 1})); + h_res->move_to(res.get()); return res; } +// Note: complex values are assigned s[i, j] = {sin(2 * i), sin(2 * i + 1)} +template +std::enable_if_t::value, + std::unique_ptr>> +create_matrix_sin(std::shared_ptr exec, gko::dim<2> size) +{ + using rc_vtype = gko::remove_complex; + auto h_res = vec::create(exec->get_master(), size); + for (gko::size_type i = 0; i < size[0]; ++i) { + for (gko::size_type j = 0; j < size[1]; 
++j) { + h_res->at(i, j) = + ValueType{std::sin(static_cast(2 * i)), + std::sin(static_cast(2 * i + 1))}; + } + } + auto res = vec::create(exec); + h_res->move_to(res.get()); + return res; +} + + template std::unique_ptr> create_matrix( - std::shared_ptr exec, gko::dim<2> size) + std::shared_ptr exec, gko::dim<2> size, + ValueType value) { auto res = vec::create(exec); - res->read(gko::matrix_data(size)); + res->read(gko::matrix_data(size, value)); return res; } @@ -309,8 +379,22 @@ std::unique_ptr> create_matrix( RandomEngine &engine) { auto res = vec::create(exec); - res->read(gko::matrix_data( - size, std::uniform_real_distribution<>(-1.0, 1.0), engine)); + res->read(gko::matrix_data( + size, + std::uniform_real_distribution>(-1.0, + 1.0), + engine)); + return res; +} + + +// creates a zero vector +template +std::unique_ptr> create_vector( + std::shared_ptr exec, gko::size_type size) +{ + auto res = vec::create(exec); + res->read(gko::matrix_data(gko::dim<2>{size, 1})); return res; } @@ -327,25 +411,27 @@ std::unique_ptr> create_vector( // utilities for computing norms and residuals template -double get_norm(const vec *norm) +ValueType get_norm(const vec *norm) { return clone(norm->get_executor()->get_master(), norm)->at(0, 0); } template -double compute_norm2(const vec *b) +gko::remove_complex compute_norm2(const vec *b) { auto exec = b->get_executor(); - auto b_norm = gko::initialize>({0.0}, exec); + auto b_norm = + gko::initialize>>({0.0}, exec); b->compute_norm2(lend(b_norm)); return get_norm(lend(b_norm)); } template -double compute_residual_norm(const gko::LinOp *system_matrix, - const vec *b, const vec *x) +gko::remove_complex compute_residual_norm( + const gko::LinOp *system_matrix, const vec *b, + const vec *x) { auto exec = system_matrix->get_executor(); auto one = gko::initialize>({1.0}, exec); @@ -357,23 +443,24 @@ double compute_residual_norm(const gko::LinOp *system_matrix, template -double compute_max_relative_norm2(vec *result, - const vec 
*answer) +gko::remove_complex compute_max_relative_norm2( + vec *result, const vec *answer) { + using rc_vtype = gko::remove_complex; auto exec = answer->get_executor(); auto answer_norm = - vec::create(exec, gko::dim<2>{1, answer->get_size()[1]}); + vec::create(exec, gko::dim<2>{1, answer->get_size()[1]}); answer->compute_norm2(lend(answer_norm)); auto neg_one = gko::initialize>({-1.0}, exec); result->add_scaled(lend(neg_one), lend(answer)); auto absolute_norm = - vec::create(exec, gko::dim<2>{1, answer->get_size()[1]}); + vec::create(exec, gko::dim<2>{1, answer->get_size()[1]}); result->compute_norm2(lend(absolute_norm)); auto host_answer_norm = clone(answer_norm->get_executor()->get_master(), answer_norm); auto host_absolute_norm = clone(absolute_norm->get_executor()->get_master(), absolute_norm); - double max_relative_norm2 = 0; + rc_vtype max_relative_norm2 = 0; for (gko::size_type i = 0; i < host_answer_norm->get_size()[1]; i++) { max_relative_norm2 = std::max(host_absolute_norm->at(0, i) / host_answer_norm->at(0, i), @@ -383,4 +470,250 @@ double compute_max_relative_norm2(vec *result, } +/** + * A class for controlling the number warmup and timed iterations. 
+ * + * The behavior is determined by the following flags + * - 'repetitions' switch between fixed and adaptive number of iterations + * - 'warmup' warmup iterations, applies in fixed and adaptive case + * - 'min_repetitions' minimal number of repetitions (adaptive case) + * - 'max_repetitions' maximal number of repetitions (adaptive case) + * - 'min_runtime' minimal total runtime (adaptive case) + * - 'repetition_growth_factor' controls the increase between two successive + * timings + * + * Usage: + * `IterationControl` exposes the member functions: + * - `warmup_run()`: controls run defined by `warmup` flag + * - `run(bool)`: controls run defined by all other flags + * - `get_timer()`: access to underlying timer + * The first two methods return an object that is to be used in a range-based + * for loop: + * ``` + * IterationControl ic(get_timer(...)); + * + * // warmup run always uses fixed number of iteration and does not issue + * // timings + * for(auto status: ic.warmup_run()){ + * // execute benchmark + * } + * // run may use adaptive number of iterations (depending on cmd line flag) + * // and issues timing (unless manage_timings is false) + * for(auto status: ic.run(manage_timings [default is true])){ + * if(! manage_timings) ic.get_timer->tic(); + * // execute benchmark + * if(! manage_timings) ic.get_timer->toc(); + * } + * + * ``` + * At the beginning of both methods, the timer is reset. + * The `status` object exposes the member + * - `cur_it`, containing the current iteration number, + * and the methods + * - `is_finished`, checks if the benchmark is finished, + */ +class IterationControl { + using IndexType = unsigned int; //!< to be compatible with GFLAGS type + + class run_control; + +public: + /** + * Creates an `IterationControl` object. + * + * Uses the commandline flags to setup the stopping criteria for the + * warmup and timed run. 
+ * + * @param timer the timer that is to be used for the timings + */ + explicit IterationControl(const std::shared_ptr &timer) + { + status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, + FLAGS_warmup, 0., 0}; + if (FLAGS_repetitions == "auto") { + status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, + FLAGS_max_repetitions, FLAGS_min_runtime}; + } else { + const auto reps = + static_cast(std::stoi(FLAGS_repetitions)); + status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; + } + } + + IterationControl() = default; + IterationControl(const IterationControl &) = default; + IterationControl(IterationControl &&) = default; + + /** + * Creates iterable `run_control` object for the warmup run. + * + * This run uses always a fixed number of iterations. + */ + run_control warmup_run() + { + status_warmup_.cur_it = 0; + status_warmup_.managed_timer.clear(); + return run_control{&status_warmup_}; + } + + /** + * Creates iterable `run_control` object for the timed run. + * + * This run may be adaptive, depending on the commandline flags. 
+ * + * @param manage_timings If true, the timer calls (`tic/toc`) are handled + * by the `run_control` object, otherwise they need to be executed outside + */ + run_control run(bool manage_timings = true) + { + status_run_.cur_it = 0; + status_run_.managed_timer.clear(); + status_run_.managed_timer.manage_timings = manage_timings; + return run_control{&status_run_}; + } + + std::shared_ptr get_timer() const + { + return status_run_.managed_timer.timer; + } + + double compute_average_time() const + { + return status_run_.managed_timer.get_total_time() / + get_num_repetitions(); + } + + IndexType get_num_repetitions() const { return status_run_.cur_it; } + +private: + struct TimerManager { + std::shared_ptr timer; + bool manage_timings = false; + + void tic() + { + if (manage_timings) { + timer->tic(); + } + } + void toc() + { + if (manage_timings) { + timer->toc(); + } + } + + void clear() { timer->clear(); } + + double get_total_time() const { return timer->get_total_time(); } + }; + + /** + * Stores stopping criteria of the adaptive benchmark run as well as the + * current iteration number. + */ + struct status { + TimerManager managed_timer{}; + + IndexType min_it = 0; + IndexType max_it = 0; + double max_runtime = 0.; + + IndexType cur_it = 0; + + /** + * checks if the adaptive run is complete + * + * the adaptive run is complete if: + * - the minimum number of iteration is reached + * - and either: + * - the maximum number of repetitions is reached + * - the total runtime is above the threshold + * + * @return completeness state of the adaptive run + */ + bool is_finished() const + { + return cur_it >= min_it && + (cur_it >= max_it || + managed_timer.get_total_time() >= max_runtime); + } + }; + + /** + * Iterable class managing the benchmark iteration. + * + * Has to be used in a range-based for loop. + */ + struct run_control { + struct iterator { + /** + * Increases the current iteration count and finishes timing if + * necessary. 
+ * + * As `++it` is the last step of a for-loop, the managed_timer is + * stopped, if enough iterations have passed since the last timing. + * The interval between two timings is steadily increased to + * reduce the timing overhead. + */ + iterator operator++() + { + cur_info->cur_it++; + if (cur_info->cur_it >= next_timing && !stopped) { + cur_info->managed_timer.toc(); + stopped = true; + next_timing = static_cast(std::ceil( + next_timing * FLAGS_repetition_growth_factor)); + } + return *this; + } + + status operator*() const { return *cur_info; } + + /** + * Checks if the benchmark is finished and handles timing, if + * necessary. + * + * As `begin != end` is the first step in a for-loop, the + * managed_timer is started, if it was previously stopped. + * Additionally, if the benchmark is complete and the managed_timer + * is still running it is stopped. (This may occur if the maximal + * number of repetitions is surpassed) + * + * Uses only the information from the `status` object, i.e. + * the right hand side is ignored. 
+ * + * @return true if benchmark is not finished, else false + */ + bool operator!=(const iterator &) + { + const bool is_finished = cur_info->is_finished(); + if (!is_finished && stopped) { + stopped = false; + cur_info->managed_timer.tic(); + } else if (is_finished && !stopped) { + cur_info->managed_timer.toc(); + stopped = true; + } + return !is_finished; + } + + status *cur_info; + IndexType next_timing = 1; //!< next iteration to stop timing + bool stopped = true; + }; + + iterator begin() const { return iterator{info}; } + + // not used, could potentially used in c++17 as a sentinel + iterator end() const { return iterator{}; } + + status *info; + }; + + status status_warmup_; + status status_run_; +}; + + #endif // GKO_BENCHMARK_UTILS_GENERAL_HPP_ diff --git a/benchmark/utils/hip_linops.hip.hpp b/benchmark/utils/hip_linops.hip.hpp index 5d62d605d24..f9b2066aab5 100644 --- a/benchmark/utils/hip_linops.hip.hpp +++ b/benchmark/utils/hip_linops.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "benchmark/utils/types.hpp" #include "hip/base/device_guard.hip.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" @@ -321,14 +322,14 @@ class HipspHybrid // Some shortcuts -using hipsp_csr = detail::HipspCsr<>; -using hipsp_csrmm = detail::HipspCsrmm<>; +using hipsp_csr = detail::HipspCsr; +using hipsp_csrmm = detail::HipspCsrmm; using hipsp_coo = - detail::HipspHybrid; + detail::HipspHybrid; using hipsp_ell = - detail::HipspHybrid; -using hipsp_hybrid = detail::HipspHybrid<>; + detail::HipspHybrid; +using hipsp_hybrid = detail::HipspHybrid; #endif // GKO_BENCHMARK_UTILS_HIP_LINOPS_HIP_HPP_ diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index ea6bbea2797..983cd2951fb 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include @@ -109,9 +110,7 @@ struct OperationLogger : gko::log::Logger { for (const auto &entry : total) { add_or_set_member( object, entry.first.c_str(), - std::chrono::duration_cast( - entry.second) - .count() / + std::chrono::duration(entry.second).count() / repetitions, alloc); } @@ -202,19 +201,31 @@ struct StorageLogger : gko::log::Logger { // Logs true and recurrent residuals of the solver template struct ResidualLogger : gko::log::Logger { - void on_iteration_complete(const gko::LinOp *, const gko::size_type &, + using rc_vtype = gko::remove_complex; + + // TODO2.0: Remove when deprecating simple overload + void on_iteration_complete(const gko::LinOp *solver, + const gko::size_type &it, const gko::LinOp *residual, const gko::LinOp *solution, const gko::LinOp *residual_norm) const override { - timestamps.PushBack( - std::chrono::duration_cast( - std::chrono::steady_clock::now() - start) - .count(), - alloc); + on_iteration_complete(solver, it, residual, solution, residual_norm, + nullptr); + } + + void on_iteration_complete( + const gko::LinOp *, const gko::size_type &, const gko::LinOp *residual, + const gko::LinOp *solution, const gko::LinOp *residual_norm, + const gko::LinOp *implicit_sq_residual_norm) const override + { + timestamps.PushBack(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count(), + alloc); if (residual_norm) { rec_res_norms.PushBack( - get_norm(gko::as>(residual_norm)), alloc); + get_norm(gko::as>(residual_norm)), alloc); } else { rec_res_norms.PushBack( compute_norm2(gko::as>(residual)), alloc); @@ -227,12 +238,22 @@ struct ResidualLogger : gko::log::Logger { } else { true_res_norms.PushBack(-1.0, alloc); } + if (implicit_sq_residual_norm) { + implicit_res_norms.PushBack( + std::sqrt(get_norm( + gko::as>(implicit_sq_residual_norm))), + alloc); + has_implicit_res_norm = true; + } else { + implicit_res_norms.PushBack(-1.0, alloc); + } } ResidualLogger(std::shared_ptr exec, const 
gko::LinOp *matrix, const vec *b, rapidjson::Value &rec_res_norms, rapidjson::Value &true_res_norms, + rapidjson::Value &implicit_res_norms, rapidjson::Value ×tamps, rapidjson::MemoryPoolAllocator<> &alloc) : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), @@ -241,16 +262,22 @@ struct ResidualLogger : gko::log::Logger { start{std::chrono::steady_clock::now()}, rec_res_norms{rec_res_norms}, true_res_norms{true_res_norms}, + has_implicit_res_norm{}, + implicit_res_norms{implicit_res_norms}, timestamps{timestamps}, alloc{alloc} {} + bool has_implicit_res_norms() const { return has_implicit_res_norm; } + private: const gko::LinOp *matrix; const vec *b; std::chrono::steady_clock::time_point start; rapidjson::Value &rec_res_norms; rapidjson::Value &true_res_norms; + mutable bool has_implicit_res_norm; + rapidjson::Value &implicit_res_norms; rapidjson::Value ×tamps; rapidjson::MemoryPoolAllocator<> &alloc; }; diff --git a/benchmark/utils/overhead_linop.hpp b/benchmark/utils/overhead_linop.hpp index 0afbf57e12c..8ade2e74f83 100644 --- a/benchmark/utils/overhead_linop.hpp +++ b/benchmark/utils/overhead_linop.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -109,6 +109,15 @@ GKO_DECLARE_ALL; } // namespace hip +namespace dpcpp { +namespace overhead { + +GKO_DECLARE_ALL; + +} // namespace overhead +} // namespace dpcpp + + #undef GKO_DECLARE_ALL @@ -208,7 +217,7 @@ class Overhead : public EnableLinOp>, parameters_.preconditioner->generate(system_matrix_)); } else { set_preconditioner(matrix::Identity::create( - this->get_executor(), this->get_size()[0])); + this->get_executor(), this->get_size())); } stop_criterion_factory_ = stop::combine(std::move(parameters_.criteria)); diff --git a/benchmark/utils/preconditioners.hpp b/benchmark/utils/preconditioners.hpp new file mode 100644 index 00000000000..b9f08348b6d --- /dev/null +++ b/benchmark/utils/preconditioners.hpp @@ -0,0 +1,335 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_PRECONDITIONERS_HPP_ +#define GKO_BENCHMARK_UTILS_PRECONDITIONERS_HPP_ + + +#include + + +#include +#include + + +#include + + +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/overhead_linop.hpp" +#include "benchmark/utils/types.hpp" + + +DEFINE_string(preconditioners, "none", + "A comma-separated list of preconditioners to use. " + "Supported values are: none, jacobi, paric, parict, parilu, " + "parilut, ic, ilu, paric-isai, parict-isai, parilu-isai, " + "parilut-isai, ic-isai, ilu-isai, overhead"); + +DEFINE_uint32(parilu_iterations, 5, + "The number of iterations for ParIC(T)/ParILU(T)"); + +DEFINE_bool(parilut_approx_select, true, + "Use approximate selection for ParICT/ParILUT"); + +DEFINE_double(parilut_limit, 2.0, "The fill-in limit for ParICT/ParILUT"); + +DEFINE_int32( + isai_power, 1, + "Which power of the sparsity structure to use for ISAI preconditioners"); + +DEFINE_string(jacobi_storage, "0,0", + "Defines the kind of storage optimization to perform on " + "preconditioners that support it. 
Supported values are: " + "autodetect and , where and are the input " + "parameters used to construct a precision_reduction object."); + +DEFINE_double(jacobi_accuracy, 1e-1, + "This value is used as the accuracy flag of the adaptive Jacobi " + "preconditioner."); + +DEFINE_uint32(jacobi_max_block_size, 32, + "Maximal block size of the block-Jacobi preconditioner"); + + +// parses the Jacobi storage optimization command line argument +gko::precision_reduction parse_storage_optimization(const std::string &flag) +{ + if (flag == "autodetect") { + return gko::precision_reduction::autodetect(); + } + const auto parts = split(flag, ','); + if (parts.size() != 2) { + throw std::runtime_error( + "storage_optimization has to be a list of two integers"); + } + return gko::precision_reduction(std::stoi(parts[0]), std::stoi(parts[1])); +} + + +const std::map( + std::shared_ptr)>> + precond_factory{ + {"none", + [](std::shared_ptr exec) { + return gko::matrix::IdentityFactory::create(exec); + }}, + {"jacobi", + [](std::shared_ptr exec) { + return gko::preconditioner::Jacobi::build() + .with_max_block_size(FLAGS_jacobi_max_block_size) + .with_storage_optimization( + parse_storage_optimization(FLAGS_jacobi_storage)) + .with_accuracy(static_cast(FLAGS_jacobi_accuracy)) + .with_skip_sorting(true) + .on(exec); + }}, + {"paric", + [](std::shared_ptr exec) { + auto fact = + gko::share(gko::factorization::ParIc::build() + .with_iterations(FLAGS_parilu_iterations) + .with_skip_sorting(true) + .on(exec)); + return gko::preconditioner::Ic, + itype>::build() + .with_factorization_factory(fact) + .on(exec); + }}, + {"parict", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::ParIct::build() + .with_iterations(FLAGS_parilu_iterations) + .with_approximate_select(FLAGS_parilut_approx_select) + .with_fill_in_limit(FLAGS_parilut_limit) + .with_skip_sorting(true) + .on(exec)); + return gko::preconditioner:: + Ilu, + gko::solver::UpperTrs, false, itype>::build() + 
.with_factorization_factory(fact) + .on(exec); + }}, + {"parilu", + [](std::shared_ptr exec) { + auto fact = + gko::share(gko::factorization::ParIlu::build() + .with_iterations(FLAGS_parilu_iterations) + .with_skip_sorting(true) + .on(exec)); + return gko::preconditioner:: + Ilu, + gko::solver::UpperTrs, false, itype>::build() + .with_factorization_factory(fact) + .on(exec); + }}, + {"parilut", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::ParIlut::build() + .with_iterations(FLAGS_parilu_iterations) + .with_approximate_select(FLAGS_parilut_approx_select) + .with_fill_in_limit(FLAGS_parilut_limit) + .with_skip_sorting(true) + .on(exec)); + return gko::preconditioner:: + Ilu, + gko::solver::UpperTrs, false, itype>::build() + .with_factorization_factory(fact) + .on(exec); + }}, + {"ic", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::Ic::build().on(exec)); + return gko::preconditioner::Ic, + itype>::build() + .with_factorization_factory(fact) + .on(exec); + }}, + {"ilu", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::Ilu::build().on(exec)); + return gko::preconditioner:: + Ilu, + gko::solver::UpperTrs, false, itype>::build() + .with_factorization_factory(fact) + .on(exec); + }}, + {"paric-isai", + [](std::shared_ptr exec) { + auto fact = + gko::share(gko::factorization::ParIc::build() + .with_iterations(FLAGS_parilu_iterations) + .with_skip_sorting(true) + .on(exec)); + auto lisai = gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ic< + gko::preconditioner::LowerIsai, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .on(exec); + }}, + {"parict-isai", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::ParIct::build() + .with_iterations(FLAGS_parilu_iterations) + .with_approximate_select(FLAGS_parilut_approx_select) + 
.with_fill_in_limit(FLAGS_parilut_limit) + .with_skip_sorting(true) + .on(exec)); + auto lisai = gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ic< + gko::preconditioner::LowerIsai, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .on(exec); + }}, + {"parilu-isai", + [](std::shared_ptr exec) { + auto fact = + gko::share(gko::factorization::ParIlu::build() + .with_iterations(FLAGS_parilu_iterations) + .with_skip_sorting(true) + .on(exec)); + auto lisai = gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + auto uisai = gko::share( + gko::preconditioner::UpperIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ilu< + gko::preconditioner::LowerIsai, + gko::preconditioner::UpperIsai, false, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .with_u_solver_factory(uisai) + .on(exec); + }}, + {"parilut-isai", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::ParIlut::build() + .with_iterations(FLAGS_parilu_iterations) + .with_approximate_select(FLAGS_parilut_approx_select) + .with_fill_in_limit(FLAGS_parilut_limit) + .with_skip_sorting(true) + .on(exec)); + auto lisai = gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + auto uisai = gko::share( + gko::preconditioner::UpperIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ilu< + gko::preconditioner::LowerIsai, + gko::preconditioner::UpperIsai, false, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .with_u_solver_factory(uisai) + .on(exec); + }}, + {"ic-isai", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::Ic::build().on(exec)); + auto lisai = 
gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ic< + gko::preconditioner::LowerIsai, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .on(exec); + }}, + {"ilu-isai", + [](std::shared_ptr exec) { + auto fact = gko::share( + gko::factorization::Ilu::build().on(exec)); + auto lisai = gko::share( + gko::preconditioner::LowerIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + auto uisai = gko::share( + gko::preconditioner::UpperIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec)); + return gko::preconditioner::Ilu< + gko::preconditioner::LowerIsai, + gko::preconditioner::UpperIsai, false, + itype>::build() + .with_factorization_factory(fact) + .with_l_solver_factory(lisai) + .with_u_solver_factory(uisai) + .on(exec); + }}, + {"general-isai", + [](std::shared_ptr exec) { + return gko::preconditioner::GeneralIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec); + }}, + {"spd-isai", + [](std::shared_ptr exec) { + return gko::preconditioner::SpdIsai::build() + .with_sparsity_power(FLAGS_isai_power) + .on(exec); + }}, + {"overhead", [](std::shared_ptr exec) { + return gko::Overhead::build() + .with_criteria(gko::stop::ResidualNorm::build() + .with_reduction_factor(rc_etype{}) + .on(exec)) + .on(exec); + }}}; + + +#endif // GKO_BENCHMARK_UTILS_PRECONDITIONERS_HPP_ diff --git a/benchmark/utils/spmv_common.hpp b/benchmark/utils/spmv_common.hpp index 34cd51067ae..4fd4ff21d95 100644 --- a/benchmark/utils/spmv_common.hpp +++ b/benchmark/utils/spmv_common.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -44,10 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -// some shortcuts -using hybrid = gko::matrix::Hybrid<>; -using csr = gko::matrix::Csr<>; - /** * Function which outputs the input format for benchmarks similar to the spmv. */ diff --git a/benchmark/utils/timer.hpp b/benchmark/utils/timer.hpp new file mode 100644 index 00000000000..5e09e738b19 --- /dev/null +++ b/benchmark/utils/timer.hpp @@ -0,0 +1,395 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_TIMER_HPP_ +#define GKO_BENCHMARK_UTILS_TIMER_HPP_ + + +#include + + +#include +#include + + +#include + + +#ifdef HAS_CUDA + + +#include +#include + + +#include "cuda/base/device_guard.hpp" + + +#endif // HAS_CUDA + + +#ifdef HAS_HIP + + +#include + + +#include "hip/base/device_guard.hip.hpp" + + +#endif // HAS_HIP + + +// Command-line arguments +DEFINE_bool(gpu_timer, false, + "use gpu timer based on event. It is valid only when " + "executor is cuda or hip"); + + +/** + * Timer stores the timing information + */ +class Timer { +public: + /** + * Start the timer + */ + void tic() + { + assert(tic_called_ == false); + this->tic_impl(); + tic_called_ = true; + } + + /** + * Finish the timer + */ + void toc() + { + assert(tic_called_ == true); + auto sec = this->toc_impl(); + tic_called_ = false; + this->add_record(sec); + } + + /** + * Get the summation of each time in seconds. + * + * @return the seconds of total time + */ + double get_total_time() const { return total_duration_sec_; } + + /** + * Get the number of repetitions. + * + * @return the number of repetitions + */ + std::int64_t get_num_repetitions() const { return duration_sec_.size(); } + + /** + * Compute the average time of repetitions in seconds + * + * @return the average time in seconds + */ + double compute_average_time() const + { + return this->get_total_time() / this->get_num_repetitions(); + } + + /** + * Get the vector containing the time of each repetition in seconds. + * + * @return the vector of time for each repetition in seconds + */ + std::vector get_time_detail() const { return duration_sec_; } + + /** + * Get the latest result in seconds. If there is no result yet, return + * 0. 
+ * + * @return the latest result in seconds + */ + double get_latest_time() const + { + if (duration_sec_.size() >= 1) { + return duration_sec_.back(); + } else { + return 0; + } + } + + /** + * Clear the results of the timer + */ + void clear() + { + duration_sec_.clear(); + tic_called_ = false; + total_duration_sec_ = 0; + } + + /** + * Create a timer + */ + Timer() : tic_called_(false), total_duration_sec_(0) {} + +protected: + /** + * Append a result in seconds to the vector of results + * + * @param sec the result in seconds to append + */ + void add_record(double sec) + { + // add the result; + duration_sec_.emplace_back(sec); + total_duration_sec_ += sec; + } + + /** + * The implementation of tic. + */ + virtual void tic_impl() = 0; + + /** + * The implementation of toc. Returns the result in seconds. + * + * @return the result in seconds + */ + virtual double toc_impl() = 0; + +private: + std::vector duration_sec_; + bool tic_called_; + double total_duration_sec_; +}; + + +/** + * CpuTimer uses the executor's synchronize() and std::chrono to measure the + * timing. + */ +class CpuTimer : public Timer { +public: + /** + * Create a CpuTimer + * + * @param exec Executor associated with the timer + */ + CpuTimer(std::shared_ptr exec) : Timer(), exec_(exec) + {} + +protected: + void tic_impl() override + { + exec_->synchronize(); + start_ = std::chrono::steady_clock::now(); + } + + double toc_impl() override + { + exec_->synchronize(); + auto stop = std::chrono::steady_clock::now(); + std::chrono::duration duration_time = stop - start_; + return duration_time.count(); + } + +private: + std::shared_ptr exec_; + std::chrono::time_point start_; +}; + + +#ifdef HAS_CUDA + + +/** + * CudaTimer uses cuda executor and cudaEvent to measure the timing. + */ +class CudaTimer : public Timer { +public: + /** + * Create a CudaTimer. 
+ * + * @param exec Executor which should be a CudaExecutor + */ + CudaTimer(std::shared_ptr exec) + : CudaTimer(std::dynamic_pointer_cast(exec)) + {} + + /** + * Create a CudaTimer. + * + * @param exec CudaExecutor associated to the timer + */ + CudaTimer(std::shared_ptr exec) : Timer() + { + assert(exec != nullptr); + exec_ = exec; + id_ = exec_->get_device_id(); + gko::cuda::device_guard g{id_}; + GKO_ASSERT_NO_CUDA_ERRORS(cudaEventCreate(&start_)); + GKO_ASSERT_NO_CUDA_ERRORS(cudaEventCreate(&stop_)); + } + +protected: + void tic_impl() override + { + exec_->synchronize(); + gko::cuda::device_guard g{id_}; + // Currently, gko::CudaExecutor always uses the default stream. + GKO_ASSERT_NO_CUDA_ERRORS(cudaEventRecord(start_)); + } + + double toc_impl() override + { + gko::cuda::device_guard g{id_}; + // Currently, gko::CudaExecutor always uses the default stream. + GKO_ASSERT_NO_CUDA_ERRORS(cudaEventRecord(stop_)); + GKO_ASSERT_NO_CUDA_ERRORS(cudaEventSynchronize(stop_)); + float duration_time = 0; + // cudaEventElapsedTime gives the duration_time in milliseconds with a + // resolution of around 0.5 microseconds + GKO_ASSERT_NO_CUDA_ERRORS( + cudaEventElapsedTime(&duration_time, start_, stop_)); + constexpr int sec_in_ms = 1e3; + return static_cast(duration_time) / sec_in_ms; + } + +private: + std::shared_ptr exec_; + cudaEvent_t start_; + cudaEvent_t stop_; + int id_; +}; + + +#endif // HAS_CUDA + + +#ifdef HAS_HIP + + +/** + * HipTimer uses hip executor and hipEvent to measure the timing. + */ +class HipTimer : public Timer { +public: + /** + * Create a HipTimer. + * + * @param exec Executor which should be a HipExecutor + */ + HipTimer(std::shared_ptr exec) + : HipTimer(std::dynamic_pointer_cast(exec)) + {} + + /** + * Create a HipTimer. 
+ * + * @param exec HipExecutor associated to the timer + */ + HipTimer(std::shared_ptr exec) : Timer() + { + assert(exec != nullptr); + exec_ = exec; + id_ = exec_->get_device_id(); + gko::hip::device_guard g{id_}; + GKO_ASSERT_NO_HIP_ERRORS(hipEventCreate(&start_)); + GKO_ASSERT_NO_HIP_ERRORS(hipEventCreate(&stop_)); + } + +protected: + void tic_impl() override + { + exec_->synchronize(); + gko::hip::device_guard g{id_}; + // Currently, gko::HipExecutor always uses the default stream. + GKO_ASSERT_NO_HIP_ERRORS(hipEventRecord(start_)); + } + + double toc_impl() override + { + gko::hip::device_guard g{id_}; + // Currently, gko::HipExecutor always uses the default stream. + GKO_ASSERT_NO_HIP_ERRORS(hipEventRecord(stop_)); + GKO_ASSERT_NO_HIP_ERRORS(hipEventSynchronize(stop_)); + float duration_time = 0; + // hipEventElapsedTime gives the duration_time in milliseconds with a + // resolution of around 0.5 microseconds + GKO_ASSERT_NO_HIP_ERRORS( + hipEventElapsedTime(&duration_time, start_, stop_)); + constexpr int sec_in_ms = 1e3; + return static_cast(duration_time) / sec_in_ms; + } + +private: + std::shared_ptr exec_; + hipEvent_t start_; + hipEvent_t stop_; + int id_; +}; + + +#endif // HAS_HIP + + +/** + * Get the timer. If the executor does not support gpu timer, still return the + * cpu timer. 
+ * + * @param exec Executor associated to the timer + * @param use_gpu_timer whether to use the gpu timer + */ +std::shared_ptr get_timer(std::shared_ptr exec, + bool use_gpu_timer) +{ + if (use_gpu_timer) { +#ifdef HAS_CUDA + if (auto cuda = + std::dynamic_pointer_cast(exec)) { + return std::make_shared(cuda); + } +#endif // HAS_CUDA + +#ifdef HAS_HIP + if (auto hip = + std::dynamic_pointer_cast(exec)) { + return std::make_shared(hip); + } +#endif // HAS_HIP + } + // No cuda/hip executor available or no gpu_timer used + return std::make_shared(exec); +} + +#endif // GKO_BENCHMARK_UTILS_TIMER_HPP_ diff --git a/common/components/precision_conversion.hpp.inc b/benchmark/utils/tuning_variables.cpp similarity index 81% rename from common/components/precision_conversion.hpp.inc rename to benchmark/utils/tuning_variables.cpp index c486354f156..b01dd98d635 100644 --- a/common/components/precision_conversion.hpp.inc +++ b/benchmark/utils/tuning_variables.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,12 +30,19 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template -__global__ void convert_precision(size_type size, const SourceType *in, - TargetType *out) -{ - auto tnum = thread::get_thread_num_flat(); - for (auto i = thread::get_thread_id_flat(); i < size; i += tnum) { - out[i] = in[i]; - } -} \ No newline at end of file +#include + + +#include "benchmark/utils/tuning_variables.hpp" + + +namespace gko { + + +bool _tuning_flag = false; + + +size_type _tuned_value = 0; + + +} // namespace gko diff --git a/core/devices/hip/executor.cpp b/benchmark/utils/tuning_variables.hpp similarity index 82% rename from core/devices/hip/executor.cpp rename to benchmark/utils/tuning_variables.hpp index f4787523290..be172c02cd0 100644 --- a/core/devices/hip/executor.cpp +++ b/benchmark/utils/tuning_variables.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,25 +30,23 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include +#ifndef GKO_BENCHMARK_UTILS_TUNING_VARIABLES_HPP_ +#define GKO_BENCHMARK_UTILS_TUNING_VARIABLES_HPP_ -namespace gko { +#include -std::shared_ptr HipExecutor::get_master() noexcept { return master_; } +namespace gko { -std::shared_ptr HipExecutor::get_master() const noexcept -{ - return master_; -} +extern bool _tuning_flag; -int HipExecutor::num_execs[max_devices]; +extern size_type _tuned_value; -std::mutex HipExecutor::mutex[max_devices]; +} // namespace gko -} // namespace gko +#endif // GKO_BENCHMARK_UTILS_TUNING_VARIABLES_HPP_ diff --git a/benchmark/utils/types.hpp b/benchmark/utils/types.hpp new file mode 100644 index 00000000000..61ee056c5ff --- /dev/null +++ b/benchmark/utils/types.hpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_TYPES_HPP_ +#define GKO_BENCHMARK_UTILS_TYPES_HPP_ + + +#include + + +#include + + +using itype = gko::int32; + + +#if defined(GKO_BENCHMARK_USE_DOUBLE_PRECISION) +using etype = double; +#elif defined(GKO_BENCHMARK_USE_SINGLE_PRECISION) +using etype = float; +#elif defined(GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) +using etype = std::complex; +#elif defined(GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) +using etype = std::complex; +#else // default to double precision +using etype = double; +#endif + +using rc_etype = gko::remove_complex; + + +#endif // GKO_BENCHMARK_UTILS_TYPES_HPP_ diff --git a/cmake/DownloadCMakeLists.txt.in b/cmake/DownloadCMakeLists.txt.in deleted file mode 100644 index 2eb96a85dea..00000000000 --- a/cmake/DownloadCMakeLists.txt.in +++ /dev/null @@ -1,25 +0,0 @@ -cmake_minimum_required(VERSION 3.9) -project(${package_name}) - -include(ExternalProject) -ExternalProject_Add(${package_name} - GIT_REPOSITORY "${package_url}" - GIT_TAG "${package_tag}" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/src" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/build" - CMAKE_ARGS "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" - "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" - "-DCMAKE_BUILD_TYPE=${GINKGO_THIRD_PARTY_BUILD_TYPE}" - # These are only useful if you're cross-compiling. - # They, however, will not hurt regardless. 
- "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}" - "-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}" - "-DCMAKE_AR=${CMAKE_AR}" - "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" - "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" - "-DCMAKE_FIND_ROOT_PATH=${CMAKE_FIND_ROOT_PATH}" - "${ARGN}" - INSTALL_COMMAND "" - TEST_COMMAND "" - UPDATE_DISCONNECTED ${GINKGO_SKIP_DEPENDENCY_UPDATE} -) diff --git a/cmake/DownloadNonCMakeCMakeLists.txt.in b/cmake/DownloadNonCMakeCMakeLists.txt.in new file mode 100644 index 00000000000..c2d848e8d49 --- /dev/null +++ b/cmake/DownloadNonCMakeCMakeLists.txt.in @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.9) +project(${package_name}) + +include(ExternalProject) +ExternalProject_Add(${package_name} + URL "${package_url}" + URL_HASH "${package_hash}" + DOWNLOAD_NO_PROGRESS TRUE + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/build" + CONFIGURE_COMMAND "${config_command}" "${ARGN}" + INSTALL_COMMAND "" + UPDATE_DISCONNECTED ${GINKGO_SKIP_DEPENDENCY_UPDATE} + ) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 0348f956e7b..f3e2cf2b430 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -36,6 +36,7 @@ set(GINKGO_BUILD_REFERENCE @GINKGO_BUILD_REFERENCE@) set(GINKGO_BUILD_OMP @GINKGO_BUILD_OMP@) set(GINKGO_BUILD_CUDA @GINKGO_BUILD_CUDA@) set(GINKGO_BUILD_HIP @GINKGO_BUILD_HIP@) +set(GINKGO_BUILD_DPCPP @GINKGO_BUILD_DPCPP@) set(GINKGO_DEVEL_TOOLS @GINKGO_DEVEL_TOOLS@) set(GINKGO_BUILD_TESTS @GINKGO_BUILD_TESTS@) @@ -62,31 +63,28 @@ set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) set(GINKGO_CUDA_ARCHITECTURES @GINKGO_CUDA_ARCHITECTURES@) set(GINKGO_CUDA_DEFAULT_HOST_COMPILER @GINKGO_CUDA_DEFAULT_HOST_COMPILER@) set(GINKGO_CUDA_HOST_COMPILER @CMAKE_CUDA_HOST_COMPILER@) +set(GINKGO_CUDA_ARCH_FLAGS @GINKGO_CUDA_ARCH_FLAGS@) set(GINKGO_HIP_COMPILER_FLAGS @GINKGO_HIP_COMPILER_FLAGS@) set(GINKGO_HIP_HCC_COMPILER_FLAGS 
@GINKGO_HIP_HCC_COMPILER_FLAGS@) set(GINKGO_HIP_NVCC_COMPILER_FLAGS @GINKGO_HIP_NVCC_COMPILER_FLAGS@) +set(GINKGO_HIP_CLANG_COMPILER_FLAGS @GINKGO_HIP_CLANG_COMPILER_FLAGS@) set(GINKGO_HIP_PLATFORM @GINKGO_HIP_PLATFORM@) +set(GINKGO_HIP_PLATFORM_AMD_REGEX @HIP_PLATFORM_AMD_REGEX@) +set(GINKGO_HIP_PLATFORM_NVIDIA_REGEX @HIP_PLATFORM_NVIDIA_REGEX@) set(GINKGO_HIP_AMDGPU @GINKGO_HIP_AMDGPU@) set(GINKGO_HIP_VERSION @GINKGO_HIP_VERSION@) +set(GINKGO_AMD_ARCH_FLAGS @GINKGO_AMD_ARCH_FLAGS@) + +set(GINKGO_DPCPP_VERSION @GINKGO_DPCPP_VERSION@) +set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@) +set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) +set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) set(GINKGO_HAVE_PAPI_SDE @GINKGO_HAVE_PAPI_SDE@) -# Ginkgo external package variables -set(GINKGO_USE_EXTERNAL_CAS "@GINKGO_USE_EXTERNAL_CAS@") -set(GINKGO_USE_EXTERNAL_GTEST "@GINKGO_USE_EXTERNAL_GTEST@") -set(GINKGO_USE_EXTERNAL_GFLAGS "@GINKGO_USE_EXTERNAL_GFLAGS@") -set(GINKGO_USE_EXTERNAL_RAPIDJSON "@GINKGO_USE_EXTERNAL_RAPIDJSON@") - -set(TPL_ENABLE_GTEST "@TPL_ENABLE_GTEST@") -set(TPL_GTEST_LIBRARIES "@TPL_GTEST_LIBRARIES@") -set(TPL_GTEST_INCLUDE_DIRS "@TPL_GTEST_INCLUDE_DIRS@") -set(TPL_ENABLE_GFLAGS "@TPL_ENABLE_GFLAGS@") -set(TPL_GFLAGS_LIBRARIES "@TPL_GFLAGS_LIBRARIES@") -set(TPL_GFLAGS_INCLUDE_DIRS "@TPL_GFLAGS_INCLUDE_DIRS@") -set(TPL_ENABLE_RAPIDJSON "@TPL_ENABLE_RAPIDJSON@") -set(TPL_RAPIDJSON_LIBRARIES "@TPL_RAPIDJSON_LIBRARIES@") -set(TPL_RAPIDJSON_INCLUDE_DIRS "@TPL_RAPIDJSON_INCLUDE_DIRS@") +set(GINKGO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@) +set(GINKGO_BUILD_HWLOC @GINKGO_BUILD_HWLOC@) # Ginkgo installation configuration set(GINKGO_CONFIG_FILE_PATH "${CMAKE_CURRENT_LIST_DIR}") @@ -96,6 +94,10 @@ set(GINKGO_INSTALL_LIBRARY_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_LIBRARY set(GINKGO_INSTALL_PKGCONFIG_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_PKGCONFIG_DIR@") set(GINKGO_INSTALL_CONFIG_DIR "${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_CONFIG_DIR@") set(GINKGO_INSTALL_MODULE_DIR 
"${GINKGO_INSTALL_PREFIX}/@GINKGO_INSTALL_MODULE_DIR@") +set(GINKGO_INSTALL_RPATH_FOR_HIP "-Wl,-rpath,${GINKGO_INSTALL_LIBRARY_DIR}") +set(GINKGO_INSTALL_RPATH ${GINKGO_INSTALL_RPATH}) +set(GINKGO_INSTALL_RPATH_USE_ORIGIN ${GINKGO_INSTALL_RPATH_USE_ORIGIN}) +set(GINKGO_INSTALL_RPATH_DEPENDENCIES ${GINKGO_INSTALL_RPATH_DEPENDENCIES}) # Forward Ginkgo's MODULE PATH and the PREFIX PATH for HIP and more list(APPEND CMAKE_MODULE_PATH "@CMAKE_MODULE_PATH@" "${GINKGO_INSTALL_MODULE_DIR}") @@ -128,25 +130,29 @@ set(GINKGO_OPENMP_LIBRARIES @OpenMP_CXX_LIBRARIES@) set(GINKGO_OPENMP_FLAGS "@OpenMP_CXX_FLAGS@") -# Provide useful HIP helper functions -include(${CMAKE_CURRENT_LIST_DIR}/hip_helpers.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/windows_helpers.cmake) - # NOTE: we do not export benchmarks, examples, tests or devel tools # so `third_party` libraries are currently unneeded. -# propagate CUDA_HOST_COMPILER if Ginkgo was built with CUDA -if (GINKGO_BUILD_CUDA AND GINKGO_CUDA_HOST_COMPILER AND NOT CMAKE_CUDA_HOST_COMPILER) - message(STATUS "Ginkgo: Setting CUDA host compiler to ${GINKGO_CXX_COMPILER}") - set(CMAKE_CUDA_HOST_COMPILER "${GINKGO_CXX_COMPILER}" CACHE STRING "" FORCE) +# propagate CUDA_HOST_COMPILER if needed +if (GINKGO_BUILD_CUDA OR (GINKGO_BUILD_HIP + AND GINKGO_HIP_PLATFORM MATCHES "${GINKGO_HIP_PLATFORM_NVIDIA_REGEX}")) + if (GINKGO_CUDA_HOST_COMPILER AND NOT CMAKE_CUDA_HOST_COMPILER + AND EXISTS "${GINKGO_CUDA_HOST_COMPILER}") + message(STATUS "Ginkgo: Setting CUDA host compiler to ${GINKGO_CUDA_HOST_COMPILER}") + set(CMAKE_CUDA_HOST_COMPILER "${GINKGO_CUDA_HOST_COMPILER}" CACHE STRING "" FORCE) + endif() endif() if(GINKGO_HAVE_PAPI_SDE) find_package(PAPI REQUIRED OPTIONAL_COMPONENTS sde) endif() -# HIP depends on Threads::Threads in some circumstances, but doesn't find it -if (GINKGO_BUILD_HIP) +if(GINKGO_HAVE_HWLOC) + find_package(HWLOC REQUIRED) +endif() + +# HIP and OpenMP depend on Threads::Threads in some circumstances, but don't find it +if 
(GINKGO_BUILD_HIP OR GINKGO_BUILD_OMP) find_package(Threads REQUIRED) endif() @@ -159,16 +165,14 @@ endif() if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_HIP) find_package(HIP REQUIRED) find_package(hipblas REQUIRED) + find_package(hiprand REQUIRED) find_package(hipsparse REQUIRED) - if(GINKGO_HIP_PLATFORM MATCHES "hcc") - ginkgo_hip_ban_link_hcflag(hcc::hccrt) - ginkgo_hip_ban_link_hcflag(hcc::hc_am) - ginkgo_hip_ban_link_hcflag(hcc::mcwamp) - ginkgo_hip_ban_compile_hcflag(hcc::hccrt) - ginkgo_hip_ban_compile_hcflag(hcc::hc_am) - ginkgo_hip_ban_compile_hcflag(hcc::mcwamp) - endif() + find_package(rocrand REQUIRED) endif() +if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_DPCPP) + find_package(MKL CONFIG REQUIRED HINTS "${GINKGO_MKL_ROOT}") + find_package(oneDPL REQUIRED HINTS "${GINKGO_DPL_ROOT}") +endif() include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake) diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake new file mode 100644 index 00000000000..161ee406e00 --- /dev/null +++ b/cmake/Modules/FindHWLOC.cmake @@ -0,0 +1,128 @@ +### +# +# @copyright (c) 2012-2020 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2020 Florent Pruvost +# Copyright 2020-2021 Ginkgo Project +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) 
+# +# Modified for Ginkgo (See ABOUT-LICENSING.md for additional details). +# +### +# +# - Find HWLOC include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(HWLOC +# [REQUIRED] [VERSION]) # Fail with error if hwloc is not found +# +# This module defines the following :prop_tgt:`IMPORTED` target: +# ``hwloc`` +# +#============================================================================= +include(CheckStructHasMember) +include(CheckCSourceCompiles) + +include(hwloc_helpers) + +find_path(HWLOC_INCLUDE_DIRS + NAMES "hwloc.h" + HINTS ${HWLOC_DIR} $ENV{HWLOC_DIR} + PATH_SUFFIXES include + DOC "Find the hwloc.h main header" + ) + +find_library(HWLOC_LIBRARIES "hwloc" + HINTS ${HWLOC_DIR} $ENV{HWLOC_DIR} + PATH_SUFFIXES lib lib64 + DOC "Find the hwloc library" + ) + +if (HWLOC_INCLUDE_DIRS) + # Find the version of hwloc found + if(NOT HWLOC_VERSION) + file(READ "${HWLOC_INCLUDE_DIRS}/hwloc.h" + HEADER_CONTENTS LIMIT 16384) + string(REGEX REPLACE ".*#define HWLOC_API_VERSION (0[xX][0-9a-fA-F]+).*" "\\1" + HWLOC_API_VERSION "${HEADER_CONTENTS}") + string(SUBSTRING "${HWLOC_API_VERSION}" 4 2 HEX_MAJOR) + string(SUBSTRING "${HWLOC_API_VERSION}" 6 2 HEX_MINOR) + string(SUBSTRING "${HWLOC_API_VERSION}" 8 2 HEX_PATCH) + get_dec_from_hex("${HEX_MAJOR}" DEC_MAJOR) + get_dec_from_hex("${HEX_MINOR}" DEC_MINOR) + get_dec_from_hex("${HEX_PATCH}" DEC_PATCH) + set(HWLOC_VERSION "${DEC_MAJOR}.${DEC_MINOR}.${DEC_PATCH}" CACHE STRING "HWLOC version") + endif() +endif() + +if (NOT HWLOC_FIND_QUIETLY) + if (HWLOC_INCLUDE_DIRS AND HWLOC_LIBRARIES) + message(STATUS "Looking for HWLOC - found version ${HWLOC_VERSION}") + else() + message(STATUS "${Magenta}Looking for HWLOC - not found" + "\n Please check that your environment variable HWLOC_DIR" + "\n has been set properly.${ColourReset}") + unset(HWLOC_LIBRARIES CACHE) + unset(HWLOC_INCLUDE_DIRS CACHE) + endif() +endif() + +# check a function to validate what was found +if(HWLOC_INCLUDE_DIRS 
AND HWLOC_LIBRARIES) + # set required libraries for linking + ginkgo_set_required_test_lib_link(HWLOC) + + # test linking + unset(HWLOC_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(hwloc_topology_init HWLOC_WORKS) + mark_as_advanced(HWLOC_WORKS) + + if(NOT HWLOC_WORKS) + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc : test of hwloc_topology_init with hwloc library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "CMAKE_REQUIRED_FLAGS: ${CMAKE_REQUIRED_FLAGS}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + + string(SUBSTRING "${HWLOC_VERSION}" 0 3 HWLOC_VERSION) + if(HWLOC_VERSION VERSION_LESS HWLOC_FIND_VERSION) + message(STATUS "Required version ${HWLOC_FIND_VERSION}, but found version ${HWLOC_VERSION}") + unset(HWLOC_FOUND CACHE) + unset(HWLOC_LIBRARIES CACHE) + unset(HWLOC_INCLUDE_DIRS CACHE) + else() + unset(HWLOC_FOUND CACHE) + set(HWLOC_FOUND 1) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(HWLOC + REQUIRED_VARS HWLOC_LIBRARIES HWLOC_INCLUDE_DIRS HWLOC_WORKS + VERSION_VAR HWLOC_VERSION) + mark_as_advanced(HWLOC_INCLUDE_DIRS HWLOC_LIBRARIES HWLOC_VERSION HWLOC_WORKS) + endif() +endif(HWLOC_INCLUDE_DIRS AND HWLOC_LIBRARIES) + +if(HWLOC_FOUND AND NOT TARGET hwloc) + add_library(hwloc SHARED IMPORTED GLOBAL) + set_target_properties(hwloc PROPERTIES IMPORTED_LOCATION ${HWLOC_LIBRARIES}) + set_target_properties(hwloc PROPERTIES INTERFACE_LINK_LIBRARIES ${HWLOC_LIBRARIES}) + set_target_properties(hwloc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${HWLOC_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/FindNUMA.cmake b/cmake/Modules/FindNUMA.cmake new file mode 100644 index 00000000000..89774584dcf --- /dev/null +++ 
b/cmake/Modules/FindNUMA.cmake @@ -0,0 +1,59 @@ +#.rst: +# FindNUMA +# ------- +# +# Find the NUMA library, usually provided by `numactl`. +# +# Imported targets +# ^^^^^^^^^^^^^^^^ +# +# This module defines the following :prop_tgt:`IMPORTED` target: +# +# ``NUMA::NUMA`` +# The NUMA library, if found. +# +# Result variables +# ^^^^^^^^^^^^^^^^ +# +# This module will set the following variables in your project: +# +# ``NUMA_INCLUDE_DIRS`` +# where to find numa.h +# +# ``NUMA_LIBRARIES`` +# the libraries to link against in order to use the NUMA library. +# +# ``NUMA_FOUND`` +# If false, do not try to use the NUMA library. + + +find_path(NUMA_ROOT_DIR NAMES include/numa.h) + +find_path(NUMA_INCLUDE_DIR NAMES numa.h HINTS ${NUMA_ROOT_DIR}) +mark_as_advanced(NUMA_INCLUDE_DIR) + + +if(NOT NUMA_LIBRARY) + find_library(NUMA_LIBRARY NAMES numa HINTS ${NUMA_ROOT_DIR}) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NUMA REQUIRED_VARS NUMA_LIBRARY NUMA_INCLUDE_DIR) + +if(NUMA_FOUND) + set(NUMA_LIBRARIES ${NUMA_LIBRARY}) + set(NUMA_INCLUDE_DIRS ${NUMA_INCLUDE_DIR}) + unset(NUMA_LIBRARY) + unset(NUMA_INCLUDE_DIR) + + if(NOT TARGET NUMA::NUMA) + add_library(NUMA::NUMA UNKNOWN IMPORTED) + set_target_properties(NUMA::NUMA PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIRS}") + if(EXISTS "${NUMA_LIBRARIES}") + set_target_properties(NUMA::NUMA PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${NUMA_LIBRARIES}") + endif() + endif() +endif() diff --git a/cmake/Modules/FindPAPI.cmake b/cmake/Modules/FindPAPI.cmake index 3e16af2e125..c2648c252fb 100644 --- a/cmake/Modules/FindPAPI.cmake +++ b/cmake/Modules/FindPAPI.cmake @@ -70,7 +70,7 @@ if(PAPI_INCLUDE_DIR) # find the components enable_language(C) foreach(component IN LISTS PAPI_FIND_COMPONENTS) - file(WRITE "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" + file(WRITE "${PROJECT_BINARY_DIR}/papi_${component}_detect.c" " #include int main() { @@ -85,8 +85,8 @@ 
if(PAPI_INCLUDE_DIR) ) try_run(PAPI_${component}_FOUND gko_result_unused - "${CMAKE_BINARY_DIR}" - "${CMAKE_BINARY_DIR}/papi_${component}_detect.c" + "${PROJECT_BINARY_DIR}" + "${PROJECT_BINARY_DIR}/papi_${component}_detect.c" LINK_LIBRARIES ${PAPI_LIBRARY} ) diff --git a/cmake/Modules/hwloc_helpers.cmake b/cmake/Modules/hwloc_helpers.cmake new file mode 100644 index 00000000000..689179142ab --- /dev/null +++ b/cmake/Modules/hwloc_helpers.cmake @@ -0,0 +1,110 @@ +### +# +# @copyright (c) 2012-2020 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2020 Florent Pruvost +# Copyright 2020-2021 Ginkgo Project +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) +# +# Modified for Ginkgo (See ABOUT-LICENSING.md for additional details). 
+# + +macro(ginkgo_set_required_test_lib_link name) + set(CMAKE_REQUIRED_INCLUDES "${${name}${STATIC}_INCLUDE_DIRS}") + if (${name}${STATIC}_CFLAGS_OTHER) + set(REQUIRED_FLAGS_COPY "${${name}${STATIC}_CFLAGS_OTHER}") + set(REQUIRED_FLAGS) + set(REQUIRED_DEFINITIONS) + foreach(_flag ${REQUIRED_FLAGS_COPY}) + if (_flag MATCHES "^-D") + list(APPEND REQUIRED_DEFINITIONS "${_flag}") + endif() + string(REGEX REPLACE "^-D.*" "" _flag "${_flag}") + list(APPEND REQUIRED_FLAGS "${_flag}") + endforeach() + endif() + foreach(_var "${REQUIRED_FLAGS_COPY};${REQUIRED_FLAGS};${REQUIRED_LIBRARIES}" ) + if(${_var}) + list(REMOVE_DUPLICATES ${_var}) + endif() + endforeach() + set(CMAKE_REQUIRED_DEFINITIONS "${REQUIRED_DEFINITIONS}") + set(CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${${name}${STATIC}_LDFLAGS_OTHER}") + list(APPEND CMAKE_REQUIRED_LIBRARIES "${${name}${STATIC}_LIBRARIES}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") +endmacro() + +# Modified function from Morse +macro(ginkgo_check_static_or_dynamic package libraries) + list(GET ${libraries} 0 _first_lib) + get_filename_component(_suffix ${_first_lib} EXT) + if (NOT _suffix) + unset (_lib_path CACHE) + find_library(_lib_path ${_first_lib} HINTS ${${package}_LIBDIR} ${${package}_LIBRARY_DIRS} NO_DEFAULT_PATH) + get_filename_component(_suffix ${_lib_path} EXT) + endif() + if (_suffix) + if(${_suffix} MATCHES ".so$" OR ${_suffix} MATCHES ".dylib$" OR ${_suffix} MATCHES ".dll$") + set(${package}_STATIC 0) + elseif(${_suffix} MATCHES ".a$") + set(${package}_STATIC 1) + else() + message(FATAL_ERROR "${package} library extension not in list .a, .so, .dylib, .dll") + endif() + else() + message(FATAL_ERROR "${package} could not detect library extension") + endif() +endmacro() + +function(HEX2DEC str res) + string(LENGTH "${str}" len) + if("${len}" EQUAL 1) + if("${str}" MATCHES "[0-9]") + set(${res} "${str}" 
PARENT_SCOPE) + elseif( "${str}" MATCHES "[aA]") + set(${res} 10 PARENT_SCOPE) + elseif( "${str}" MATCHES "[bB]") + set(${res} 11 PARENT_SCOPE) + elseif( "${str}" MATCHES "[cC]") + set(${res} 12 PARENT_SCOPE) + elseif( "${str}" MATCHES "[dD]") + set(${res} 13 PARENT_SCOPE) + elseif( "${str}" MATCHES "[eE]") + set(${res} 14 PARENT_SCOPE) + elseif( "${str}" MATCHES "[fF]") + set(${res} 15 PARENT_SCOPE) + else() + return() + endif() + else() + string(SUBSTRING "${str}" 0 1 str1) + string(SUBSTRING "${str}" 1 -1 str2) + hex2dec(${str1} res1) + hex2dec(${str2} res2) + math(EXPR val "16 * ${res1} + ${res2}") + set(${res} "${val}" PARENT_SCOPE) + endif() +endfunction() + +macro(get_dec_from_hex hex dec) + if(${CMAKE_VERSION} VERSION_GREATER 3.13) + math(EXPR ${dec} ${hex} OUTPUT_FORMAT DECIMAL) + else() + hex2dec(${hex} ${dec}) + endif() +endmacro() diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 1f90640acb9..71e2456edbc 100644 --- a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -1,9 +1,13 @@ set(GINKGO_HAS_OMP OFF) set(GINKGO_HAS_CUDA OFF) +set(GINKGO_HAS_DPCPP OFF) set(GINKGO_HAS_HIP OFF) -find_package(OpenMP) +find_package(OpenMP 3.0) include(CheckLanguage) check_language(CUDA) +try_compile(GKO_CAN_COMPILE_DPCPP ${PROJECT_BINARY_DIR}/dpcpp + SOURCES ${PROJECT_SOURCE_DIR}/dpcpp/test_dpcpp.dp.cpp + CXX_STANDARD 17) if(OpenMP_CXX_FOUND) if(NOT DEFINED GINKGO_BUILD_OMP) @@ -25,3 +29,10 @@ if(GINKGO_HIPCONFIG_PATH) endif() set(GINKGO_HAS_HIP ON) endif() + +if (GKO_CAN_COMPILE_DPCPP) + if(NOT DEFINED GINKGO_BUILD_DPCPP) + message(STATUS "Enabling DPCPP executor") + endif() + set(GINKGO_HAS_DPCPP ON) +endif() diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index ef7d47f26ba..4b89cf451c4 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -1,3 +1,5 @@ +set(GINKGO_LIBRARY_PATH "${PROJECT_BINARY_DIR}/lib") + function(ginkgo_default_includes name) # set include path 
depending on used interface target_include_directories("${name}" @@ -7,6 +9,12 @@ function(ginkgo_default_includes name) $ $ ) + if(GINKGO_HAVE_HWLOC) + target_include_directories("${name}" + PUBLIC + $ + ) + endif() endfunction() function(ginkgo_compile_features name) @@ -20,26 +28,13 @@ function(ginkgo_compile_features name) # Set an appropriate SONAME set_property(TARGET "${name}" PROPERTY SOVERSION "${Ginkgo_VERSION}") - if(GINKGO_CHANGED_SHARED_LIBRARY) - # Put all shared libraries and corresponding imported libraries into the specified path - set_property(TARGET "${name}" PROPERTY - RUNTIME_OUTPUT_DIRECTORY "${GINKGO_WINDOWS_SHARED_LIBRARY_PATH}") - set_property(TARGET "${name}" PROPERTY - ARCHIVE_OUTPUT_DIRECTORY "${GINKGO_WINDOWS_SHARED_LIBRARY_PATH}") - if(MSVC) - # MSVC would create subfolder according to build_type. Ginkgo forces the output be the same whatever build_type is. - foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES}) - string(TOUPPER ${CONFIG} CONFIG ) - set_property(TARGET "${name}" PROPERTY - RUNTIME_OUTPUT_DIRECTORY_${CONFIG} "${GINKGO_WINDOWS_SHARED_LIBRARY_PATH}") - set_property(TARGET "${name}" PROPERTY - ARCHIVE_OUTPUT_DIRECTORY_${CONFIG} "${GINKGO_WINDOWS_SHARED_LIBRARY_PATH}") - endforeach() - endif() - if(GINKGO_CHECK_PATH) - ginkgo_check_shared_library("${CMAKE_SHARED_LIBRARY_PREFIX}${name}${CMAKE_SHARED_LIBRARY_SUFFIX}") - endif() - endif() + # Put all shared libraries and corresponding imported libraries into the specified path + set_property(TARGET "${name}" PROPERTY + RUNTIME_OUTPUT_DIRECTORY "${GINKGO_LIBRARY_PATH}") + set_property(TARGET "${name}" PROPERTY + ARCHIVE_OUTPUT_DIRECTORY "${GINKGO_LIBRARY_PATH}") + set_property(TARGET "${name}" PROPERTY + LIBRARY_OUTPUT_DIRECTORY "${GINKGO_LIBRARY_PATH}") if (GINKGO_CHECK_CIRCULAR_DEPS) target_link_libraries("${name}" PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") @@ -48,7 +43,7 @@ function(ginkgo_compile_features name) set_target_properties("${name}" PROPERTIES POSITION_INDEPENDENT_CODE ON) 
endfunction() -function(ginkgo_check_headers target) +function(ginkgo_check_headers target defines) # build object library used to "compile" the headers # add a proxy source file for each header in the target source list file(GLOB_RECURSE CUDA_HEADERS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" CONFIGURE_DEPENDS "*.cuh") @@ -56,10 +51,21 @@ function(ginkgo_check_headers target) file(GLOB_RECURSE CXX_HEADERS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" CONFIGURE_DEPENDS "*.hpp") list(FILTER CXX_HEADERS EXCLUDE REGEX ".*\.hip\.hpp$") list(FILTER CXX_HEADERS EXCLUDE REGEX "^test.*") + list(FILTER CXX_HEADERS EXCLUDE REGEX "^base/kernel_launch.*") list(FILTER CUDA_HEADERS EXCLUDE REGEX "^test.*") + list(FILTER CUDA_HEADERS EXCLUDE REGEX "^base/kernel_launch.*") list(FILTER HIP_HEADERS EXCLUDE REGEX "^test.*") + list(FILTER HIP_HEADERS EXCLUDE REGEX "^base/kernel_launch.*") set(SOURCES "") + # if we have any CUDA files in there, compile everything as CUDA + if(CUDA_HEADERS) + set(CUDA_HEADERS ${CUDA_HEADERS} ${CXX_HEADERS}) + set(CXX_HEADERS "") + if (HIP_HEADERS) + message(FATAL_ERROR "Mixing CUDA and HIP files in header check") + endif() + endif() foreach(HEADER ${CUDA_HEADERS}) set(HEADER_SOURCEFILE "${CMAKE_CURRENT_BINARY_DIR}/${HEADER}.cu") file(WRITE "${HEADER_SOURCEFILE}" "#include \"${HEADER}\"") @@ -71,10 +77,13 @@ function(ginkgo_check_headers target) file(WRITE "${HEADER_SOURCEFILE}" "#include \"${HEADER}\"") list(APPEND SOURCES "${HEADER_SOURCEFILE}") endforeach() - if (SOURCES) + if(SOURCES) add_library(${target}_headers OBJECT ${SOURCES}) target_link_libraries(${target}_headers PRIVATE ${target}) target_include_directories(${target}_headers PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") + if(defines) + target_compile_definitions(${target}_headers PRIVATE ${defines}) + endif() endif() set(HIP_SOURCES "") @@ -83,54 +92,21 @@ function(ginkgo_check_headers target) file(WRITE "${HEADER_SOURCEFILE}" "#include \"${HEADER}\"") list(APPEND HIP_SOURCES "${HEADER_SOURCEFILE}") 
endforeach() - if (HIP_SOURCES) + if(HIP_SOURCES) set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) hip_add_library(${target}_headers_hip ${HIP_SOURCES}) # the compiler options get set by linking to ginkgo_hip - target_link_libraries(${target}_headers_hip PRIVATE ${target} roc::hipblas roc::hipsparse) + target_link_libraries(${target}_headers_hip PRIVATE ${target} roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) target_include_directories(${target}_headers_hip PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${GINKGO_HIP_THRUST_PATH}" "${HIPBLAS_INCLUDE_DIRS}" + "${hiprand_INCLUDE_DIRS}" "${HIPSPARSE_INCLUDE_DIRS}" "${ROCPRIM_INCLUDE_DIRS}") endif() endfunction() -function(ginkgo_check_shared_library name) - # Cygwin uses : not ; to split path - if(CYGWIN) - string(REPLACE ":" ";" ENV_PATH "$ENV{PATH}") - else() - set(ENV_PATH "$ENV{PATH}") - endif() - set(PATH_LIST ${ENV_PATH}) - set(PASSED_TEST FALSE) - foreach(ITEM IN LISTS PATH_LIST) - string(REPLACE "\\" "/" ITEM "${ITEM}") - if("${ITEM}" STREQUAL "${GINKGO_WINDOWS_SHARED_LIBRARY_PATH}") - set(PASSED_TEST TRUE) - break() - else() - # If any path before this build, the path must not contain the ginkgo shared library - find_file(EXISTING_DLL "${name}" PATHS "${ITEM}" NO_DEFAULT_PATH) - if(NOT "${EXISTING_DLL}" STREQUAL "EXISTING_DLL-NOTFOUND") - # clean the EXISTING_DLL before termination - unset(EXISTING_DLL CACHE) - message(FATAL_ERROR "Detect ${name} in ${ITEM} eariler than this build. " - "Please add ${GINKGO_WINDOWS_SHARED_LIBRARY_PATH} before other ginkgo path.") - endif() - # do not keep this variable in cache - unset(EXISTING_DLL CACHE) - endif() - endforeach() - if(NOT PASSED_TEST) - # Did not find this build in the environment variable PATH - message(FATAL_ERROR "Did not find this build in the environment variable PATH. 
" - "Please add ${GINKGO_WINDOWS_SHARED_LIBRARY_PATH} into the environment variable PATH.") - endif() -endfunction() - macro(ginkgo_modify_flags name) # add escape before " # the result var is ${name}_MODIFY @@ -156,3 +132,23 @@ function(ginkgo_extract_clang_version CLANG_COMPILER GINKGO_CLANG_VERSION) file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/extract_clang_ver.cpp) file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/extract_clang_ver) endfunction() + +# Extract the DPC++ version +function(ginkgo_extract_dpcpp_version DPCPP_COMPILER GINKGO_DPCPP_VERSION) + set(DPCPP_VERSION_PROG "#include \n#include \n" + "int main() {std::cout << __SYCL_COMPILER_VERSION << '\\n'\;" + "return 0\;}") + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp" ${DPCPP_VERSION_PROG}) + execute_process(COMMAND ${DPCPP_COMPILER} ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp + -o ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver + ERROR_VARIABLE DPCPP_EXTRACT_VER_ERROR) + execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver + OUTPUT_VARIABLE FOUND_DPCPP_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + ) + + set (${GINKGO_DPCPP_VERSION} "${FOUND_DPCPP_VERSION}" PARENT_SCOPE) + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp) + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver) +endfunction() diff --git a/cmake/build_type_helpers.cmake b/cmake/build_type_helpers.cmake index f3366b031e0..09fc4a7dc9b 100644 --- a/cmake/build_type_helpers.cmake +++ b/cmake/build_type_helpers.cmake @@ -88,13 +88,12 @@ foreach(_LANG IN LISTS ENABLED_LANGUAGES ITEMS "HIP") ${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED) else() if(DEFINED ${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED) - message(STATUS "Skipping ${_LANG}, not supported by build_type.cmake script") + message(STATUS "Skipping ${_LANG}, not supported by build_type_helpers.cmake script") endif() set(${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED FALSE) - continue() endif() 
if(${PROJECT_NAME}_${_LANG}_${_TYPE}_SUPPORTED) - if(_LANG STREQUAL "HIP" AND GINKGO_HIP_PLATFORM STREQUAL "nvcc") + if(_LANG STREQUAL "HIP" AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") set(CMAKE_${_LANG}_FLAGS_${_TYPE} ${${PROJECT_NAME}_NVCC_${_TYPE}_COMPILER_FLAGS} CACHE STRING "Flags used by the ${_LANG} compiler during ${_TYPE} builds." FORCE @@ -116,6 +115,11 @@ foreach(_LANG IN LISTS ENABLED_LANGUAGES ITEMS "HIP") endforeach() endforeach() +if (CMAKE_BUILD_TYPE + AND (CMAKE_BUILD_TYPE IN_LIST ${PROJECT_NAME}_CUSTOM_BUILD_TYPES) + AND (NOT ${PROJECT_NAME}_${CMAKE_BUILD_TYPE}_SUPPORTED)) + message(FATAL_ERROR "Custom CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} not supported by the compiler") +endif() foreach(_TYPE IN LISTS ${PROJECT_NAME}_CUSTOM_BUILD_TYPES) cmake_dependent_option(${PROJECT_NAME}_${_TYPE}_IN_CONFIGURATION_TYPES diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index f2ff8496e64..9d36b49911e 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -1,167 +1,175 @@ -function(ginkgo_create_test test_name) +function(ginkgo_build_test_name test_name target_name) file(RELATIVE_PATH REL_BINARY_DIR ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - ) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES + set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) +endfunction() + +function(ginkgo_set_test_target_properties test_name test_target_name) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_name}) + if (GINKGO_FAST_TESTS) + target_compile_definitions(${test_target_name} PRIVATE GINKGO_FAST_TESTS) + endif() + if (GINKGO_COMPILING_DPCPP_TEST 
AND GINKGO_DPCPP_SINGLE_MODE) + target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) + endif() if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") + target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) + add_test(NAME ${REL_BINARY_DIR}/${test_name} + COMMAND ${test_target_name} + WORKING_DIRECTORY "$") + target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR}) + target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::Main GTest::GTest) +endfunction() + +function(ginkgo_create_test test_name) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) endfunction(ginkgo_create_test) +function(ginkgo_create_dpcpp_test test_name) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.dp.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_17) + target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}") + target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) + # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. 
+ if (MKL_ENV) + set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") + endif() +endfunction(ginkgo_create_dpcpp_test) + function(ginkgo_create_thread_test test_name) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - ) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest Threads::Threads ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_link_libraries(${test_target_name} PRIVATE Threads::Threads ${ARGN}) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) endfunction(ginkgo_create_thread_test) function(ginkgo_create_test_cpp_cuda_header test_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - "${CUDA_INCLUDE_DIRS}" - ) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - if 
(GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_include_directories(${test_target_name} PRIVATE "${CUDA_INCLUDE_DIRS}") + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) endfunction(ginkgo_create_test_cpp_cuda_header) function(ginkgo_create_cuda_test test_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cu) - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - ) - cas_target_cuda_architectures(${TEST_TARGET_NAME} - ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} - UNSUPPORTED "20" "21") - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) -endfunction(ginkgo_create_cuda_test) - -function(ginkgo_create_hip_test_special_linkage test_name) - # use gcc to compile but use hip to link - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME 
"${REL_BINARY_DIR}/${test_name}") - add_executable(${TEST_TARGET_NAME} ${test_name}.cpp) - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - # Fix the missing metadata when building static library. - if(GINKGO_HIP_PLATFORM MATCHES "hcc" AND NOT BUILD_SHARED_LIBS) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cu) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} + PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_ARCH_FLAGS}>") + if(MSVC) + target_compile_options(${test_target_name} + PRIVATE + $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda --expt-relaxed-constexpr>) + elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + target_compile_options(${test_target_name} + PRIVATE + $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda --expt-relaxed-constexpr>) endif() - target_include_directories("${TEST_TARGET_NAME}" - PRIVATE - "$" - ) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + # we handle CUDA architecture flags for now, disable CMake handling + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) endif() - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) -endfunction(ginkgo_create_hip_test_special_linkage) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) +endfunction(ginkgo_create_cuda_test) function(ginkgo_create_hip_test test_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") -
+ginkgo_build_test_name(${test_name} test_target_name) set_source_files_properties(${test_name}.hip.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) + set(GINKGO_TEST_HIP_DEFINES) + if (GINKGO_FAST_TESTS) + set(GINKGO_TEST_HIP_DEFINES -DGINKGO_FAST_TESTS) + endif() - if (HIP_VERSION GREATER_EQUAL "3.5") - hip_add_executable(${TEST_TARGET_NAME} ${test_name}.hip.cpp - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} + # NOTE: With how HIP works, passing the flags `HIPCC_OPTIONS` etc. here + # creates a redefinition of all flags. This creates some issues with `nvcc`, + # but `clang` seems fine with the redefinitions. + if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") + hip_add_executable(${test_target_name} ${test_name}.hip.cpp + # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and + # call gets fixed, uncomment this. + HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} + # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} + # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} + --expt-relaxed-constexpr --expt-extended-lambda + ) + else() # hcc/clang + hip_add_executable(${test_target_name} ${test_name}.hip.cpp + HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - HCC_OPTIONS ${GINKGO_HIP_HCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS}) - else() - hip_add_executable(${TEST_TARGET_NAME} ${test_name}.hip.cpp - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - HCC_OPTIONS ${GINKGO_HIP_HCC_OPTIONS}) + CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} + ) endif() - target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) - # Let's really not use nvcc for linking here - if (GINKGO_HIP_PLATFORM MATCHES "nvcc") - set_target_properties(${TEST_TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX) - endif() + # Let's use a normal compiler for linking + set_target_properties(${test_target_name} PROPERTIES LINKER_LANGUAGE CXX) - 
target_include_directories("${TEST_TARGET_NAME}" + target_include_directories(${test_target_name} PRIVATE - "$" # Only `math` requires it so far, but it's much easier # to put these this way. ${GINKGO_HIP_THRUST_PATH} - # Only `exception_helpers` requires thess so far, but it's much easier + # Only `exception_helpers` requires these so far, but it's much easier # to put these this way. ${HIPBLAS_INCLUDE_DIRS} + ${hiprand_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS} ) - set_target_properties(${TEST_TARGET_NAME} PROPERTIES - OUTPUT_NAME ${test_name}) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + ginkgo_set_test_target_properties(${test_name} ${test_target_name}) +endfunction(ginkgo_create_hip_test) - # Pass in the `--amdgpu-target` flags if asked - if(GINKGO_HIP_AMDGPU AND GINKGO_HIP_PLATFORM MATCHES "hcc") - foreach(target ${GINKGO_HIP_AMDGPU}) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE --amdgpu-target=${target}) - endforeach() +function(ginkgo_create_common_test test_name) + set(executors) + if(GINKGO_BUILD_OMP) + list(APPEND executors omp) endif() - - # GINKGO_RPATH_FOR_HIP needs to be populated before calling this for the linker to include - # our libraries path into the executable's runpath. 
- if(BUILD_SHARED_LIBS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_RPATH_FOR_HIP}") - - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${TEST_TARGET_NAME} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() + if(GINKGO_BUILD_HIP) + list(APPEND executors hip) endif() - - target_link_libraries(${TEST_TARGET_NAME} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN}) - add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME}) -endfunction(ginkgo_create_hip_test) + if(GINKGO_BUILD_CUDA) + list(APPEND executors cuda) + endif() + if(GINKGO_BUILD_DPCPP) + list(APPEND executors dpcpp) + endif() + foreach(exec ${executors}) + ginkgo_build_test_name(${test_name} test_target_name) + # build executor typename out of shorthand + string(SUBSTRING ${exec} 0 1 exec_initial) + string(SUBSTRING ${exec} 1 -1 exec_tail) + string(TOUPPER ${exec_initial} exec_initial) + set(exec_type ${exec_initial}${exec_tail}Executor) + # set up actual test + set(test_target_name ${test_target_name}_${exec}) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_features(${test_target_name} PUBLIC cxx_std_14) + target_compile_options(${test_target_name} PRIVATE ${GINKGO_COMPILER_FLAGS}) + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec}) + target_link_libraries(${test_target_name} PRIVATE ${ARGN}) + # use float for DPC++ if necessary + if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) + target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) + endif() + ginkgo_set_test_target_properties(${test_name}_${exec} ${test_target_name}) + endforeach() +endfunction(ginkgo_create_common_test) diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 0860d5c03e9..99c387d30d3 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -1,5 +1,5 @@ -SET(detailed_log "${CMAKE_BINARY_DIR}/detailed.log") -SET(minimal_log "${CMAKE_BINARY_DIR}/minimal.log") 
+SET(detailed_log "${PROJECT_BINARY_DIR}/detailed.log") +SET(minimal_log "${PROJECT_BINARY_DIR}/minimal.log") FILE(REMOVE ${detailed_log} ${minimal_log}) MACRO(_both) @@ -72,6 +72,27 @@ function(ginkgo_print_variable log_type var_name) FILE(APPEND ${log_type} "${upd_string}") endfunction() + +function(ginkgo_print_env_variable log_type var_name) + string(SUBSTRING + " +-- ${var_name}: " 0 55 upd_string) + if(DEFINED ENV{${var_name}}) + set(str_value "$ENV{${var_name}}") + else() + set(str_value "") + endif() + string(APPEND upd_string "${str_value}") + FILE(APPEND ${log_type} "${upd_string}") +endfunction() + + +macro(ginkgo_print_foreach_variable log_type) + foreach(var ${ARGN}) + ginkgo_print_variable(${log_type} ${var} ) + endforeach() +endmacro() + IF("${GINKGO_GIT_SHORTREV}" STREQUAL "") set(to_print "Summary of Configuration for Ginkgo (version ${Ginkgo_VERSION} with tag ${Ginkgo_VERSION_TAG}) --" @@ -79,7 +100,7 @@ IF("${GINKGO_GIT_SHORTREV}" STREQUAL "") ginkgo_print_generic_header(${detailed_log} "${to_print}") ginkgo_print_generic_header(${minimal_log} "${to_print}") ELSE() - set(to_print "Summary of Configuration for (Ginkgo version ${Ginkgo_VERSION} with tag ${Ginkgo_VERSION_TAG}, shortrev ${GINKGO_GIT_SHORTREV})" + set(to_print "Summary of Configuration for Ginkgo (version ${Ginkgo_VERSION} with tag ${Ginkgo_VERSION_TAG}, shortrev ${GINKGO_GIT_SHORTREV})" ) ginkgo_print_generic_header(${detailed_log} "${to_print}") ginkgo_print_generic_header(${minimal_log} "${to_print}") @@ -88,12 +109,9 @@ ENDIF() set(log_types "detailed_log;minimal_log") foreach(log_type ${log_types}) ginkgo_print_module_footer(${${log_type}} "Ginkgo configuration:") - set(print_var - "CMAKE_BUILD_TYPE;BUILD_SHARED_LIBS;CMAKE_INSTALL_PREFIX;CMAKE_SOURCE_DIR;CMAKE_BINARY_DIR" - ) - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() + ginkgo_print_foreach_variable(${${log_type}} + "CMAKE_BUILD_TYPE;BUILD_SHARED_LIBS;CMAKE_INSTALL_PREFIX" + 
"PROJECT_SOURCE_DIR;PROJECT_BINARY_DIR") string(SUBSTRING " -- CMAKE_CXX_COMPILER: " 0 55 print_string) @@ -108,31 +126,16 @@ foreach(log_type ${log_types}) FILE(APPEND ${${log_type}} "${print_string}") ginkgo_print_module_footer(${${log_type}} "User configuration:") ginkgo_print_module_footer(${${log_type}} " Enabled modules:") - set(print_var - "GINKGO_BUILD_OMP;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP" - ) - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() + ginkgo_print_foreach_variable(${${log_type}} + "GINKGO_BUILD_OMP;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") + ginkgo_print_module_footer(${${log_type}} " Enabled features:") + ginkgo_print_foreach_variable(${${log_type}} + "GINKGO_MIXED_PRECISION") ginkgo_print_module_footer(${${log_type}} " Tests, benchmarks and examples:") - set(print_var - "GINKGO_BUILD_TESTS;GINKGO_BUILD_EXAMPLES;GINKGO_EXTLIB_EXAMPLE;GINKGO_BUILD_BENCHMARKS") - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() + ginkgo_print_foreach_variable(${${log_type}} + "GINKGO_BUILD_TESTS;GINKGO_FAST_TESTS;GINKGO_BUILD_EXAMPLES;GINKGO_EXTLIB_EXAMPLE;GINKGO_BUILD_BENCHMARKS;GINKGO_BENCHMARK_ENABLE_TUNING") ginkgo_print_module_footer(${${log_type}} " Documentation:") - set(print_var - "GINKGO_BUILD_DOC;GINKGO_VERBOSE_LEVEL") - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() - ginkgo_print_module_footer(${${log_type}} " Developer helpers:") - set(print_var - "GINKGO_DEVEL_TOOLS;GINKGO_WITH_CLANG_TIDY;GINKGO_WITH_IWYU" - "GINKGO_CHECK_CIRCULAR_DEPS;GINKGO_CHECK_PATH") - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() + ginkgo_print_foreach_variable(${${log_type}} "GINKGO_BUILD_DOC;GINKGO_VERBOSE_LEVEL") ginkgo_print_module_footer(${${log_type}} "") endforeach() @@ -160,18 +163,44 @@ IF(GINKGO_BUILD_HIP) include(hip/get_info.cmake) ENDIF() 
-ginkgo_print_generic_header(${detailed_log} "Optional Components:") -ginkgo_print_variable(${detailed_log} "GKO_HAVE_PAPI_SDE") +IF(GINKGO_BUILD_DPCPP) + include(dpcpp/get_info.cmake) +ENDIF() + +ginkgo_print_generic_header(${minimal_log} " Developer Tools:") +ginkgo_print_generic_header(${detailed_log} " Developer Tools:") +ginkgo_print_foreach_variable(${minimal_log} + "GINKGO_DEVEL_TOOLS;GINKGO_WITH_CLANG_TIDY;GINKGO_WITH_IWYU" + "GINKGO_CHECK_CIRCULAR_DEPS;GINKGO_WITH_CCACHE") +ginkgo_print_foreach_variable(${detailed_log} + "GINKGO_DEVEL_TOOLS;GINKGO_WITH_CLANG_TIDY;GINKGO_WITH_IWYU" + "GINKGO_CHECK_CIRCULAR_DEPS;GINKGO_WITH_CCACHE") +ginkgo_print_module_footer(${detailed_log} " CCACHE:") +ginkgo_print_variable(${detailed_log} "CCACHE_PROGRAM") +ginkgo_print_env_variable(${detailed_log} "CCACHE_DIR") +ginkgo_print_env_variable(${detailed_log} "CCACHE_MAXSIZE") +ginkgo_print_module_footer(${detailed_log} " PATH of other tools:") +ginkgo_print_variable(${detailed_log} "GINKGO_CLANG_TIDY_PATH") +ginkgo_print_variable(${detailed_log} "GINKGO_IWYU_PATH") +ginkgo_print_module_footer(${detailed_log} "") + +ginkgo_print_generic_header(${minimal_log} " Components:") +ginkgo_print_generic_header(${detailed_log} " Components:") if(PAPI_sde_FOUND) ginkgo_print_variable(${detailed_log} "PAPI_VERSION") ginkgo_print_variable(${detailed_log} "PAPI_INCLUDE_DIR") ginkgo_print_flags(${detailed_log} "PAPI_LIBRARY") endif() +ginkgo_print_variable(${minimal_log} "GINKGO_BUILD_HWLOC") +ginkgo_print_variable(${detailed_log} "GINKGO_BUILD_HWLOC") +ginkgo_print_variable(${detailed_log} "HWLOC_VERSION") +ginkgo_print_variable(${detailed_log} "HWLOC_LIBRARIES") +ginkgo_print_variable(${detailed_log} "HWLOC_INCLUDE_DIRS") _minimal( - "\n + " --\n-- Detailed information (More compiler flags, module configuration) can be found in detailed.log - ") +-- ") _both( "\n--\n-- Now, run cmake --build . 
to compile Ginkgo!\n" ) diff --git a/cmake/hip_helpers.cmake b/cmake/hip_helpers.cmake deleted file mode 100644 index c296ffc1228..00000000000 --- a/cmake/hip_helpers.cmake +++ /dev/null @@ -1,30 +0,0 @@ -macro(ginkgo_hip_ban_link_hcflag target) - if(TARGET ${target}) - get_target_property(GINKGO_TARGET_ILL ${target} INTERFACE_LINK_LIBRARIES) - string(REPLACE "-hc " "" GINKGO_TARGET_NEW_ILL "${GINKGO_TARGET_ILL}") - set_target_properties(${target} PROPERTIES INTERFACE_LINK_LIBRARIES "${GINKGO_TARGET_NEW_ILL}") - endif() -endmacro() - -macro(ginkgo_hip_ban_compile_hcflag target) - if(TARGET ${target}) - get_target_property(GINKGO_TARGET_ILL ${target} INTERFACE_COMPILE_OPTIONS) - string(REPLACE "-hc" "" GINKGO_TARGET_NEW_ILL "${GINKGO_TARGET_ILL}") - set_target_properties(${target} PROPERTIES INTERFACE_COMPILE_OPTIONS "${GINKGO_TARGET_NEW_ILL}") - endif() -endmacro() - -macro(ginkgo_hip_clang_ban_hip_device_flags) - if (GINKGO_HIP_VERSION VERSION_GREATER_EQUAL "3.5") - # Compile options somehow add hip-clang specific flags. Wipe them. - # Currently, the flags wiped out should be: - # -x;hip;--hip-device-lib-path=/opt/rocm/lib;--cuda-gpu-arch=gfx900; - # --cuda-gpu-arch=gfx906 - set_target_properties(hip::device PROPERTIES INTERFACE_COMPILE_OPTIONS "") - # In addition, link libraries have a similar problem. We only keep - # `hip::host`. 
Currently, the flags should be: - # hip::host;--hip-device-lib-path=/opt/rocm/lib;--hip-link; - # --cuda-gpu-arch=gfx900;--cuda-gpu-arch=gfx906 - set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES "hip::host") - endif() -endmacro() diff --git a/cmake/information_helpers.cmake b/cmake/information_helpers.cmake index e128fb5869a..e4f9033be50 100644 --- a/cmake/information_helpers.cmake +++ b/cmake/information_helpers.cmake @@ -1,11 +1,11 @@ macro(ginkgo_interface_information) get_target_property(GINKGO_INTERFACE_LINK_LIBRARIES ginkgo INTERFACE_LINK_LIBRARIES) - set(GINKGO_INTERFACE_LINK_FLAGS "-L${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_LIBRARY_DIR} -lginkgo") - set(GINKGO_INTERFACE_CXX_FLAGS "-I${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_INCLUDE_DIR}") + set(GINKGO_INTERFACE_LINK_FLAGS "-L${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_LIBRARY_DIR} -lginkgo") + set(GINKGO_INTERFACE_CXX_FLAGS "-I${CMAKE_INSTALL_PREFIX}/${GINKGO_INSTALL_INCLUDE_DIR}") - foreach(_libs IN LISTS GINKGO_INTERFACE_LINK_LIBRARIES) - set(GINKGO_INTERFACE_LINK_FLAGS "${GINKGO_INTERFACE_LINK_FLAGS} -l${_libs}") - endforeach() + foreach(_libs IN LISTS GINKGO_INTERFACE_LINK_LIBRARIES) + set(GINKGO_INTERFACE_LINK_FLAGS "${GINKGO_INTERFACE_LINK_FLAGS} -l${_libs}") + endforeach() endmacro(ginkgo_interface_information) macro(ginkgo_git_information) diff --git a/cmake/install_helpers.cmake b/cmake/install_helpers.cmake index ba7ea3fd468..af5bf5b18b8 100644 --- a/cmake/install_helpers.cmake +++ b/cmake/install_helpers.cmake @@ -2,28 +2,69 @@ include(CMakePackageConfigHelpers) include(GNUInstallDirs) -set(GINKGO_INSTALL_INCLUDE_DIR "include") -set(GINKGO_INSTALL_LIBRARY_DIR "lib") -set(GINKGO_INSTALL_PKGCONFIG_DIR "lib/pkgconfig") -set(GINKGO_INSTALL_CONFIG_DIR "lib/cmake/Ginkgo") -set(GINKGO_INSTALL_MODULE_DIR "lib/cmake/Ginkgo/Modules") +set(GINKGO_INSTALL_INCLUDE_DIR "${CMAKE_INSTALL_INCLUDEDIR}") +set(GINKGO_INSTALL_LIBRARY_DIR "${CMAKE_INSTALL_LIBDIR}") +set(GINKGO_INSTALL_RUNTIME_DIR 
"${CMAKE_INSTALL_BINDIR}") +set(GINKGO_INSTALL_PKGCONFIG_DIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig") +set(GINKGO_INSTALL_CONFIG_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/Ginkgo") +set(GINKGO_INSTALL_MODULE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/Ginkgo/Modules") + +# This function adds the correct RPATH properties to a Ginkgo target. +# +# The behavior depends on three GINKGO_INSTALL_RPATH[*] option variables. It +# does the following: +# +# 1. GINKGO_INSTALL_RPATH : If this flag is not set, no RPATH information is +# added. +# 2. GINKGO_INSTALL_RPATH_ORIGIN : Allows adding the library directory to the +# RPATH. +# 3. GINKGO_INSTALL_RPATH_DEPENDENCIES : Allows adding any extra paths to the +# RPATH; when GINKGO_HAVE_HWLOC and HWLOC_FOUND are set, the directory containing the hwloc library is added as well. +# +# @param name the name of the target +# @param ARGN paths of any external dependencies to be added +function(ginkgo_add_install_rpath name) + if (GINKGO_INSTALL_RPATH_ORIGIN) + if (APPLE) + set(ORIGIN_OR_LOADER_PATH "@loader_path") + else() + set(ORIGIN_OR_LOADER_PATH "$ORIGIN") + endif() + endif() + if (GINKGO_INSTALL_RPATH_DEPENDENCIES) + set(RPATH_DEPENDENCIES "${ARGN}") + if(GINKGO_HAVE_HWLOC AND HWLOC_FOUND) + get_filename_component(HWLOC_LIB_PATH ${HWLOC_LIBRARIES} DIRECTORY) + list(APPEND RPATH_DEPENDENCIES "${HWLOC_LIB_PATH}") + endif() + endif() + if (GINKGO_INSTALL_RPATH) + set_property(TARGET "${name}" PROPERTY INSTALL_RPATH + "${ORIGIN_OR_LOADER_PATH}" "${RPATH_DEPENDENCIES}") + endif() +endfunction() + +# Handles installation settings for a Ginkgo library.
+# +# @param name the name of the Ginkgo library target +# @param ARGN this should contain any external dependency's library PATH +function(ginkgo_install_library name) + ginkgo_add_install_rpath("${name}" "${ARGN}") -function(ginkgo_install_library name subdir) - if (WIN32 OR CYGWIN) # dll is considered as runtime install(TARGETS "${name}" EXPORT Ginkgo - LIBRARY DESTINATION ${GINKGO_INSTALL_LIBRARY_DIR} - ARCHIVE DESTINATION ${GINKGO_INSTALL_LIBRARY_DIR} - RUNTIME DESTINATION ${GINKGO_INSTALL_LIBRARY_DIR} + LIBRARY DESTINATION "${GINKGO_INSTALL_LIBRARY_DIR}" + ARCHIVE DESTINATION "${GINKGO_INSTALL_LIBRARY_DIR}" + RUNTIME DESTINATION "${GINKGO_INSTALL_RUNTIME_DIR}" ) else () # install .so and .a files install(TARGETS "${name}" EXPORT Ginkgo - LIBRARY DESTINATION ${GINKGO_INSTALL_LIBRARY_DIR} - ARCHIVE DESTINATION ${GINKGO_INSTALL_LIBRARY_DIR} + LIBRARY DESTINATION "${GINKGO_INSTALL_LIBRARY_DIR}" + ARCHIVE DESTINATION "${GINKGO_INSTALL_LIBRARY_DIR}" ) endif () endfunction() @@ -37,19 +78,36 @@ function(ginkgo_install) DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.hpp" ) - install(DIRECTORY "${Ginkgo_BINARY_DIR}/include/" - DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}" - FILES_MATCHING PATTERN "*.hpp" + install(FILES "${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp" + DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}/ginkgo" ) if (GINKGO_HAVE_PAPI_SDE) install(FILES "${Ginkgo_SOURCE_DIR}/third_party/papi_sde/papi_sde_interface.h" DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}/third_party/papi_sde" ) - install(FILES "${Ginkgo_SOURCE_DIR}/cmake/Modules/FindPAPI.cmake" - DESTINATION "${GINKGO_INSTALL_MODULE_DIR}/" + endif() + + if (GINKGO_HAVE_HWLOC AND NOT HWLOC_FOUND) + get_filename_component(HWLOC_LIB_PATH ${HWLOC_LIBRARIES} DIRECTORY) + file(GLOB HWLOC_LIBS "${HWLOC_LIB_PATH}/libhwloc*") + install(FILES ${HWLOC_LIBS} + DESTINATION "${GINKGO_INSTALL_LIBRARY_DIR}" + ) + # We only use hwloc and not netloc + install(DIRECTORY "${HWLOC_INCLUDE_DIRS}/hwloc" + 
DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}" + ) + install(FILES "${HWLOC_INCLUDE_DIRS}/hwloc.h" + DESTINATION "${GINKGO_INSTALL_INCLUDE_DIR}" ) endif() + # Install CMake modules + install(DIRECTORY "${Ginkgo_SOURCE_DIR}/cmake/Modules" + DESTINATION "${GINKGO_INSTALL_MODULE_DIR}" + FILES_MATCHING PATTERN "*.cmake" + ) + # export targets export(EXPORT Ginkgo NAMESPACE Ginkgo:: @@ -60,7 +118,7 @@ function(ginkgo_install) write_basic_package_version_file( "${Ginkgo_BINARY_DIR}/GinkgoConfigVersion.cmake" VERSION "${PROJECT_VERSION}" - COMPATIBILITY AnyNewerVersion + COMPATIBILITY SameMajorVersion ) configure_package_config_file( "${Ginkgo_SOURCE_DIR}/cmake/GinkgoConfig.cmake.in" @@ -70,11 +128,9 @@ function(ginkgo_install) install(FILES "${Ginkgo_BINARY_DIR}/GinkgoConfig.cmake" "${Ginkgo_BINARY_DIR}/GinkgoConfigVersion.cmake" - "${Ginkgo_SOURCE_DIR}/cmake/hip_helpers.cmake" - "${Ginkgo_SOURCE_DIR}/cmake/windows_helpers.cmake" DESTINATION "${GINKGO_INSTALL_CONFIG_DIR}" ) - install(EXPORT Ginkgo + install(EXPORT Ginkgo NAMESPACE Ginkgo:: FILE GinkgoTargets.cmake DESTINATION "${GINKGO_INSTALL_CONFIG_DIR}") diff --git a/cmake/package_helpers.cmake b/cmake/package_helpers.cmake index 74208b86e09..e1d196ad553 100644 --- a/cmake/package_helpers.cmake +++ b/cmake/package_helpers.cmake @@ -1,184 +1,59 @@ -set(PACKAGE_DOWNLOADER_SCRIPT - "${CMAKE_CURRENT_LIST_DIR}/DownloadCMakeLists.txt.in") +set(NON_CMAKE_PACKAGE_DOWNLOADER_SCRIPT + "${CMAKE_CURRENT_LIST_DIR}/DownloadNonCMakeCMakeLists.txt.in") -function(ginkgo_load_git_package package_name package_url package_tag) + +# Load a package from the url provided and run configure (Non-CMake projects) +# +# \param package_name Name of the package +# \param package_url Url of the package +# \param package_hash Hash of the package to be downloaded. +# \param config_command The command for the configuration step.
+# +function(ginkgo_load_and_configure_package package_name package_url package_hash config_command) set(GINKGO_THIRD_PARTY_BUILD_TYPE "Debug") if (CMAKE_BUILD_TYPE MATCHES "[Rr][Ee][Ll][Ee][Aa][Ss][Ee]") set(GINKGO_THIRD_PARTY_BUILD_TYPE "Release") endif() - configure_file(${PACKAGE_DOWNLOADER_SCRIPT} - download/CMakeLists.txt) - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + configure_file(${NON_CMAKE_PACKAGE_DOWNLOADER_SCRIPT} + download/CMakeLists.txt) + set(TOOLSET "") + if (NOT "${CMAKE_GENERATOR_TOOLSET}" STREQUAL "") + set(TOOLSET "-T${CMAKE_GENERATOR_TOOLSET}") + endif() + execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" "${TOOLSET}" . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) if(result) message(FATAL_ERROR "CMake step for ${package_name}/download failed: ${result}") + return() endif() - if(MSVC) - # MSVC decides the build_type in build step not cmake step, so Ginkgo builds Debug and Release type. - execute_process(COMMAND ${CMAKE_COMMAND} --build . --config Debug - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) - if(result) - message(FATAL_ERROR - "Build Debug step for ${package_name}/download failed: ${result}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . --config Release - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) - if(result) - message(FATAL_ERROR - "Build Release step for ${package_name}/download failed: ${result}") - endif() - else() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) - if(result) - message(FATAL_ERROR - "Build step for ${package_name}/download failed: ${result}") - endif() + execute_process(COMMAND ${CMAKE_COMMAND} --build . 
+ RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) + if(result) + message(FATAL_ERROR + "Build step for ${package_name}/download failed: ${result}") + return() endif() endfunction() -# Add external target to external project. -# Create a new target and declare it as `IMPORTED` for libraries or `INTERFACE` -# for header only projects. +# Download a file and verify the download # -# \param new_target New target for the external project -# \param external_name Name of the external project -# \param includedir Path to include directory -# \param libdir Path to library directory -# \param header_only Boolean indicating if this should be a header only target +# \param url The url of file to be downloaded +# \param filename The name of the file +# \param hash_type The type of hash, See CMake file() documentation for more details. +# \param hash The hash itself, See CMake file() documentation for more details. # -macro(ginkgo_add_tpl_target new_target external_name includedir libdir header_only) - # Declare include directories and library files - set(${external_name}_INCLUDE_DIR "${includedir}") - set(${external_name}_LIBRARY "${libdir}") - - # Create an IMPORTED external library available in the GLOBAL scope - if (${header_only}) - add_library(${new_target} INTERFACE) +function(ginkgo_download_file url filename hash_type hash) + file(DOWNLOAD ${url} ${filename} + TIMEOUT 60 # seconds + EXPECTED_HASH "${hash_type}=${hash}" + TLS_VERIFY ON) + if(EXISTS ${filename}) + message(STATUS "${filename} downloaded from ${url}") else() - add_library(${new_target} UNKNOWN IMPORTED GLOBAL) - endif() - - # Set the target's properties, namely library file and include directory - if (NOT ${header_only}) - foreach (lib in LISTS ${${external_name}_LIBRARY}) - set_target_properties(${new_target} PROPERTIES IMPORTED_LOCATION ${lib}) - endforeach() - endif() - foreach (inc in LISTS ${${external_name}_INCLUDE_DIR}) - set_target_properties(${new_target} PROPERTIES 
INTERFACE_INCLUDE_DIRECTORIES ${inc}) - endforeach() -endmacro(ginkgo_add_tpl_target) - - -# Add external target to external project. -# Create a new target and declare it as `IMPORTED` for libraries or `INTERFACE` -# for header only projects. -# -# \param new_target New target for the external project -# \param external_name Name of the external project -# \param includedir Path to include directory -# \param libdir Path to library directory -# \param build_type Build type {STATIC, SHARED} -# \param debug_postfix The debug postfix to use when building in debug mode -# \param external Name of the external target -# \param header_only Boolean indicating if this should be a header only target -# -macro(ginkgo_add_external_target new_target external_name includedir libdir build_type debug_postfix external header_only) - # Declare include directories and library files - set(${external_name}_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${libdir}) - set(${external_name}_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${includedir}") - if(MSVC) - # Ginkgo only builds Debug and Release, so set the path without CMAKE_CFG_INTDIR. 
- set(${external_name}_LIBRARY_RELEASE "${${external_name}_BINARY_DIR}/Release/${CMAKE_${build_type}_LIBRARY_PREFIX}${external_name}${CMAKE_${build_type}_LIBRARY_SUFFIX}") - set(${external_name}_LIBRARY_DEBUG "${${external_name}_BINARY_DIR}/Debug/${CMAKE_${build_type}_LIBRARY_PREFIX}${external_name}${debug_postfix}${CMAKE_${build_type}_LIBRARY_SUFFIX}") - else() - set(${external_name}_LIBRARY_RELEASE "${${external_name}_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${CMAKE_${build_type}_LIBRARY_PREFIX}${external_name}${CMAKE_${build_type}_LIBRARY_SUFFIX}") - set(${external_name}_LIBRARY_DEBUG "${${external_name}_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${CMAKE_${build_type}_LIBRARY_PREFIX}${external_name}${debug_postfix}${CMAKE_${build_type}_LIBRARY_SUFFIX}") - endif() - # Create an IMPORTED external library available in the GLOBAL scope - if (${header_only}) - add_library(${new_target} INTERFACE) - else() - add_library(${new_target} ${build_type} IMPORTED GLOBAL) - endif() - - # Set a dependency to the external target (ExternalProject fetcher and builder) - add_dependencies(${new_target} ${external}) - - # Set the target's properties, namely library file and include directory - if (NOT ${header_only}) - set_target_properties(${new_target} PROPERTIES IMPORTED_LOCATION_RELEASE ${${external_name}_LIBRARY_RELEASE}) - set_target_properties(${new_target} PROPERTIES IMPORTED_LOCATION_DEBUG ${${external_name}_LIBRARY_DEBUG}) - # Since we do not really manage other build types, let's globally use the DEBUG symbols - if(MSVC) - # Only Debug build uses MDd or MTd, and others use MD or MT. - # MSVC would like to use same runtime library, so we use Debug third-party in Debug and Release third-party in others. 
- set_target_properties(${new_target} PROPERTIES IMPORTED_LOCATION - ${${external_name}_LIBRARY_RELEASE}) - else() - if (NOT CMAKE_BUILD_TYPE MATCHES "[Rr][Ee][Ll][Ee][Aa][Ss][Ee]" - AND NOT CMAKE_BUILD_TYPE MATCHES "[Dd][Ee][Bb][Uu][Gg]") - set_target_properties(${new_target} PROPERTIES IMPORTED_LOCATION - ${${external_name}_LIBRARY_DEBUG}) - endif() - endif() - endif() - set_target_properties(${new_target} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${${external_name}_INCLUDE_DIR}) -endmacro(ginkgo_add_external_target) - - -# Ginkgo specific add_subdirectory helper macro. -# If the package was not found or if requested by the user, use the -# internal version of the package. -# -# \param package_name Name of package to be found -# \param dir_name Name of the subdirectory for the package -# -macro(ginkgo_add_subdirectory package_name dir_name) - if (NOT ${package_name}_FOUND) - add_subdirectory(${dir_name}) - endif() -endmacro(ginkgo_add_subdirectory) - - -# Ginkgo specific find_package helper macro. Use this macro for third -# party libraries. -# If the user does not specify otherwise, try to find the package. -# -# \param package_name Name of package to be found -# \param target_list For TPL packages, declare a new target for each library -# \param header_only For TPL packages, declare the tpl package as header only -# \param ARGN Extra specifications for the package finder -# -macro(ginkgo_find_package package_name target_list header_only) - string(TOUPPER ${package_name} _UPACKAGE_NAME) - if (GINKGO_USE_EXTERNAL_${_UPACKAGE_NAME} OR TPL_ENABLE_${_UPACKAGE_NAME}) - if (TPL_${_UPACKAGE_NAME}_LIBRARIES AND TPL_${_UPACKAGE_NAME}_INCLUDE_DIRS) - set(${package_name}_FOUND "${TPL_${_UPACKAGE_NAME}_LIBRARIES};${TPL_${_UPACKAGE_NAME}_INCLUDE_DIRS}") - set(_target_list ${target_list}) # CMake weirdness: target_list is not a list anymore - # Count the number of elements in the list. Substract by one to iterate from 0 to the end. 
- list(LENGTH _target_list _GKO_len1) - math(EXPR _GKO_len2 "${_GKO_len1} - 1") - foreach(val RANGE ${_GKO_len2}) - list(GET _target_list ${val} target) # access element number "val" in _target_list - list(GET TPL_${_UPACKAGE_NAME}_LIBRARIES ${val} lib) - ginkgo_add_tpl_target("${target}" "${_UPACKAGE_NAME}" "${TPL_${_UPACKAGE_NAME}_INCLUDE_DIRS}" - "${lib}" ${header_only}) - endforeach() - else() - find_package(${package_name} QUIET ${ARGN}) - if (${package_name}_FOUND) - message(STATUS "Using external version of package ${package_name}. In case of problems, consider setting -DGINKGO_USE_EXTERNAL_${_UPACKAGE_NAME}=OFF.") - else() - message(STATUS "Ginkgo could not find ${package_name}. The internal version will be used. Consider setting `-DCMAKE_PREFIX_PATH` if the package was not system-installed.") - endif() - endif() + message(FATAL_ERROR "Download of ${filename} failed.") endif() -endmacro(ginkgo_find_package) +endfunction(ginkgo_download_file) diff --git a/cmake/windows_helpers.cmake b/cmake/windows_helpers.cmake deleted file mode 100644 index 5f517a555ad..00000000000 --- a/cmake/windows_helpers.cmake +++ /dev/null @@ -1,22 +0,0 @@ -function(ginkgo_switch_windows_link lang from to) - foreach(flag_var - "CMAKE_${lang}_FLAGS" "CMAKE_${lang}_FLAGS_DEBUG" "CMAKE_${lang}_FLAGS_RELEASE" - "CMAKE_${lang}_FLAGS_MINSIZEREL" "CMAKE_${lang}_FLAGS_RELWITHDEBINFO" - ) - if(${flag_var} MATCHES "/${from}") - string(REGEX REPLACE "/${from}" "/${to}" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/${from}") - if(${flag_var} MATCHES "-${from}") - string(REGEX REPLACE "-${from}" "-${to}" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "-${from}") - set(${flag_var} "${${flag_var}}" CACHE STRING "" FORCE) - endforeach() -endfunction() - -macro(ginkgo_switch_to_windows_static lang) - ginkgo_switch_windows_link(${lang} "MD" "MT") -endmacro() - -macro(ginkgo_switch_to_windows_dynamic lang) - ginkgo_switch_windows_link(${lang} "MT" "MD") -endmacro() diff --git 
a/common/base/executor.hpp.inc b/common/cuda_hip/base/executor.hpp.inc similarity index 98% rename from common/base/executor.hpp.inc rename to common/cuda_hip/base/executor.hpp.inc index 705404b314e..0903f6dad90 100644 --- a/common/base/executor.hpp.inc +++ b/common/cuda_hip/base/executor.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/base/math.hpp.inc b/common/cuda_hip/base/math.hpp.inc similarity index 93% rename from common/base/math.hpp.inc rename to common/cuda_hip/base/math.hpp.inc index 3ba49b585c3..b56a11ddfef 100644 --- a/common/base/math.hpp.inc +++ b/common/cuda_hip/base/math.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -54,10 +54,14 @@ struct is_complex_impl> : public std::integral_constant {}; +template +struct is_complex_or_scalar_impl> : std::is_scalar {}; + + template struct truncate_type_impl> { using type = thrust::complex::type>; }; -} // namespace detail \ No newline at end of file +} // namespace detail diff --git a/common/cuda_hip/components/absolute_array.hpp.inc b/common/cuda_hip/components/absolute_array.hpp.inc new file mode 100644 index 00000000000..8c82324175d --- /dev/null +++ b/common/cuda_hip/components/absolute_array.hpp.inc @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +namespace kernel { + + +template +__global__ + __launch_bounds__(default_block_size) void inplace_absolute_array_kernel( + const size_type n, ValueType *__restrict__ array) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < n) { + array[tidx] = abs(array[tidx]); + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void outplace_absolute_array_kernel( + const size_type n, const ValueType *__restrict__ in, + remove_complex *__restrict__ out) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < n) { + out[tidx] = abs(in[tidx]); + } +} + + +} // namespace kernel diff --git a/common/components/atomic.hpp.inc b/common/cuda_hip/components/atomic.hpp.inc similarity index 64% rename from common/components/atomic.hpp.inc rename to common/cuda_hip/components/atomic.hpp.inc index 6ae0ac5e751..a1c514f8a55 100644 --- a/common/components/atomic.hpp.inc +++ b/common/cuda_hip/components/atomic.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -44,6 +44,14 @@ struct atomic_helper { "specializations are."); // TODO: add proper implementation of generic atomic add } + __forceinline__ __device__ static ValueType atomic_max(ValueType *, + ValueType) + { + static_assert(sizeof(ValueType) == 0, + "This default function is not implemented, only the " + "specializations are."); + // TODO: add proper implementation of generic atomic max + } }; @@ -64,6 +72,32 @@ __forceinline__ __device__ ResultType reinterpret(ValueType val) std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ __forceinline__ __device__ static ValueType atomic_add( \ ValueType *__restrict__ addr, ValueType val) \ + { \ + using c_type = CONVERTER_TYPE; \ + return atomic_wrapper(addr, [&val](c_type &old, c_type assumed, \ + c_type *c_addr) { \ + old = atomicCAS(c_addr, assumed, \ + reinterpret( \ + val + reinterpret(assumed))); \ + }); \ + } \ + __forceinline__ __device__ static ValueType atomic_max( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + using c_type = CONVERTER_TYPE; \ + return atomic_wrapper( \ + addr, [&val](c_type &old, c_type assumed, c_type *c_addr) { \ + if (reinterpret(assumed) < val) { \ + old = atomicCAS(c_addr, assumed, \ + reinterpret(val)); \ + } \ + }); \ + } \ + \ + private: \ + template \ + __forceinline__ __device__ static ValueType atomic_wrapper( \ + ValueType *__restrict__ addr, Callable set_old) \ { \ CONVERTER_TYPE *address_as_converter = \ reinterpret_cast(addr); \ @@ -71,17 +105,15 @@ __forceinline__ __device__ ResultType reinterpret(ValueType val) CONVERTER_TYPE assumed; \ do { \ assumed = old; \ - old = atomicCAS(address_as_converter, assumed, \ - reinterpret( \ - val + reinterpret(assumed))); \ + set_old(old, assumed, address_as_converter); \ } while (assumed != old); \ return reinterpret(old); \ } \ }; -// Support 64-bit ATOMIC_ADD +// Support 64-bit ATOMIC_ADD and ATOMIC_MAX 
GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned long long int); -// Support 32-bit ATOMIC_ADD +// Support 32-bit ATOMIC_ADD and ATOMIC_MAX GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int); @@ -115,7 +147,6 @@ GKO_BIND_ATOMIC_ADD(unsigned int); GKO_BIND_ATOMIC_ADD(unsigned long long int); GKO_BIND_ATOMIC_ADD(float); - #if !defined(__HIPCC__) || \ (defined(__HIP_DEVICE_COMPILE__) && GINKGO_HIP_PLATFORM_NVCC) @@ -153,3 +184,37 @@ GKO_BIND_ATOMIC_ADD(__half2); #undef GKO_BIND_ATOMIC_ADD + +template +__forceinline__ __device__ T atomic_max(T *__restrict__ addr, T val) +{ + return detail::atomic_helper::atomic_max(addr, val); +} + + +#define GKO_BIND_ATOMIC_MAX(ValueType) \ + __forceinline__ __device__ ValueType atomic_max( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + return atomicMax(addr, val); \ + } + +GKO_BIND_ATOMIC_MAX(int); +GKO_BIND_ATOMIC_MAX(unsigned int); + +#if !defined(__HIPCC__) || \ + (defined(__HIP_DEVICE_COMPILE__) && GINKGO_HIP_PLATFORM_NVCC) + + +#if defined(__CUDA_ARCH__) && (350 <= __CUDA_ARCH__) +// Only Compute Capability 3.5 and higher supports 64-bit atomicMax +GKO_BIND_ATOMIC_MAX(unsigned long long int); +#endif + +#else // Is HIP platform & on AMD hardware +GKO_BIND_ATOMIC_MAX(unsigned long long int); +#endif // !defined(__HIPCC__) || (defined(__HIP_DEVICE_COMPILE__) && + // GINKGO_HIP_PLATFORM_NVCC) + + +#undef GKO_BIND_ATOMIC_MAX diff --git a/common/components/diagonal_block_manipulation.hpp.inc b/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc similarity index 98% rename from common/components/diagonal_block_manipulation.hpp.inc rename to common/cuda_hip/components/diagonal_block_manipulation.hpp.inc index a9a199e33c5..baf991550e8 100644 --- a/common/components/diagonal_block_manipulation.hpp.inc +++ b/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo
authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/components/fill_array.hpp.inc b/common/cuda_hip/components/fill_array.hpp.inc similarity index 86% rename from common/components/fill_array.hpp.inc rename to common/cuda_hip/components/fill_array.hpp.inc index 04e6fe67b79..d33350636c3 100644 --- a/common/components/fill_array.hpp.inc +++ b/common/cuda_hip/components/fill_array.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,4 +45,15 @@ __global__ __launch_bounds__(default_block_size) void fill_array( } +template +__global__ __launch_bounds__(default_block_size) void fill_seq_array( + size_type n, ValueType *__restrict__ array) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < n) { + array[tidx] = tidx; + } +} + + } // namespace kernel diff --git a/common/components/intrinsics.hpp.inc b/common/cuda_hip/components/intrinsics.hpp.inc similarity index 98% rename from common/components/intrinsics.hpp.inc rename to common/cuda_hip/components/intrinsics.hpp.inc index f89fa434eb4..fa1581c4e84 100644 --- a/common/components/intrinsics.hpp.inc +++ b/common/cuda_hip/components/intrinsics.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/common/components/merging.hpp.inc b/common/cuda_hip/components/merging.hpp.inc similarity index 99% rename from common/components/merging.hpp.inc rename to common/cuda_hip/components/merging.hpp.inc index c91f76e1fd4..4ddd68b5beb 100644 --- a/common/components/merging.hpp.inc +++ b/common/cuda_hip/components/merging.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/components/prefix_sum.hpp.inc b/common/cuda_hip/components/prefix_sum.hpp.inc similarity index 98% rename from common/components/prefix_sum.hpp.inc rename to common/cuda_hip/components/prefix_sum.hpp.inc index 01612295379..1d57c20b2e5 100644 --- a/common/components/prefix_sum.hpp.inc +++ b/common/cuda_hip/components/prefix_sum.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,7 +57,7 @@ __forceinline__ __device__ void subwarp_prefix_sum(ValueType element, total_sum = element; #pragma unroll // hypercube prefix sum - for (auto step = 1; step < subwarp.size(); step *= 2) { + for (int step = 1; step < subwarp.size(); step *= 2) { auto neighbor = subwarp.shfl_xor(total_sum, step); total_sum += neighbor; prefix_sum += bool(subwarp.thread_rank() & step) ? 
neighbor : 0; diff --git a/common/components/reduction.hpp.inc b/common/cuda_hip/components/reduction.hpp.inc similarity index 79% rename from common/components/reduction.hpp.inc rename to common/cuda_hip/components/reduction.hpp.inc index ce35337a7ff..bbdaf1217ae 100644 --- a/common/components/reduction.hpp.inc +++ b/common/cuda_hip/components/reduction.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -128,6 +128,54 @@ __device__ void reduce(const Group &__restrict__ group, } +/** + * @internal + * + * Computes `num` reductions using the binary operation `reduce_op` on an + * entire block. + * The data range for the ith (i < num) reduction is: + * [data + i * stride, data + i * stride + block_size) (block_size == group.size()) + * The `data` array for each reduction must be of size `block_size` and + * accessible from all threads. The `data` array is also + * used as work space (so its content will be destroyed in the process), as well + * as to store the return value - which is stored in the (i * stride)-th + * position of the array.
+ */ +template < + typename Group, typename ValueType, typename Operator, + typename = xstd::enable_if_t::value>> +__device__ void multireduce(const Group &__restrict__ group, + ValueType *__restrict__ data, size_type stride, + size_type num, Operator reduce_op = Operator{}) +{ + const auto local_id = group.thread_rank(); + + for (int k = group.size() / 2; k >= config::warp_size; k /= 2) { + group.sync(); + if (local_id < k) { + for (int j = 0; j < num; j++) { + data[j * stride + local_id] = + reduce_op(data[j * stride + local_id], + data[j * stride + local_id + k]); + } + } + } + + const auto warp = group::tiled_partition(group); + const auto warp_id = group.thread_rank() / warp.size(); + if (warp_id > 0) { + return; + } + for (int j = 0; j < num; j++) { + auto result = + reduce(warp, data[j * stride + warp.thread_rank()], reduce_op); + if (warp.thread_rank() == 0) { + data[j * stride] = result; + } + } +} + + /** * @internal * diff --git a/common/components/searching.hpp.inc b/common/cuda_hip/components/searching.hpp.inc similarity index 99% rename from common/components/searching.hpp.inc rename to common/cuda_hip/components/searching.hpp.inc index e7e558508f0..d731aae319c 100644 --- a/common/components/searching.hpp.inc +++ b/common/cuda_hip/components/searching.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/common/components/segment_scan.hpp.inc b/common/cuda_hip/components/segment_scan.hpp.inc similarity index 98% rename from common/components/segment_scan.hpp.inc rename to common/cuda_hip/components/segment_scan.hpp.inc index 9de87b6c702..f16b3794d4d 100644 --- a/common/components/segment_scan.hpp.inc +++ b/common/cuda_hip/components/segment_scan.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/components/sorting.hpp.inc b/common/cuda_hip/components/sorting.hpp.inc similarity index 97% rename from common/components/sorting.hpp.inc rename to common/cuda_hip/components/sorting.hpp.inc index 611b99c5538..cd772e08adb 100644 --- a/common/components/sorting.hpp.inc +++ b/common/cuda_hip/components/sorting.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -70,7 +70,7 @@ struct bitonic_local { bool reverse) { auto els_mid = els + (num_elements / 2); - for (auto i = 0; i < num_elements / 2; ++i) { + for (int i = 0; i < num_elements / 2; ++i) { bitonic_cas(els[i], els_mid[i], reverse); } half::merge(els, reverse); @@ -131,7 +131,7 @@ struct bitonic_warp { auto tile = group::tiled_partition(group::this_thread_block()); auto new_reverse = reverse != upper_half(); - for (auto i = 0; i < num_local; ++i) { + for (int i = 0; i < num_local; ++i) { auto other = tile.shfl_xor(els[i], num_threads / 2); bitonic_cas(els[i], other, new_reverse); } @@ -206,7 +206,7 @@ struct bitonic_global { auto upper_shared_els = shared_els + (num_groups * num_threads / 2); // only the lower group executes the CAS if (!upper_half()) { - for (auto i = 0; i < num_local; ++i) { + for (int i = 0; i < num_local; ++i) { auto j = shared_idx(i); bitonic_cas(shared_els[j], upper_shared_els[j], reverse); } @@ -241,11 +241,11 @@ struct bitonic_global { bool reverse) { group::this_thread_block().sync(); - for (auto i = 0; i < num_local; ++i) { + for (int i = 0; i < num_local; ++i) { local_els[i] = shared_els[shared_idx(i)]; } warp::merge(local_els, reverse); - for (auto i = 0; i < num_local; ++i) { + for (int i = 0; i < num_local; ++i) { shared_els[shared_idx(i)] = local_els[i]; } } @@ -258,7 +258,7 @@ struct bitonic_global { // This is the first step, so we don't need to load from shared memory warp::sort(local_els, reverse); // store the sorted elements in shared memory - for (auto i = 0; i < num_local; ++i) { + for (int i = 0; i < num_local; ++i) { shared_els[shared_idx(i)] = local_els[i]; } } diff --git a/common/components/thread_ids.hpp.inc b/common/cuda_hip/components/thread_ids.hpp.inc similarity index 99% rename from common/components/thread_ids.hpp.inc rename to common/cuda_hip/components/thread_ids.hpp.inc index 3a28dad5326..c148d11b2d4 100644 --- 
a/common/components/thread_ids.hpp.inc +++ b/common/cuda_hip/components/thread_ids.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/components/uninitialized_array.hpp.inc b/common/cuda_hip/components/uninitialized_array.hpp.inc similarity index 91% rename from common/components/uninitialized_array.hpp.inc rename to common/cuda_hip/components/uninitialized_array.hpp.inc index ced072c40f4..e951cf06860 100644 --- a/common/components/uninitialized_array.hpp.inc +++ b/common/cuda_hip/components/uninitialized_array.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** * Stores an array with uninitialized contents. * - * This class needed for datatypes that do have a non-empty constructor when` + * This class is needed for datatypes that do have a non-empty constructor when * using them as shared memory, for example `thrust::complex`. * * @tparam ValueType the type of values @@ -49,7 +49,7 @@ public: * * @return the constexpr pointer to the first entry of the array. */ - constexpr GKO_ATTRIBUTES operator ValueType *() const noexcept + constexpr GKO_ATTRIBUTES operator const ValueType *() const noexcept { return &(*this)[0]; } @@ -70,7 +70,8 @@ public: * * @return a reference to the array entry at the given index. 
*/ - constexpr GKO_ATTRIBUTES ValueType &operator[](size_type pos) const noexcept + constexpr GKO_ATTRIBUTES const ValueType &operator[](size_type pos) const + noexcept { return reinterpret_cast(data_)[pos]; } diff --git a/common/components/warp_blas.hpp.inc b/common/cuda_hip/components/warp_blas.hpp.inc similarity index 89% rename from common/components/warp_blas.hpp.inc rename to common/cuda_hip/components/warp_blas.hpp.inc index 279ada4ae2f..6c7f608511b 100644 --- a/common/components/warp_blas.hpp.inc +++ b/common/cuda_hip/components/warp_blas.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -83,6 +83,54 @@ __device__ __forceinline__ void apply_gauss_jordan_transform( } +/** + * @internal + * + * Applies a Gauss-Jordan transformation (single step of Gauss-Jordan + * elimination) to a `max_problem_size`-by-`max_problem_size` matrix using the + * thread group `group`. Each thread contributes one `row` of the matrix, and the + * routine uses warp shuffles to exchange data between rows. The transform is + * performed by using the `key_row`-th row and `key_col`-th column of the + * matrix. + * Works with one right hand side vector `rhs` which can be directly worked on + * when solving Ax = rhs without the need of storing the inverse of A.
+ */ +template < + int max_problem_size, typename Group, typename ValueType, + typename = std::enable_if_t::value>> +__device__ __forceinline__ void apply_gauss_jordan_transform_with_rhs( + const Group &__restrict__ group, int32 key_row, int32 key_col, + ValueType *__restrict__ row, ValueType *__restrict__ rhs, + bool &__restrict__ status) +{ + auto key_col_elem = group.shfl(row[key_col], key_row); + auto key_rhs_elem = group.shfl(rhs[0], key_row); + if (key_col_elem == zero()) { + // TODO: implement error handling for GPUs to be able to properly + // report it here + status = false; + return; + } + if (group.thread_rank() == key_row) { + key_col_elem = one() / key_col_elem; + rhs[0] = key_rhs_elem * key_col_elem; + } else { + key_col_elem = -row[key_col] / key_col_elem; + rhs[0] += key_rhs_elem * key_col_elem; + } +#pragma unroll + for (int32 i = 0; i < max_problem_size; ++i) { + const auto key_row_elem = group.shfl(row[i], key_row); + if (group.thread_rank() == key_row) { + row[i] = zero(); + } + // rhs[0] += key_rhs_elem * key_row_elem; + row[i] += key_col_elem * key_row_elem; + } + row[key_col] = key_col_elem; +} + + /** * @internal * diff --git a/common/factorization/factorization_kernels.hpp.inc b/common/cuda_hip/factorization/factorization_kernels.hpp.inc similarity index 99% rename from common/factorization/factorization_kernels.hpp.inc rename to common/cuda_hip/factorization/factorization_kernels.hpp.inc index 7050c5ce116..80178bc76a7 100644 --- a/common/factorization/factorization_kernels.hpp.inc +++ b/common/cuda_hip/factorization/factorization_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/common/cuda_hip/factorization/par_ic_kernels.hpp.inc b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc new file mode 100644 index 00000000000..f83ab19c8ba --- /dev/null +++ b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +namespace kernel { + + +template +__global__ __launch_bounds__(default_block_size) void ic_init( + const IndexType *__restrict__ l_row_ptrs, ValueType *__restrict__ l_vals, + size_type num_rows) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num_rows) { + return; + } + auto l_nz = l_row_ptrs[row + 1] - 1; + auto diag = sqrt(l_vals[l_nz]); + if (is_finite(diag)) { + l_vals[l_nz] = diag; + } else { + l_vals[l_nz] = one(); + } +} + + +template +__global__ __launch_bounds__(default_block_size) void ic_sweep( + const IndexType *__restrict__ a_row_idxs, + const IndexType *__restrict__ a_col_idxs, + const ValueType *__restrict__ a_vals, + const IndexType *__restrict__ l_row_ptrs, + const IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_vals, + IndexType l_nnz) +{ + const auto l_nz = thread::get_thread_id_flat(); + if (l_nz >= l_nnz) { + return; + } + const auto row = a_row_idxs[l_nz]; + const auto col = l_col_idxs[l_nz]; + const auto a_val = a_vals[l_nz]; + auto l_row_begin = l_row_ptrs[row]; + const auto l_row_end = l_row_ptrs[row + 1]; + auto lh_col_begin = l_row_ptrs[col]; + const auto lh_col_end = l_row_ptrs[col + 1]; + ValueType sum{}; + auto last_entry = col; + while (l_row_begin < l_row_end && lh_col_begin < lh_col_end) { + auto l_col = l_col_idxs[l_row_begin]; + auto lh_row = l_col_idxs[lh_col_begin]; + if (l_col == lh_row && l_col < last_entry) { + sum += l_vals[l_row_begin] * conj(l_vals[lh_col_begin]); + } + l_row_begin += l_col <= lh_row; + lh_col_begin += l_col >= lh_row; + } + auto to_write = row == col + ? 
sqrt(a_val - sum) + : (a_val - sum) / l_vals[l_row_ptrs[col + 1] - 1]; + if (is_finite(to_write)) { + l_vals[l_nz] = to_write; + } +} + + +} // namespace kernel diff --git a/common/factorization/par_ict_spgeam_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc similarity index 76% rename from common/factorization/par_ict_spgeam_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc index 7a9febf3f03..5e1dea39544 100644 --- a/common/factorization/par_ict_spgeam_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_nnz( - const IndexType *__restrict__ llt_row_ptrs, - const IndexType *__restrict__ llt_col_idxs, + const IndexType *__restrict__ llh_row_ptrs, + const IndexType *__restrict__ llh_col_idxs, const IndexType *__restrict__ a_row_ptrs, const IndexType *__restrict__ a_col_idxs, IndexType *__restrict__ l_new_row_ptrs, IndexType num_rows) @@ -48,20 +48,20 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_nnz( return; } - auto llt_begin = llt_row_ptrs[row]; - auto llt_size = llt_row_ptrs[row + 1] - llt_begin; + auto llh_begin = llh_row_ptrs[row]; + auto llh_size = llh_row_ptrs[row + 1] - llh_begin; auto a_begin = a_row_ptrs[row]; auto a_size = a_row_ptrs[row + 1] - a_begin; IndexType count{}; group_merge( - a_col_idxs + a_begin, a_size, llt_col_idxs + llt_begin, llt_size, + a_col_idxs + a_begin, a_size, llh_col_idxs + llh_begin, llh_size, subwarp, - [&](IndexType a_nz, IndexType a_col, IndexType llt_nz, - IndexType llt_col, IndexType out_nz, bool valid) { - auto col = min(a_col, llt_col); + 
[&](IndexType a_nz, IndexType a_col, IndexType llh_nz, + IndexType llh_col, IndexType out_nz, bool valid) { + auto col = min(a_col, llh_col); // count the number of unique elements being merged count += - popcnt(subwarp.ballot(col <= row && a_col != llt_col && valid)); + popcnt(subwarp.ballot(col <= row && a_col != llh_col && valid)); return true; }); if (subwarp.thread_rank() == 0) { @@ -72,9 +72,9 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_nnz( template __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( - const IndexType *__restrict__ llt_row_ptrs, - const IndexType *__restrict__ llt_col_idxs, - const ValueType *__restrict__ llt_vals, + const IndexType *__restrict__ llh_row_ptrs, + const IndexType *__restrict__ llh_col_idxs, + const ValueType *__restrict__ llh_vals, const IndexType *__restrict__ a_row_ptrs, const IndexType *__restrict__ a_col_idxs, const ValueType *__restrict__ a_vals, @@ -100,16 +100,16 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( auto l_begin = l_row_ptrs[row]; auto l_end = l_row_ptrs[row + 1]; - auto llt_begin = llt_row_ptrs[row]; - auto llt_end = llt_row_ptrs[row + 1]; - auto llt_size = llt_end - llt_begin; + auto llh_begin = llh_row_ptrs[row]; + auto llh_end = llh_row_ptrs[row + 1]; + auto llh_size = llh_end - llh_begin; auto a_begin = a_row_ptrs[row]; auto a_end = a_row_ptrs[row + 1]; auto a_size = a_end - a_begin; IndexType out_begin{}; - auto out_size = llt_size + a_size; + auto out_size = llh_size + a_size; IndexType l_new_begin = l_new_row_ptrs[row]; @@ -117,44 +117,44 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( // load column indices and values for the first merge step auto a_col = checked_load(a_col_idxs, a_begin + lane, a_end, sentinel); auto a_val = checked_load(a_vals, a_begin + lane, a_end, zero()); - auto llt_col = - checked_load(llt_col_idxs, llt_begin + lane, llt_end, sentinel); - auto llt_val = - 
checked_load(llt_vals, llt_begin + lane, llt_end, zero()); + auto llh_col = + checked_load(llh_col_idxs, llh_begin + lane, llh_end, sentinel); + auto llh_val = + checked_load(llh_vals, llh_begin + lane, llh_end, zero()); auto l_col = checked_load(l_col_idxs, l_begin + lane, l_end, sentinel); auto l_val = checked_load(l_vals, l_begin + lane, l_end, zero()); bool skip_first{}; while (out_begin < out_size) { - // merge subwarp.size() elements from A and L*L^T + // merge subwarp.size() elements from A and L*L^H auto merge_result = - group_merge_step(a_col, llt_col, subwarp); + group_merge_step(a_col, llh_col, subwarp); auto a_cur_col = merge_result.a_val; - auto llt_cur_col = merge_result.b_val; + auto llh_cur_col = merge_result.b_val; auto a_cur_val = subwarp.shfl(a_val, merge_result.a_idx); - auto llt_cur_val = subwarp.shfl(llt_val, merge_result.b_idx); + auto llh_cur_val = subwarp.shfl(llh_val, merge_result.b_idx); auto valid = out_begin + lane < out_size; // check if the previous thread has matching columns - auto equal_mask = subwarp.ballot(a_cur_col == llt_cur_col && valid); + auto equal_mask = subwarp.ballot(a_cur_col == llh_cur_col && valid); auto prev_equal_mask = equal_mask << 1 | skip_first; skip_first = bool(equal_mask >> (subwarp_size - 1)); auto prev_equal = bool(prev_equal_mask & lanemask_eq); - auto r_col = min(a_cur_col, llt_cur_col); + auto r_col = min(a_cur_col, llh_cur_col); // find matching entry of L - // S(L) is a subset of S(A - L * L^T) since L has a diagonal + // S(L) is a subset of S(A - L * L^H) since L has a diagonal auto l_source = synchronous_fixed_binary_search( [&](int i) { return subwarp.shfl(l_col, i) >= r_col; }); auto l_cur_col = subwarp.shfl(l_col, l_source); auto l_cur_val = subwarp.shfl(l_val, l_source); - // determine actual values of A and L*L^T at r_col + // determine actual values of A and L*L^H at r_col if (r_col != a_cur_col) { a_cur_val = zero(); } - if (r_col != llt_cur_col) { - llt_cur_val = zero(); + if (r_col != 
llh_cur_col) { + llh_cur_val = zero(); } - auto r_val = a_cur_val - llt_cur_val; + auto r_val = a_cur_val - llh_cur_val; // early return when reaching the upper diagonal if (subwarp.all(r_col > row)) { @@ -176,11 +176,11 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( // advance *_begin offsets auto a_advance = merge_result.a_advance; - auto llt_advance = merge_result.b_advance; + auto llh_advance = merge_result.b_advance; auto l_advance = popcnt(subwarp.ballot(do_write && use_l)); auto l_new_advance = popcnt(l_new_advance_mask); a_begin += a_advance; - llt_begin += llt_advance; + llh_begin += llh_advance; l_begin += l_advance; l_new_begin += l_new_advance; out_begin += subwarp_size; @@ -188,25 +188,25 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( // shuffle the unmerged elements to the front a_col = subwarp.shfl_down(a_col, a_advance); a_val = subwarp.shfl_down(a_val, a_advance); - llt_col = subwarp.shfl_down(llt_col, llt_advance); - llt_val = subwarp.shfl_down(llt_val, llt_advance); + llh_col = subwarp.shfl_down(llh_col, llh_advance); + llh_val = subwarp.shfl_down(llh_val, llh_advance); l_col = subwarp.shfl_down(l_col, l_advance); l_val = subwarp.shfl_down(l_val, l_advance); /* - * To optimize memory access, we load the new elements for `a` and `llt` + * To optimize memory access, we load the new elements for `a` and `llh` * with a single load instruction: * the lower part of the group loads new elements for `a` - * the upper part of the group loads new elements for `llt` + * the upper part of the group loads new elements for `llh` * `load_lane` is the part-local lane idx * The elements for `a` have to be shuffled up afterwards. */ auto load_a = lane < a_advance; auto load_lane = load_a ? lane : lane - a_advance; - auto load_source_col = load_a ? a_col_idxs : llt_col_idxs; - auto load_source_val = load_a ? a_vals : llt_vals; + auto load_source_col = load_a ? 
a_col_idxs : llh_col_idxs; + auto load_source_val = load_a ? a_vals : llh_vals; auto load_begin = - load_a ? a_begin + llt_advance : llt_begin + a_advance; - auto load_end = load_a ? a_end : llt_end; + load_a ? a_begin + llh_advance : llh_begin + a_advance; + auto load_end = load_a ? a_end : llh_end; auto load_idx = load_begin + load_lane; auto loaded_col = @@ -214,15 +214,15 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( auto loaded_val = checked_load(load_source_val, load_idx, load_end, zero()); // shuffle the `a` values to the end of the warp - auto lower_loaded_col = subwarp.shfl_up(loaded_col, llt_advance); - auto lower_loaded_val = subwarp.shfl_up(loaded_val, llt_advance); - if (lane >= llt_advance) { + auto lower_loaded_col = subwarp.shfl_up(loaded_col, llh_advance); + auto lower_loaded_val = subwarp.shfl_up(loaded_val, llh_advance); + if (lane >= llh_advance) { a_col = lower_loaded_col; a_val = lower_loaded_val; } if (lane >= a_advance) { - llt_col = loaded_col; - llt_val = loaded_val; + llh_col = loaded_col; + llh_val = loaded_val; } // load the new values for l if (lane >= subwarp_size - l_advance) { @@ -234,4 +234,4 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( } -} // namespace kernel \ No newline at end of file +} // namespace kernel diff --git a/common/factorization/par_ict_sweep_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc similarity index 83% rename from common/factorization/par_ict_sweep_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc index 060bacb2144..418efef9c5c 100644 --- a/common/factorization/par_ict_sweep_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -62,27 +62,27 @@ __global__ __launch_bounds__(default_block_size) void ict_sweep( auto a_val = has_a ? a_vals[a_idx] : zero(); auto l_row_begin = l_row_ptrs[row]; auto l_row_size = l_row_ptrs[row + 1] - l_row_begin; - auto lt_col_begin = l_row_ptrs[col]; - auto lt_col_size = l_row_ptrs[col + 1] - lt_col_begin; + auto lh_col_begin = l_row_ptrs[col]; + auto lh_col_size = l_row_ptrs[col + 1] - lh_col_begin; ValueType sum{}; - IndexType lt_nz{}; + IndexType lh_nz{}; auto last_entry = col; group_merge( - l_col_idxs + l_row_begin, l_row_size, l_col_idxs + lt_col_begin, - lt_col_size, subwarp, - [&](IndexType l_idx, IndexType l_col, IndexType lt_idx, - IndexType lt_row, IndexType, bool) { + l_col_idxs + l_row_begin, l_row_size, l_col_idxs + lh_col_begin, + lh_col_size, subwarp, + [&](IndexType l_idx, IndexType l_col, IndexType lh_idx, + IndexType lh_row, IndexType, bool) { // we don't need to use the `bool valid` because last_entry is // already a smaller sentinel value than the one used in group_merge - if (l_col == lt_row && l_col < last_entry) { - sum += - l_vals[l_idx + l_row_begin] * l_vals[lt_idx + lt_col_begin]; + if (l_col == lh_row && l_col < last_entry) { + sum += l_vals[l_idx + l_row_begin] * + conj(l_vals[lh_idx + lh_col_begin]); } // remember the transposed element - auto found_transp = subwarp.ballot(lt_row == row); + auto found_transp = subwarp.ballot(lh_row == row); if (found_transp) { - lt_nz = - subwarp.shfl(lt_idx + lt_col_begin, ffs(found_transp) - 1); + lh_nz = + subwarp.shfl(lh_idx + lh_col_begin, ffs(found_transp) - 1); } return true; }); @@ -100,4 +100,4 @@ __global__ __launch_bounds__(default_block_size) void ict_sweep( } -} // namespace kernel \ No newline at end of file +} // namespace kernel diff --git a/common/factorization/par_ilu_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc similarity index 98% rename from 
common/factorization/par_ilu_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ilu_kernels.hpp.inc index af28012cf81..f2fdc1468e1 100644 --- a/common/factorization/par_ilu_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/factorization/par_ilut_filter_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc similarity index 98% rename from common/factorization/par_ilut_filter_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc index 2d520f33019..b5f7d43db67 100644 --- a/common/factorization/par_ilut_filter_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -55,7 +55,7 @@ __device__ void abstract_filter_impl(const IndexType *row_ptrs, auto end = row_ptrs[row + 1]; begin_cb(row); auto num_steps = ceildiv(end - begin, subwarp_size); - for (auto step = 0; step < num_steps; ++step) { + for (IndexType step = 0; step < num_steps; ++step) { auto idx = begin + lane + step * subwarp_size; auto keep = idx < end && pred(idx, begin, end); auto mask = subwarp.ballot(keep); @@ -189,4 +189,4 @@ __global__ __launch_bounds__(default_block_size) void bucket_filter( } -} // namespace kernel \ No newline at end of file +} // namespace kernel diff --git a/common/factorization/par_ilut_select_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc similarity index 97% rename from common/factorization/par_ilut_select_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc index 338588d5695..e443d7b6ba7 100644 --- a/common/factorization/par_ilut_select_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -62,7 +62,7 @@ __global__ __launch_bounds__(searchtree_width) void build_searchtree( // assuming rounding towards zero auto stride = double(size) / sample_size; #pragma unroll - for (auto i = 0; i < sampleselect_oversampling; ++i) { + for (int i = 0; i < sampleselect_oversampling; ++i) { auto lidx = idx * sampleselect_oversampling + i; auto val = input[static_cast(lidx * stride)]; samples[i] = abs(val); @@ -119,7 +119,7 @@ __global__ __launch_bounds__(default_block_size) void count_buckets( auto el = abs(input[i]); IndexType tree_idx{}; #pragma unroll - for (auto level = 0; level < sampleselect_searchtree_height; ++level) { + for (int level = 0; level < sampleselect_searchtree_height; ++level) { auto cmp = !(el < sh_tree[tree_idx]); tree_idx = 2 * tree_idx + 1 + cmp; } @@ -168,7 +168,7 @@ __global__ __launch_bounds__(default_block_size) void block_prefix_sum( // compute prefix sum over warp-sized blocks IndexType total{}; auto base_idx = warp_idx * work_per_warp * warp.size(); - for (auto step = 0; step < work_per_warp; ++step) { + for (IndexType step = 0; step < work_per_warp; ++step) { auto idx = warp_lane + step * warp.size() + base_idx; auto val = idx < num_blocks ? local_counters[idx] : zero(); IndexType warp_total{}; @@ -207,7 +207,7 @@ __global__ __launch_bounds__(default_block_size) void block_prefix_sum( // add block prefix sum to each warp's block of data block.sync(); auto warp_prefixsum = warp_sums[warp_idx]; - for (auto step = 0; step < work_per_warp; ++step) { + for (IndexType step = 0; step < work_per_warp; ++step) { auto idx = warp_lane + step * warp.size() + base_idx; auto val = idx < num_blocks ? 
local_counters[idx] : zero(); if (idx < num_blocks) { diff --git a/common/factorization/par_ilut_spgeam_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc similarity index 99% rename from common/factorization/par_ilut_spgeam_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc index 903968bf4a6..b8ba3b0171f 100644 --- a/common/factorization/par_ilut_spgeam_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/factorization/par_ilut_sweep_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc similarity index 98% rename from common/factorization/par_ilut_sweep_kernels.hpp.inc rename to common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc index 96cfc951b64..5b78d07f28e 100644 --- a/common/factorization/par_ilut_sweep_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/matrix/coo_kernels.hpp.inc b/common/cuda_hip/matrix/coo_kernels.hpp.inc similarity index 95% rename from common/matrix/coo_kernels.hpp.inc rename to common/cuda_hip/matrix/coo_kernels.hpp.inc index c6d550a4149..22d8191ab8a 100644 --- a/common/matrix/coo_kernels.hpp.inc +++ b/common/cuda_hip/matrix/coo_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -272,20 +272,4 @@ __global__ __launch_bounds__(default_block_size) void fill_in_dense( } -template -__global__ __launch_bounds__(default_block_size) void extract_diagonal( - size_type nnz, const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_row_idxs, - const IndexType *__restrict__ orig_col_idxs, ValueType *__restrict__ diag) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < nnz) { - if (orig_row_idxs[tidx] == orig_col_idxs[tidx]) { - diag[orig_row_idxs[tidx]] = orig_values[tidx]; - } - } -} - - } // namespace kernel diff --git a/common/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc similarity index 89% rename from common/matrix/csr_kernels.hpp.inc rename to common/cuda_hip/matrix/csr_kernels.hpp.inc index bb7e6b27b57..49c3a138e90 100644 --- a/common/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -76,8 +76,9 @@ __device__ __forceinline__ void find_next_row( if (ind >= *row_end) { *row = row_predict; *row_end = row_predict_end; - for (; ind >= *row_end; *row_end = row_ptr[++*row + 1]) - ; + while (ind >= *row_end) { + *row_end = row_ptr[++*row + 1]; + } } } else { @@ -140,8 +141,8 @@ template __device__ __forceinline__ IndexType get_warp_start_idx( const IndexType nwarps, const IndexType nnz, const IndexType warp_idx) { - const long long cache_lines = ceildivT(nnz, wsize); - return (warp_idx * cache_lines / nwarps) * wsize; + const long long cache_lines = ceildivT(nnz, config::warp_size); + return (warp_idx * cache_lines / nwarps) * config::warp_size; } @@ -160,6 +161,7 @@ __device__ __forceinline__ void spmv_kernel( } const IndexType data_size = row_ptrs[num_rows]; const IndexType start = get_warp_start_idx(nwarps, data_size, warp_idx); + constexpr IndexType wsize = config::warp_size; const IndexType end = min(get_warp_start_idx(nwarps, data_size, warp_idx + 1), ceildivT(data_size, wsize) * wsize); @@ -218,17 +220,6 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( } -template -__global__ __launch_bounds__(default_block_size) void set_zero( - const size_type nnz, ValueType *__restrict__ val) -{ - const auto ind = thread::get_thread_id_flat(); - if (ind < nnz) { - val[ind] = zero(); - } -} - - template __forceinline__ __device__ void merge_path_search( const IndexType diagonal, const IndexType a_len, const IndexType b_len, @@ -359,8 +350,7 @@ __device__ void merge_path_spmv( tmp_val[threadIdx.x] = value; tmp_ind[threadIdx.x] = row_i; group::this_thread_block().sync(); - bool last = block_segment_scan_reverse(static_cast(tmp_ind), - static_cast(tmp_val)); + bool last = block_segment_scan_reverse(tmp_ind, tmp_val); if (threadIdx.x == spmv_block_size - 1) { row_out[blockIdx.x] = min(end_x, num_rows - 1); val_out[blockIdx.x] = tmp_val[threadIdx.x]; @@ -946,3 +936,113 @@ 
__global__ __launch_bounds__(default_block_size) void conjugate_kernel( } // namespace + + +template +__global__ __launch_bounds__(default_block_size) void row_ptr_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, IndexType *__restrict__ out_nnz) +{ + auto tid = thread::get_thread_id_flat(); + if (tid >= num_rows) { + return; + } + auto in_row = permutation[tid]; + auto out_row = tid; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + + +template +__global__ + __launch_bounds__(default_block_size) void inv_row_ptr_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + IndexType *__restrict__ out_nnz) +{ + auto tid = thread::get_thread_id_flat(); + if (tid >= num_rows) { + return; + } + auto in_row = tid; + auto out_row = permutation[tid]; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + + +template +__global__ __launch_bounds__(default_block_size) void row_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = permutation[tid]; + auto out_row = tid; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_row_permute_kernel( + size_type num_rows, const 
IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = permutation[in_cols[in_begin + i]]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} diff --git a/common/matrix/dense_kernels.hpp.inc b/common/cuda_hip/matrix/dense_kernels.hpp.inc similarity index 70% rename from common/matrix/dense_kernels.hpp.inc rename to common/cuda_hip/matrix/dense_kernels.hpp.inc index 9d0285e71f7..c7ebafd0627 100644 --- a/common/matrix/dense_kernels.hpp.inc +++ 
b/common/cuda_hip/matrix/dense_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,62 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace kernel { -template -__global__ __launch_bounds__(block_size) void scale( - size_type num_rows, size_type num_cols, size_type num_alpha_cols, - const ValueType *__restrict__ alpha, ValueType *__restrict__ x, - size_type stride_x) -{ - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - const auto alpha_id = num_alpha_cols == 1 ? 0 : col_id; - if (row_id < num_rows) { - x[row_id * stride_x + col_id] = - alpha[alpha_id] == zero() - ? zero() - : x[row_id * stride_x + col_id] * alpha[alpha_id]; - } -} - - -template -__global__ __launch_bounds__(block_size) void add_scaled( - size_type num_rows, size_type num_cols, size_type num_alpha_cols, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ x, - size_type stride_x, ValueType *__restrict__ y, size_type stride_y) -{ - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - const auto alpha_id = num_alpha_cols == 1 ? 
0 : col_id; - if (row_id < num_rows && alpha[alpha_id] != zero()) { - y[row_id * stride_y + col_id] += - x[row_id * stride_x + col_id] * alpha[alpha_id]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void add_scaled_diag( - size_type size, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ diag, ValueType *__restrict__ y, - size_type stride_y) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx >= size) { - return; - } - - y[tidx * stride_y + tidx] += alpha[0] * diag[tidx]; -} - - template __device__ void compute_partial_reduce(size_type num_rows, @@ -154,14 +98,29 @@ __global__ __launch_bounds__(block_size) void compute_partial_dot( compute_partial_reduce( num_rows, work, [x, stride_x, y, stride_y](size_type i) { - return x[i * stride_x] * conj(y[i * stride_y]); + return x[i * stride_x] * y[i * stride_y]; + }, + [](const ValueType &x, const ValueType &y) { return x + y; }); +} + + +template +__global__ __launch_bounds__(block_size) void compute_partial_conj_dot( + size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ work) +{ + compute_partial_reduce( + num_rows, work, + [x, stride_x, y, stride_y](size_type i) { + return conj(x[i * stride_x]) * y[i * stride_y]; }, [](const ValueType &x, const ValueType &y) { return x + y; }); } template -__global__ __launch_bounds__(block_size) void finalize_dot_computation( +__global__ __launch_bounds__(block_size) void finalize_sum_reduce_computation( size_type size, const ValueType *work, ValueType *result) { finalize_reduce_computation( @@ -185,7 +144,7 @@ __global__ __launch_bounds__(block_size) void compute_partial_norm2( template -__global__ __launch_bounds__(block_size) void finalize_norm2_computation( +__global__ __launch_bounds__(block_size) void finalize_sqrt_reduce_computation( size_type size, const ValueType *work, ValueType *result) { finalize_reduce_computation( 
@@ -252,7 +211,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( if (tidx < num_rows) { auto write_to = row_ptrs[tidx]; - for (auto i = 0; i < num_cols; i++) { + for (size_type i = 0; i < num_cols; i++) { if (source[stride * tidx + i] != zero()) { values[write_to] = source[stride * tidx + i]; col_idxs[write_to] = i; @@ -421,93 +380,4 @@ __global__ __launch_bounds__(default_block_size) void reduce_total_cols( } -template -__global__ __launch_bounds__(block_size) void row_permute( - size_type num_rows, size_type num_cols, - const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, - size_type stride_orig, ValueType *__restrict__ result, - size_type stride_result) -{ - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - if (row_id < num_rows) { - result[row_id * stride_result + col_id] = - orig[perm_idxs[row_id] * stride_orig + col_id]; - } -} - - -template -__global__ __launch_bounds__(block_size) void column_permute( - size_type num_rows, size_type num_cols, - const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, - size_type stride_orig, ValueType *__restrict__ result, - size_type stride_result) -{ - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - if (row_id < num_rows) { - result[row_id * stride_result + col_id] = - orig[row_id * stride_orig + perm_idxs[col_id]]; - } -} - - -template -__global__ __launch_bounds__(block_size) void inverse_row_permute( - size_type num_rows, size_type num_cols, - const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, - size_type stride_orig, ValueType *__restrict__ result, - size_type stride_result) -{ - constexpr auto warps_per_block = 
block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - if (row_id < num_rows) { - result[perm_idxs[row_id] * stride_result + col_id] = - orig[row_id * stride_orig + col_id]; - } -} - - -template -__global__ __launch_bounds__(block_size) void inverse_column_permute( - size_type num_rows, size_type num_cols, - const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, - size_type stride_orig, ValueType *__restrict__ result, - size_type stride_result) -{ - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); - const auto row_id = global_id / num_cols; - const auto col_id = global_id % num_cols; - if (row_id < num_rows) { - result[row_id * stride_result + perm_idxs[col_id]] = - orig[row_id * stride_orig + col_id]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void extract_diagonal( - size_type problem_size, const ValueType *__restrict__ orig, - size_type stride_orig, ValueType *__restrict__ diag) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < problem_size) { - diag[tidx] = orig[tidx * stride_orig + tidx]; - } -} - - } // namespace kernel diff --git a/common/cuda_hip/matrix/diagonal_kernels.hpp.inc b/common/cuda_hip/matrix/diagonal_kernels.hpp.inc new file mode 100644 index 00000000000..9a3736766e6 --- /dev/null +++ b/common/cuda_hip/matrix/diagonal_kernels.hpp.inc @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +namespace kernel { + + +template +__global__ __launch_bounds__(default_block_size) void apply_to_csr( + size_type num_rows, const ValueType *__restrict__ diag, + const IndexType *__restrict__ row_ptrs, + ValueType *__restrict__ result_values) +{ + constexpr auto warp_size = config::warp_size; + auto warp_tile = + group::tiled_partition(group::this_thread_block()); + const auto row = thread::get_subwarp_id_flat(); + const auto tid_in_warp = warp_tile.thread_rank(); + + if (row >= num_rows) { + return; + } + + const auto diag_val = diag[row]; + + for (size_type idx = row_ptrs[row] + tid_in_warp; idx < row_ptrs[row + 1]; + idx += warp_size) { + result_values[idx] *= diag_val; + } +} + + +} // namespace kernel diff --git a/common/matrix/ell_kernels.hpp.inc b/common/cuda_hip/matrix/ell_kernels.hpp.inc similarity index 78% rename from common/matrix/ell_kernels.hpp.inc rename to common/cuda_hip/matrix/ell_kernels.hpp.inc index 57991f2e06d..f1845111eec 100644 --- a/common/matrix/ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/ell_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -34,29 +34,30 @@ namespace kernel { namespace { -template +template __device__ void spmv_kernel( const size_type num_rows, const int num_worker_per_row, - const ValueType *__restrict__ val, const IndexType *__restrict__ col, + acc::range val, const IndexType *__restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, Closure op) + acc::range b, OutputValueType *__restrict__ c, + const size_type c_stride, Closure op) { const auto tidx = thread::get_thread_id_flat(); - const auto column_id = blockIdx.y; + const decltype(tidx) column_id = blockIdx.y; if (num_thread_per_worker == 1) { // Specialize the num_thread_per_worker = 1. It doesn't need the shared // memory, __syncthreads, and atomic_add if (tidx < num_rows) { - ValueType temp = zero(); + auto temp = zero(); for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; if (col_idx < idx) { break; } else { - temp += val[ind] * b[col_idx * b_stride + column_id]; + temp += val(ind) * b(col_idx, column_id); } } const auto c_ind = tidx * c_stride + column_id; @@ -68,14 +69,14 @@ __device__ void spmv_kernel( const auto x = tidx % num_rows; const auto worker_id = tidx / num_rows; const auto step_size = num_worker_per_row * num_thread_per_worker; - __shared__ UninitializedArray + __shared__ UninitializedArray< + OutputValueType, default_block_size / num_thread_per_worker> storage; if (idx_in_worker == 0) { storage[threadIdx.x] = 0; } __syncthreads(); - ValueType temp = zero(); + auto temp = zero(); for (size_type idx = worker_id * num_thread_per_worker + idx_in_worker; idx < num_stored_elements_per_row; idx += step_size) { @@ -84,7 +85,7 @@ __device__ void spmv_kernel( if (col_idx < idx) { break; } else { - temp += val[ind] * b[col_idx 
* b_stride + column_id]; + temp += val(ind) * b(col_idx, column_id); } } atomic_add(&storage[threadIdx.x], temp); @@ -102,51 +103,52 @@ __device__ void spmv_kernel( } -template +template __global__ __launch_bounds__(default_block_size) void spmv( const size_type num_rows, const int num_worker_per_row, - const ValueType *__restrict__ val, const IndexType *__restrict__ col, + acc::range val, const IndexType *__restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride) + acc::range b, OutputValueType *__restrict__ c, + const size_type c_stride) { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, - num_stored_elements_per_row, b, b_stride, c, c_stride, - [](const ValueType &x, const ValueType &y) { return x; }); + num_stored_elements_per_row, b, c, c_stride, + [](const OutputValueType &x, const OutputValueType &y) { return x; }); } -template +template __global__ __launch_bounds__(default_block_size) void spmv( const size_type num_rows, const int num_worker_per_row, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ val, + acc::range alpha, acc::range val, const IndexType *__restrict__ col, const size_type stride, - const size_type num_stored_elements_per_row, - const ValueType *__restrict__ b, const size_type b_stride, - const ValueType *__restrict__ beta, ValueType *__restrict__ c, + const size_type num_stored_elements_per_row, acc::range b, + const OutputValueType *__restrict__ beta, OutputValueType *__restrict__ c, const size_type c_stride) { - const ValueType alpha_val = alpha[0]; - const ValueType beta_val = beta[0]; - // Because the atomic operation changes the values of c during computation, - // it can not do the right alpha * a * b + beta * c operation. - // Thus, the cuda kernel only computes alpha * a * b when it uses atomic - // operation. 
+ const OutputValueType alpha_val = alpha(0); + const OutputValueType beta_val = beta[0]; if (atomic) { + // Because the atomic operation changes the values of c during + // computation, it can not directly do alpha * a * b + beta * c + // operation. The beta * c needs to be done before calling this kernel. + // Then, this kernel only adds alpha * a * b when it uses atomic + // operation. spmv_kernel( num_rows, num_worker_per_row, val, col, stride, - num_stored_elements_per_row, b, b_stride, c, c_stride, - [&alpha_val](const ValueType &x, const ValueType &y) { + num_stored_elements_per_row, b, c, c_stride, + [&alpha_val](const OutputValueType &x, const OutputValueType &y) { return alpha_val * x; }); } else { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, - num_stored_elements_per_row, b, b_stride, c, c_stride, - [&alpha_val, &beta_val](const ValueType &x, const ValueType &y) { + num_stored_elements_per_row, b, c, c_stride, + [&alpha_val, &beta_val](const OutputValueType &x, + const OutputValueType &y) { return alpha_val * x + beta_val * y; }); } @@ -178,7 +180,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_dense( { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { - for (auto col = 0; col < nnz; col++) { + for (size_type col = 0; col < nnz; col++) { result[tidx * result_stride + col_idxs[tidx + col * source_stride]] += values[tidx + col * source_stride]; @@ -225,7 +227,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( if (tidx < num_rows) { auto write_to = result_row_ptrs[tidx]; - for (auto i = 0; i < max_nnz_per_row; i++) { + for (size_type i = 0; i < max_nnz_per_row; i++) { const auto source_idx = tidx + stride * i; if (source_values[source_idx] != zero()) { result_values[write_to] = source_values[source_idx]; diff --git a/common/matrix/hybrid_kernels.hpp.inc b/common/cuda_hip/matrix/hybrid_kernels.hpp.inc similarity index 98% rename from common/matrix/hybrid_kernels.hpp.inc rename to 
common/cuda_hip/matrix/hybrid_kernels.hpp.inc index 3a45bd38e79..c7c192189e0 100644 --- a/common/matrix/hybrid_kernels.hpp.inc +++ b/common/cuda_hip/matrix/hybrid_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -108,7 +108,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( if (tidx < num_rows) { auto write_to = result_row_ptrs[tidx]; - for (auto i = 0; i < max_nnz_per_row; i++) { + for (size_type i = 0; i < max_nnz_per_row; i++) { const auto source_idx = tidx + stride * i; if (ell_val[source_idx] != zero()) { result_values[write_to] = ell_val[source_idx]; diff --git a/common/matrix/sellp_kernels.hpp.inc b/common/cuda_hip/matrix/sellp_kernels.hpp.inc similarity index 99% rename from common/matrix/sellp_kernels.hpp.inc rename to common/cuda_hip/matrix/sellp_kernels.hpp.inc index b15d96e6e91..b0e7ab9dc93 100644 --- a/common/matrix/sellp_kernels.hpp.inc +++ b/common/cuda_hip/matrix/sellp_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc new file mode 100644 index 00000000000..f03fc4a59b3 --- /dev/null +++ b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc @@ -0,0 +1,228 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +namespace kernel { + + +template +__global__ __launch_bounds__(default_block_size) void match_edge_kernel( + size_type num, const IndexType *__restrict__ strongest_neighbor_vals, + IndexType *__restrict__ agg_vals) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + if (agg_vals[tidx] != -1) { + return; + } + auto neighbor = strongest_neighbor_vals[tidx]; + if (neighbor != -1 && strongest_neighbor_vals[neighbor] == tidx && + tidx <= neighbor) { + // Use the smaller index as agg point + agg_vals[tidx] = tidx; + agg_vals[neighbor] = tidx; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void activate_kernel( + size_type num, const IndexType *__restrict__ agg, + IndexType *__restrict__ active_agg) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + active_agg[tidx] = agg[tidx] == -1; +} + + +template +__global__ __launch_bounds__(default_block_size) void fill_agg_kernel( + size_type num, const IndexType *__restrict__ index, + IndexType *__restrict__ result) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + // agg_vals[i] == i always holds in the aggregated group whose identifier is + // i because we use the index of element as the aggregated group identifier. 
+ result[tidx] = (index[tidx] == tidx); +} + + +template +__global__ __launch_bounds__(default_block_size) void renumber_kernel( + size_type num, const IndexType *__restrict__ map, + IndexType *__restrict__ result) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= num) { + return; + } + result[tidx] = map[result[tidx]]; +} + + +template +__global__ + __launch_bounds__(default_block_size) void find_strongest_neighbor_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, IndexType *__restrict__ agg, + IndexType *__restrict__ strongest_neighbor) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num) { + return; + } + + auto max_weight_unagg = zero(); + auto max_weight_agg = zero(); + IndexType strongest_unagg = -1; + IndexType strongest_agg = -1; + if (agg[row] != -1) { + return; + } + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); + if (agg[col] == -1 && + thrust::tie(weight, col) > + thrust::tie(max_weight_unagg, strongest_unagg)) { + max_weight_unagg = weight; + strongest_unagg = col; + } else if (agg[col] != -1 && + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { + max_weight_agg = weight; + strongest_agg = col; + } + } + + if (strongest_unagg == -1 && strongest_agg != -1) { + // all neighbor is agg, connect to the strongest agg + // Also, no others will use this item as their strongest_neighbor + // because they are already aggregated. 
Thus, it is determinstic + // behavior + agg[row] = agg[strongest_agg]; + } else if (strongest_unagg != -1) { + // set the strongest neighbor in the unagg group + strongest_neighbor[row] = strongest_unagg; + } else { + // no neighbor + strongest_neighbor[row] = row; + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, + const IndexType *__restrict__ agg_const_val, + IndexType *__restrict__ agg_val) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num || agg_val[row] != -1) { + return; + } + ValueType max_weight_agg = zero(); + IndexType strongest_agg = -1; + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); + if (agg_const_val[col] != -1 && + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { + max_weight_agg = weight; + strongest_agg = col; + } + } + if (strongest_agg != -1) { + agg_val[row] = agg_const_val[strongest_agg]; + } else { + agg_val[row] = row; + } +} + +// This is the undeterminstic implementation which is the same implementation of +// the previous one but agg_val == agg_const_val. 
+template +__global__ + __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( + const size_type num, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ weight_vals, + const ValueType *__restrict__ diag, IndexType *__restrict__ agg_val) +{ + auto row = thread::get_thread_id_flat(); + if (row >= num || agg_val[row] != -1) { + return; + } + ValueType max_weight_agg = zero(); + IndexType strongest_agg = -1; + for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; idx++) { + auto col = col_idxs[idx]; + if (col == row) { + continue; + } + auto weight = weight_vals[idx] / max(abs(diag[row]), abs(diag[col])); + if (agg_val[col] != -1 && + thrust::tie(weight, col) > + thrust::tie(max_weight_agg, strongest_agg)) { + max_weight_agg = weight; + strongest_agg = col; + } + } + if (strongest_agg != -1) { + agg_val[row] = agg_val[strongest_agg]; + } else { + agg_val[row] = row; + } +} + + +} // namespace kernel diff --git a/common/preconditioner/isai_kernels.hpp.inc b/common/cuda_hip/preconditioner/isai_kernels.hpp.inc similarity index 65% rename from common/preconditioner/isai_kernels.hpp.inc rename to common/cuda_hip/preconditioner/isai_kernels.hpp.inc index 9eec6afaa04..27fc0c35b32 100644 --- a/common/preconditioner/isai_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/isai_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -50,7 +50,7 @@ __forceinline__ __device__ void generic_generate( const IndexType *__restrict__ i_row_ptrs, const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, IndexType *__restrict__ excess_rhs_sizes, - IndexType *__restrict__ excess_nnz, Callable trs_solve) + IndexType *__restrict__ excess_nnz, Callable direct_solve) { static_assert(subwarp_size >= row_size_limit, "incompatible subwarp_size"); const auto row = thread::get_subwarp_id_flat(); @@ -99,20 +99,21 @@ __forceinline__ __device__ void generic_generate( subwarps_per_block> storage; - auto trisystem_ptr = storage + (threadIdx.x / subwarp_size) * - subwarp_size * subwarp_size; + auto dense_system_ptr = storage + (threadIdx.x / subwarp_size) * + subwarp_size * subwarp_size; // row-major accessor - auto trisystem = [&](IndexType row, IndexType col) -> ValueType & { - return trisystem_ptr[row * subwarp_size + col]; + auto dense_system = [&](IndexType row, IndexType col) -> ValueType & { + return dense_system_ptr[row * subwarp_size + col]; }; #pragma unroll for (int i = 0; i < subwarp_size; ++i) { - trisystem(i, local_id) = zero(); + dense_system(i, local_id) = zero(); } subwarp.sync(); + IndexType rhs_one_idx{}; for (IndexType nz = 0; nz < i_row_size; ++nz) { auto col = i_col_idxs[i_row_begin + nz]; auto m_row_begin = m_row_ptrs[col]; @@ -124,23 +125,27 @@ __forceinline__ __device__ void generic_generate( i_row_size, subwarp, [&](IndexType, IndexType m_idx, IndexType i_idx, config::lane_mask_type, bool valid) { + rhs_one_idx += popcnt(subwarp.ballot( + valid && m_col_idxs[m_row_begin + m_idx] < row && + col == row)); if (valid) { - trisystem(nz, i_idx) = m_values[m_row_begin + m_idx]; + dense_system(nz, i_idx) = m_values[m_row_begin + m_idx]; } }); } subwarp.sync(); - // Now, read a full col of `trisystem` into local registers, which will - // be row elements after this (implicit) transpose + // Now, read a full col of 
`dense_system` into local registers, which + // will be row elements after this (implicit) transpose ValueType local_row[subwarp_size]; #pragma unroll for (int i = 0; i < subwarp_size; ++i) { - local_row[i] = trisystem(i, local_id); + local_row[i] = dense_system(i, local_id); } - const auto rhs = trs_solve(i_row_size, local_row, subwarp); + const auto rhs = + direct_solve(i_row_size, local_row, subwarp, rhs_one_idx); // Write back: if (local_id < i_row_size) { @@ -167,27 +172,27 @@ __global__ __launch_bounds__(default_block_size) void generate_l_inverse( IndexType *__restrict__ excess_rhs_sizes, IndexType *__restrict__ excess_nnz) { - auto trs_solve = [](IndexType num_elems, - const ValueType *__restrict__ local_row, - group::thread_block_tile &subwarp) { - const int local_id = subwarp.thread_rank(); - ValueType rhs = - local_id == num_elems - 1 ? one() : zero(); - // Solve Triangular system - for (int d_col = num_elems - 1; d_col >= 0; --d_col) { - const auto elem = local_row[d_col]; - if (d_col == local_id) { - rhs /= elem; - } + auto trs_solve = + [](IndexType num_elems, const ValueType *__restrict__ local_row, + group::thread_block_tile &subwarp, size_type) { + const int local_id = subwarp.thread_rank(); + ValueType rhs = local_id == num_elems - 1 ? 
one() + : zero(); + // Solve Triangular system + for (int d_col = num_elems - 1; d_col >= 0; --d_col) { + const auto elem = local_row[d_col]; + if (d_col == local_id) { + rhs /= elem; + } - const ValueType bot = subwarp.shfl(rhs, d_col); - if (local_id < d_col) { - rhs -= bot * elem; + const ValueType bot = subwarp.shfl(rhs, d_col); + if (local_id < d_col) { + rhs -= bot * elem; + } } - } - return rhs; - }; + return rhs; + }; generic_generate( num_rows, m_row_ptrs, m_col_idxs, m_values, i_row_ptrs, i_col_idxs, @@ -208,7 +213,8 @@ __global__ __launch_bounds__(default_block_size) void generate_u_inverse( { auto trs_solve = [](IndexType num_elems, const ValueType *__restrict__ local_row, - group::thread_block_tile &subwarp) { + group::thread_block_tile &subwarp, + size_type) { const int local_id = subwarp.thread_rank(); ValueType rhs = local_id == 0 ? one() : zero(); // Solve Triangular system @@ -233,6 +239,56 @@ __global__ __launch_bounds__(default_block_size) void generate_u_inverse( } +template +__global__ __launch_bounds__(default_block_size) void generate_general_inverse( + IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, + const IndexType *__restrict__ m_col_idxs, + const ValueType *__restrict__ m_values, + const IndexType *__restrict__ i_row_ptrs, + const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, + IndexType *__restrict__ excess_rhs_sizes, + IndexType *__restrict__ excess_nnz, bool spd) +{ + auto general_solve = [spd](IndexType num_elems, + ValueType *__restrict__ local_row, + group::thread_block_tile &subwarp, + size_type rhs_one_idx) { + const int local_id = subwarp.thread_rank(); + ValueType rhs = + local_id == rhs_one_idx ? 
one() : zero(); + size_type perm = local_id; + auto pivoted = subwarp.thread_rank() >= num_elems; + auto status = true; + for (size_type i = 0; i < num_elems; i++) { + const auto piv = choose_pivot(subwarp, local_row[i], pivoted); + if (local_id == piv) { + pivoted = true; + } + if (local_id == i) { + perm = piv; + } + + apply_gauss_jordan_transform_with_rhs( + subwarp, piv, i, local_row, &rhs, status); + } + + ValueType sol = subwarp.shfl(rhs, perm); + + if (spd) { + auto diag = subwarp.shfl(sol, num_elems - 1); + sol /= sqrt(diag); + } + + return sol; + }; + + generic_generate( + num_rows, m_row_ptrs, m_col_idxs, m_values, i_row_ptrs, i_col_idxs, + i_values, excess_rhs_sizes, excess_nnz, general_solve); +} + + template __global__ __launch_bounds__(default_block_size) void generate_excess_system( IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, @@ -244,11 +300,13 @@ __global__ __launch_bounds__(default_block_size) void generate_excess_system( const IndexType *__restrict__ excess_nz_ptrs, IndexType *__restrict__ excess_row_ptrs, IndexType *__restrict__ excess_col_idxs, - ValueType *__restrict__ excess_values, ValueType *__restrict__ excess_rhs) + ValueType *__restrict__ excess_values, ValueType *__restrict__ excess_rhs, + size_type e_start, size_type e_end) { - const auto row = thread::get_subwarp_id_flat(); + const auto row = + thread::get_subwarp_id_flat() + e_start; - if (row >= num_rows) { + if (row >= e_end) { return; } @@ -260,7 +318,7 @@ __global__ __launch_bounds__(default_block_size) void generate_excess_system( const int local_id = subwarp.thread_rank(); const auto prefix_mask = (config::lane_mask_type{1} << local_id) - 1; - if (row == 0 && local_id == 0) { + if (row == e_start && local_id == 0) { excess_row_ptrs[0] = 0; } @@ -271,6 +329,9 @@ __global__ __launch_bounds__(default_block_size) void generate_excess_system( auto excess_rhs_begin = excess_rhs_ptrs[row]; auto excess_nz_begin = excess_nz_ptrs[row]; + auto out_nz_begin = 
excess_nz_begin - excess_nz_ptrs[e_start]; + auto out_ptrs_begin = excess_rhs_begin - excess_rhs_ptrs[e_start]; + // defer long rows: store their nnz and number of matches for (IndexType nz = 0; nz < i_row_size; ++nz) { auto col = i_col_idxs[i_row_begin + nz]; @@ -283,36 +344,67 @@ __global__ __launch_bounds__(default_block_size) void generate_excess_system( i_row_size, subwarp, [&](IndexType col, IndexType m_idx, IndexType i_idx, config::lane_mask_type mask, bool valid) { - // trisystem(nz, i_idx) = m_values[m_row_begin + m_idx] + // dense_system(nz, i_idx) = m_values[m_row_begin + m_idx] // only in sparse :) if (valid) { - auto nz = excess_nz_begin + popcnt(mask & prefix_mask); - excess_col_idxs[nz] = excess_rhs_begin + i_idx; + auto nz = out_nz_begin + popcnt(mask & prefix_mask); + excess_col_idxs[nz] = out_ptrs_begin + i_idx; excess_values[nz] = m_values[m_row_begin + m_idx]; } - excess_nz_begin += popcnt(mask); + out_nz_begin += popcnt(mask); }); if (local_id == 0) { // build right-hand side: 1 for diagonal entry, 0 else - excess_rhs[excess_rhs_begin + nz] = + excess_rhs[out_ptrs_begin + nz] = row == col ? 
one() : zero(); // store row pointers - excess_row_ptrs[excess_rhs_begin + nz + 1] = excess_nz_begin; + excess_row_ptrs[out_ptrs_begin + nz + 1] = out_nz_begin; } } } +template +__global__ __launch_bounds__(default_block_size) void scale_excess_solution( + const IndexType *__restrict__ excess_block_ptrs, + ValueType *__restrict__ excess_solution, size_type e_start, size_type e_end) +{ + const auto warp_id = thread::get_subwarp_id_flat(); + auto subwarp = + group::tiled_partition(group::this_thread_block()); + const int local_id = subwarp.thread_rank(); + const auto row = warp_id + e_start; + + if (row >= e_end) { + return; + } + + const IndexType offset = excess_block_ptrs[e_start]; + const IndexType block_begin = excess_block_ptrs[row] - offset; + const IndexType block_end = excess_block_ptrs[row + 1] - offset; + if (block_end == block_begin) { + return; + } + const auto diag = excess_solution[block_end - 1]; + const ValueType scal = one() / sqrt(diag); + + for (size_type i = block_begin + local_id; i < block_end; + i += subwarp_size) { + excess_solution[i] *= scal; + } +} template __global__ __launch_bounds__(default_block_size) void copy_excess_solution( IndexType num_rows, const IndexType *__restrict__ i_row_ptrs, const IndexType *__restrict__ excess_rhs_ptrs, const ValueType *__restrict__ excess_solution, - ValueType *__restrict__ i_values) + ValueType *__restrict__ i_values, size_type e_start, size_type e_end) { - const auto row = thread::get_subwarp_id_flat(); + const auto excess_row = + thread::get_subwarp_id_flat(); + const auto row = excess_row + e_start; - if (row >= num_rows) { + if (row >= e_end) { return; } @@ -327,7 +419,8 @@ __global__ __launch_bounds__(default_block_size) void copy_excess_solution( if (excess_size > 0) { // copy the values for this row for (IndexType nz = local_id; nz < excess_size; nz += subwarp_size) { - i_values[nz + i_row_begin] = excess_solution[nz + excess_begin]; + i_values[nz + i_row_begin] = + excess_solution[nz + 
excess_begin - excess_rhs_ptrs[e_start]]; } } } diff --git a/common/preconditioner/jacobi_advanced_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc similarity index 99% rename from common/preconditioner/jacobi_advanced_apply_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc index 2426728d402..abf100be7e5 100644 --- a/common/preconditioner/jacobi_advanced_apply_kernel.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/preconditioner/jacobi_generate_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc similarity index 99% rename from common/preconditioner/jacobi_generate_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc index b402c94e5db..713dcc3dedc 100644 --- a/common/preconditioner/jacobi_generate_kernel.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/common/preconditioner/jacobi_kernels.hpp.inc b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc similarity index 99% rename from common/preconditioner/jacobi_kernels.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc index d480a0a154a..c3fa889b210 100644 --- a/common/preconditioner/jacobi_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/preconditioner/jacobi_simple_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc similarity index 99% rename from common/preconditioner/jacobi_simple_apply_kernel.hpp.inc rename to common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc index c7a472bd409..db73880732d 100644 --- a/common/preconditioner/jacobi_simple_apply_kernel.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc b/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc new file mode 100644 index 00000000000..41110c05780 --- /dev/null +++ b/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc @@ -0,0 +1,580 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "common_gmres_kernels.hpp.inc" + + +template +__global__ __launch_bounds__(default_block_size) void zero_matrix_kernel( + size_type m, size_type n, size_type stride, ValueType *__restrict__ array) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < n) { + auto pos = tidx; + for (size_type k = 0; k < m; ++k) { + array[pos] = zero(); + pos += stride; + } + } +} + + +// Must be called with at least `num_rows * stride_krylov` threads in total. 
+template +__global__ __launch_bounds__(block_size) void initialize_2_1_kernel( + size_type num_rows, size_type num_rhs, size_type krylov_dim, + Accessor3d krylov_bases, ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_nc) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + // krylov indices + const auto krylov_idx = global_id / krylov_stride[0]; + const auto reminder = global_id % krylov_stride[0]; + const auto krylov_row = reminder / krylov_stride[1]; + const auto rhs = reminder % krylov_stride[1]; + + // residual_norm indices (separated for better coalesced access) + const auto residual_row = global_id / stride_residual_nc; + const auto residual_col = global_id % stride_residual_nc; + + if (krylov_idx < krylov_dim + 1 && krylov_row < num_rows && rhs < num_rhs) { + krylov_bases(krylov_idx, krylov_row, rhs) = zero(); + } + + if (residual_row < krylov_dim + 1 && residual_col < num_rhs) { + residual_norm_collection[residual_row * stride_residual_nc + + residual_col] = zero(); + } +} + + +// Must be called with at least `num_rows * num_rhs` threads in total. 
+template +__global__ __launch_bounds__(block_size) void initialize_2_2_kernel( + size_type num_rows, size_type num_rhs, + const ValueType *__restrict__ residual, size_type stride_residual, + const remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ residual_norm_collection, Accessor3d krylov_bases, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + size_type *__restrict__ final_iter_nums) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const auto row_idx = global_id / krylov_stride[1]; + const auto col_idx = global_id % krylov_stride[1]; + + if (global_id < num_rhs) { + residual_norm_collection[global_id] = residual_norm[global_id]; + final_iter_nums[global_id] = 0; + } + + if (row_idx < num_rows && col_idx < num_rhs) { + auto value = residual[row_idx * stride_residual + col_idx] / + residual_norm[col_idx]; + krylov_bases(0, row_idx, col_idx) = value; + next_krylov_basis[row_idx * stride_next_krylov + col_idx] = value; + } +} + + +__global__ + __launch_bounds__(default_block_size) void increase_final_iteration_numbers_kernel( + size_type *__restrict__ final_iter_nums, + const stopping_status *__restrict__ stop_status, size_type total_number) +{ + const auto global_id = thread::get_thread_id_flat(); + if (global_id < total_number) { + final_iter_nums[global_id] += !stop_status[global_id].has_stopped(); + } +} + + +template +__global__ __launch_bounds__(default_dot_size) void multinorm2_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex *__restrict__ norms, + const stopping_status *__restrict__ stop_status) +{ + using rc_vtype = remove_complex; + const auto tidx = threadIdx.x; + const auto tidy = threadIdx.y; + const auto col_idx = blockIdx.x * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, gridDim.y); 
+ const auto start_row = blockIdx.y * num; + const auto end_row = + ((blockIdx.y + 1) * num > num_rows) ? num_rows : (blockIdx.y + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ + UninitializedArray + reduction_helper_array; + rc_vtype *__restrict__ reduction_helper = reduction_helper_array; + rc_vtype local_res = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + local_res += squared_norm(next_krylov_basis[next_krylov_idx]); + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; + group::this_thread_block().sync(); + local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = + group::tiled_partition(group::this_thread_block()); + const auto sum = + reduce(tile_block, local_res, + [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto norms_idx = new_col_idx; + atomic_add(norms + norms_idx, sum); + } +} + + +template +__global__ + __launch_bounds__(default_dot_size) void multinorminf_without_stop_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, + remove_complex *__restrict__ norms, size_type stride_norms) +{ + using rc_vtype = remove_complex; + const auto tidx = threadIdx.x; + const auto tidy = threadIdx.y; + const auto col_idx = blockIdx.x * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, gridDim.y); + const auto start_row = blockIdx.y * num; + const auto end_row = + ((blockIdx.y + 1) * num > num_rows) ? 
num_rows : (blockIdx.y + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ + UninitializedArray + reduction_helper_array; + rc_vtype *__restrict__ reduction_helper = reduction_helper_array; + rc_vtype local_max = zero(); + if (col_idx < num_cols) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + local_max = (local_max >= abs(next_krylov_basis[next_krylov_idx])) + ? local_max + : abs(next_krylov_basis[next_krylov_idx]); + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_max; + group::this_thread_block().sync(); + local_max = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = + group::tiled_partition(group::this_thread_block()); + const auto value = + reduce(tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + return ((a >= b) ? a : b); + }); + const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols) { + const auto norms_idx = new_col_idx; + atomic_max(norms + norms_idx, value); + } +} + + +// ONLY computes the inf-norm (into norms2) when compute_inf is true +template +__global__ __launch_bounds__(default_dot_size) void multinorm2_inf_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, + remove_complex *__restrict__ norms1, + remove_complex *__restrict__ norms2, + const stopping_status *__restrict__ stop_status) +{ + using rc_vtype = remove_complex; + const auto tidx = threadIdx.x; + const auto tidy = threadIdx.y; + const auto col_idx = blockIdx.x * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, gridDim.y); + const auto start_row = blockIdx.y * num; + const auto end_row = + ((blockIdx.y + 1) * num > num_rows) ? 
num_rows : (blockIdx.y + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ UninitializedArray< + rc_vtype, (1 + compute_inf) * default_dot_dim *(default_dot_dim + 1)> + reduction_helper_array; + rc_vtype *__restrict__ reduction_helper_add = reduction_helper_array; + rc_vtype *__restrict__ reduction_helper_max = + static_cast(reduction_helper_array) + + default_dot_dim * (default_dot_dim + 1); + rc_vtype local_res = zero(); + rc_vtype local_max = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + const auto num = next_krylov_basis[next_krylov_idx]; + local_res += squared_norm(num); + if (compute_inf) { + local_max = ((local_max >= abs(num)) ? local_max : abs(num)); + } + } + } + // Add reduction + reduction_helper_add[tidx * (default_dot_dim + 1) + tidy] = local_res; + if (compute_inf) { + reduction_helper_max[tidx * (default_dot_dim + 1) + tidy] = local_max; + } + group::this_thread_block().sync(); + local_res = reduction_helper_add[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = + group::tiled_partition(group::this_thread_block()); + const auto sum = + reduce(tile_block, local_res, + [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + rc_vtype reduced_max{}; + if (compute_inf) { + local_max = reduction_helper_max[tidy * (default_dot_dim + 1) + tidx]; + reduced_max = reduce(tile_block, local_max, + [](const rc_vtype &a, const rc_vtype &b) { + return ((a >= b) ? 
a : b); + }); + } + const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto norms_idx = new_col_idx; + atomic_add(norms1 + norms_idx, sum); + if (compute_inf) { + atomic_max(norms2 + norms_idx, reduced_max); + } + } +} + + +template +__global__ __launch_bounds__(dot_dim *dot_dim) void multidot_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status) +{ + /* + * In general in this kernel: + * grid_dim + * x: for col_idx (^= which right hand side) + * y: for row_idx + * z: for num_iters (number of krylov vectors) + * block_dim + * x: for col_idx (must be < dot_dim) + * y: for row_idx (must be < dot_dim) + * (z not used, must be set to 1 in dim) + */ + const size_type tidx = threadIdx.x; + const size_type tidy = threadIdx.y; + const size_type col_idx = blockIdx.x * blockDim.x + threadIdx.x; + const size_type num_rows_per_thread = ceildiv(num_rows, gridDim.y); + const size_type start_row = blockIdx.y * num_rows_per_thread + threadIdx.y; + const auto end_row = min((blockIdx.y + 1) * num_rows_per_thread, num_rows); + const size_type k = blockIdx.z; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ UninitializedArray + reduction_helper_array; + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row; i < end_row; i += blockDim.y) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + ValueType other_basis = krylov_bases(k, i, col_idx); + local_res += 
next_krylov_basis[next_krylov_idx] * conj(other_basis); + } + } + // Transpose local_res, so each warp contains a local_res from the same + // right hand side + reduction_helper[tidx * dot_dim + tidy] = local_res; + auto thread_block = group::this_thread_block(); + thread_block.sync(); + local_res = reduction_helper[tidy * dot_dim + tidx]; + const auto new_col_idx = blockIdx.x * blockDim.x + tidy; + const auto tile_block = group::tiled_partition(thread_block); + const auto sum = + reduce(tile_block, local_res, + [](const ValueType &a, const ValueType &b) { return a + b; }); + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto hessenberg_idx = k * stride_hessenberg + new_col_idx; + atomic_add(hessenberg_iter + hessenberg_idx, sum); + } +} + + +template +__global__ __launch_bounds__(block_size) void singledot_kernel( + size_type num_rows, const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status) +{ + /* + * In general in this kernel: + * grid_dim + * x: for row_idx + * y: for num_iters (number of krylov vectors) + * block_dim + * x: for row_idx (must be block_size) + * (y and z not used, must be set to 1 in dim) + */ + const size_type tidx = threadIdx.x; + constexpr size_type col_idx{0}; + const size_type k = blockIdx.y; + const size_type num_rows_per_thread = ceildiv(num_rows, gridDim.x); + const size_type start_row = blockIdx.x * num_rows_per_thread + threadIdx.x; + const auto end_row = min((blockIdx.x + 1) * num_rows_per_thread, num_rows); + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ UninitializedArray reduction_helper_array; + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + 
if (!stop_status[col_idx].has_stopped()) { + for (size_type i = start_row; i < end_row; i += block_size) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + ValueType other_basis = krylov_bases(k, i, col_idx); + local_res += next_krylov_basis[next_krylov_idx] * conj(other_basis); + } + } + // Transpose local_res, so each warp contains a local_res from the same + // right hand side + reduction_helper[tidx] = local_res; + auto thread_block = group::this_thread_block(); + thread_block.sync(); + reduce(thread_block, reduction_helper, + [](const ValueType &a, const ValueType &b) { return a + b; }); + if (tidx == 0 && !stop_status[col_idx].has_stopped()) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + atomic_add(hessenberg_iter + hessenberg_idx, reduction_helper[0]); + } +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. +template +__global__ __launch_bounds__(block_size) void update_next_krylov_kernel( + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, + const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + auto local_res = next_krylov_basis[next_krylov_idx]; + for (size_type k = 0; k < num_iters; ++k) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + + local_res -= hessenberg_iter[hessenberg_idx] * + krylov_bases(k, row_idx, col_idx); + } + next_krylov_basis[next_krylov_idx] = local_res; + } +} + + +// Must be called with at least `num_rows * 
stride_next_krylov` threads in +// total. +template +__global__ __launch_bounds__(block_size) void update_next_krylov_and_add_kernel( + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, const ValueType *__restrict__ buffer_iter, + size_type stride_buffer, const stopping_status *__restrict__ stop_status, + const stopping_status *__restrict__ reorth_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped() && + !reorth_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + auto local_res = next_krylov_basis[next_krylov_idx]; + for (size_type k = 0; k < num_iters; ++k) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + const auto buffer_idx = k * stride_buffer + col_idx; + local_res -= + buffer_iter[buffer_idx] * krylov_bases(k, row_idx, col_idx); + if ((row_idx == 0) && !reorth_status[col_idx].has_stopped()) { + hessenberg_iter[hessenberg_idx] += buffer_iter[buffer_idx]; + } + } + next_krylov_basis[next_krylov_idx] = local_res; + } +} + + +// Must be called with at least `num_rhs` threads +template +__global__ __launch_bounds__(block_size) void check_arnoldi_norms( + size_type num_rhs, remove_complex *__restrict__ arnoldi_norm, + size_type stride_norm, ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, + const stopping_status *__restrict__ stop_status, + stopping_status *__restrict__ reorth_status, + size_type *__restrict__ num_reorth) +{ + const remove_complex eta_squared = 1.0 / 2.0; + const auto col_idx = thread::get_thread_id_flat(); + constexpr 
bool has_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + + if (col_idx < num_rhs && !stop_status[col_idx].has_stopped()) { + const auto num0 = (sqrt(eta_squared * arnoldi_norm[col_idx])); + const auto num11 = sqrt(arnoldi_norm[col_idx + stride_norm]); + const auto num2 = has_scalar ? (arnoldi_norm[col_idx + 2 * stride_norm]) + : remove_complex{}; + if (num11 < num0) { + reorth_status[col_idx].reset(); + atomic_add(num_reorth, one()); + } else { + reorth_status[col_idx].stop(1); + } + arnoldi_norm[col_idx] = num0; + arnoldi_norm[col_idx + stride_norm] = num11; + hessenberg_iter[iter * stride_hessenberg + col_idx] = num11; + gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, iter, col_idx, num2 / num11); + } +} + + +template +__global__ __launch_bounds__(block_size) void set_scalar_kernel( + size_type num_rhs, size_type num_blocks, + const RealValueType *__restrict__ residual_norm, size_type stride_residual, + const RealValueType *__restrict__ arnoldi_inf, size_type stride_inf, + Accessor3d krylov_bases) +{ + static_assert(!is_complex_s::value, + "ValueType must not be complex!"); + const auto global_id = thread::get_thread_id_flat(); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const auto blk_idx = global_id / krylov_stride[1]; + const auto col_idx = global_id % krylov_stride[1]; + + if (blk_idx < num_blocks && col_idx < num_rhs) { + if (blk_idx == 0) { + const auto num1 = residual_norm[col_idx]; + const auto num2 = arnoldi_inf[col_idx]; + gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, {0}, col_idx, num2 / num1); + } else { + const auto num = one(); + gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, blk_idx, col_idx, num); + } + } +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. 
+template +__global__ __launch_bounds__(block_size) void update_krylov_next_krylov_kernel( + size_type iter, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + const auto hessenberg = + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx]; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + + const auto next_krylov_value = + next_krylov_basis[next_krylov_idx] / hessenberg; + + next_krylov_basis[next_krylov_idx] = next_krylov_value; + krylov_bases(iter + 1, row_idx, col_idx) = next_krylov_value; + } +} + + +// Must be called with at least `stride_preconditioner * num_rows` threads +// in total. 
+template +__global__ __launch_bounds__(block_size) void calculate_Qy_kernel( + size_type num_rows, size_type num_cols, const Accessor3d krylov_bases, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ before_preconditioner, + size_type stride_preconditioner, + const size_type *__restrict__ final_iter_nums) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_id = global_id / stride_preconditioner; + const auto col_id = global_id % stride_preconditioner; + + if (row_id < num_rows && col_id < num_cols) { + ValueType temp = zero(); + for (size_type j = 0; j < final_iter_nums[col_id]; ++j) { + temp += krylov_bases(j, row_id, col_id) * y[j * stride_y + col_id]; + } + before_preconditioner[global_id] = temp; + } +} diff --git a/common/cuda_hip/solver/common_gmres_kernels.hpp.inc b/common/cuda_hip/solver/common_gmres_kernels.hpp.inc new file mode 100644 index 00000000000..84ee6f52f03 --- /dev/null +++ b/common/cuda_hip/solver/common_gmres_kernels.hpp.inc @@ -0,0 +1,199 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +// Must be called with at least `max(stride_b * num_rows, krylov_dim * +// num_cols)` threads in total. +template +__global__ __launch_bounds__(block_size) void initialize_1_kernel( + size_type num_rows, size_type num_cols, size_type krylov_dim, + const ValueType *__restrict__ b, size_type stride_b, + ValueType *__restrict__ residual, size_type stride_residual, + ValueType *__restrict__ givens_sin, size_type stride_sin, + ValueType *__restrict__ givens_cos, size_type stride_cos, + stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + + const auto row_idx = global_id / stride_b; + const auto col_idx = global_id % stride_b; + + if (global_id < num_cols) { + stop_status[global_id].reset(); + } + + if (row_idx < num_rows && col_idx < num_cols) { + residual[row_idx * stride_residual + col_idx] = + b[row_idx * stride_b + col_idx]; + } + + if (global_id < krylov_dim * num_cols) { + const auto row_givens = global_id / num_cols; + const auto col_givens = global_id % num_cols; + + givens_sin[row_givens * stride_sin + col_givens] = zero(); + givens_cos[row_givens * stride_cos + col_givens] = zero(); + } +} + 
+ +template +__device__ void calculate_sin_and_cos_kernel( + size_type col_idx, size_type num_cols, size_type iter, + const ValueType &this_hess, const ValueType &next_hess, + ValueType *givens_sin, size_type stride_sin, ValueType *givens_cos, + size_type stride_cos, ValueType ®ister_sin, ValueType ®ister_cos) +{ + if (this_hess == zero()) { + register_cos = zero(); + register_sin = one(); + } else { + const auto scale = abs(this_hess) + abs(next_hess); + const auto hypotenuse = + scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + + abs(next_hess / scale) * abs(next_hess / scale)); + register_cos = conj(this_hess) / hypotenuse; + register_sin = conj(next_hess) / hypotenuse; + } + givens_cos[iter * stride_cos + col_idx] = register_cos; + givens_sin[iter * stride_sin + col_idx] = register_sin; +} + + +template +__device__ void calculate_residual_norm_kernel( + size_type col_idx, size_type num_cols, size_type iter, + const ValueType ®ister_sin, const ValueType ®ister_cos, + remove_complex *residual_norm, + ValueType *residual_norm_collection, + size_type stride_residual_norm_collection) +{ + const auto this_rnc = + residual_norm_collection[iter * stride_residual_norm_collection + + col_idx]; + const auto next_rnc = -conj(register_sin) * this_rnc; + residual_norm_collection[iter * stride_residual_norm_collection + col_idx] = + register_cos * this_rnc; + residual_norm[col_idx] = abs(next_rnc); + residual_norm_collection[(iter + 1) * stride_residual_norm_collection + + col_idx] = next_rnc; +} + + +// Must be called with at least `num_cols` threads in total. 
+template +__global__ __launch_bounds__(block_size) void givens_rotation_kernel( + size_type num_rows, size_type num_cols, size_type iter, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + ValueType *__restrict__ givens_sin, size_type stride_sin, + ValueType *__restrict__ givens_cos, size_type stride_cos, + remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_norm_collection, + const stopping_status *__restrict__ stop_status) +{ + const auto col_idx = thread::get_thread_id_flat(); + + if (col_idx >= num_cols || stop_status[col_idx].has_stopped()) { + return; + } + + auto this_hess = hessenberg_iter[col_idx]; + auto next_hess = hessenberg_iter[stride_hessenberg + col_idx]; + for (size_type i = 0; i < iter; ++i) { + const auto cos = givens_cos[i * stride_cos + col_idx]; + const auto sin = givens_sin[i * stride_sin + col_idx]; + hessenberg_iter[i * stride_hessenberg + col_idx] = + cos * this_hess + sin * next_hess; + this_hess = conj(cos) * next_hess - conj(sin) * this_hess; + next_hess = hessenberg_iter[(i + 2) * stride_hessenberg + col_idx]; + } + // for j in 0:iter - 1 + // temp = cos(j)*hessenberg(j) + + // sin(j)*hessenberg(j+1) + // hessenberg(j+1) = -sin(j)*hessenberg(j) + + // cos(j)*hessenberg(j+1) + // hessenberg(j) = temp; + // end + + ValueType register_sin; + ValueType register_cos; + calculate_sin_and_cos_kernel(col_idx, num_cols, iter, this_hess, next_hess, + givens_sin, stride_sin, givens_cos, stride_cos, + register_sin, register_cos); + // Calculate sin and cos on hessenberg(iter) and hessenberg(iter+1) + + hessenberg_iter[iter * stride_hessenberg + col_idx] = + register_cos * this_hess + register_sin * next_hess; + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = + zero(); + // hessenberg(iter) = cos(iter)*hessenberg(iter) + + // sin(iter)*hessenberg(iter+1) + // hessenberg(iter+1) = 0 + + calculate_residual_norm_kernel( + col_idx, num_cols, 
iter, register_sin, register_cos, residual_norm, + residual_norm_collection, stride_residual_norm_collection); + // Calculate residual norm +} + + +// Must be called with at least `num_rhs` threads in total. +template +__global__ __launch_bounds__(block_size) void solve_upper_triangular_kernel( + size_type num_cols, size_type num_rhs, + const ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_norm_collection, + const ValueType *__restrict__ hessenberg, size_type stride_hessenberg, + ValueType *__restrict__ y, size_type stride_y, + const size_type *__restrict__ final_iter_nums) +{ + const auto col_idx = thread::get_thread_id_flat(); + + if (col_idx >= num_rhs) { + return; + } + + for (int i = final_iter_nums[col_idx] - 1; i >= 0; --i) { + auto temp = + residual_norm_collection[i * stride_residual_norm_collection + + col_idx]; + for (size_type j = i + 1; j < final_iter_nums[col_idx]; ++j) { + temp -= hessenberg[i * stride_hessenberg + j * num_rhs + col_idx] * + y[j * stride_y + col_idx]; + } + + y[i * stride_y + col_idx] = + temp / hessenberg[i * stride_hessenberg + i * num_rhs + col_idx]; + } + // Solve upper triangular. + // y = hessenberg \ residual_norm_collection +} diff --git a/common/solver/gmres_kernels.hpp.inc b/common/cuda_hip/solver/gmres_kernels.hpp.inc similarity index 59% rename from common/solver/gmres_kernels.hpp.inc rename to common/cuda_hip/solver/gmres_kernels.hpp.inc index 7b991879571..1f44ea93cb8 100644 --- a/common/solver/gmres_kernels.hpp.inc +++ b/common/cuda_hip/solver/gmres_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,39 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -// Must be called with at least `max(stride_b * num_rows, krylov_dim * -// num_cols)` threads in total. -template -__global__ __launch_bounds__(block_size) void initialize_1_kernel( - size_type num_rows, size_type num_cols, size_type krylov_dim, - const ValueType *__restrict__ b, size_type stride_b, - ValueType *__restrict__ residual, size_type stride_residual, - ValueType *__restrict__ givens_sin, size_type stride_sin, - ValueType *__restrict__ givens_cos, size_type stride_cos, - stopping_status *__restrict__ stop_status) -{ - const auto global_id = thread::get_thread_id_flat(); - - const auto row_idx = global_id / stride_b; - const auto col_idx = global_id % stride_b; - - if (global_id < num_cols) { - stop_status[global_id].reset(); - } - - if (row_idx < num_rows && col_idx < num_cols) { - residual[row_idx * stride_residual + col_idx] = - b[row_idx * stride_b + col_idx]; - } - - if (global_id < krylov_dim * num_cols) { - const auto row_givens = global_id / num_cols; - const auto col_givens = global_id % num_cols; - - givens_sin[row_givens * stride_sin + col_givens] = zero(); - givens_cos[row_givens * stride_cos + col_givens] = zero(); - } -} +#include "common_gmres_kernels.hpp.inc" // Must be called with at least `num_rows * num_rhs` threads in total. 
@@ -243,140 +211,6 @@ __global__ __launch_bounds__(block_size) void update_krylov_kernel( } -template -__device__ void calculate_sin_and_cos_kernel( - size_type col_idx, size_type num_cols, size_type iter, - const ValueType &this_hess, const ValueType &next_hess, - ValueType *givens_sin, size_type stride_sin, ValueType *givens_cos, - size_type stride_cos, ValueType ®ister_sin, ValueType ®ister_cos) -{ - if (this_hess == zero()) { - register_cos = zero(); - register_sin = one(); - } else { - const auto scale = abs(this_hess) + abs(next_hess); - const auto hypotenuse = - scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + - abs(next_hess / scale) * abs(next_hess / scale)); - register_cos = conj(this_hess) / hypotenuse; - register_sin = conj(next_hess) / hypotenuse; - } - givens_cos[iter * stride_cos + col_idx] = register_cos; - givens_sin[iter * stride_sin + col_idx] = register_sin; -} - - -template -__device__ void calculate_residual_norm_kernel( - size_type col_idx, size_type num_cols, size_type iter, - const ValueType ®ister_sin, const ValueType ®ister_cos, - remove_complex *residual_norm, - ValueType *residual_norm_collection, - size_type stride_residual_norm_collection) -{ - const auto this_rnc = - residual_norm_collection[iter * stride_residual_norm_collection + - col_idx]; - const auto next_rnc = -conj(register_sin) * this_rnc; - residual_norm_collection[iter * stride_residual_norm_collection + col_idx] = - register_cos * this_rnc; - residual_norm[col_idx] = abs(next_rnc); - residual_norm_collection[(iter + 1) * stride_residual_norm_collection + - col_idx] = next_rnc; -} - - -// Must be called with at least `num_cols` threads in total. 
-template -__global__ __launch_bounds__(block_size) void givens_rotation_kernel( - size_type num_rows, size_type num_cols, size_type iter, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - ValueType *__restrict__ givens_sin, size_type stride_sin, - ValueType *__restrict__ givens_cos, size_type stride_cos, - remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, - size_type stride_residual_norm_collection, - const stopping_status *__restrict__ stop_status) -{ - const auto col_idx = thread::get_thread_id_flat(); - - if (col_idx >= num_cols || stop_status[col_idx].has_stopped()) { - return; - } - - auto this_hess = hessenberg_iter[col_idx]; - auto next_hess = hessenberg_iter[stride_hessenberg + col_idx]; - for (size_type i = 0; i < iter; ++i) { - const auto cos = givens_cos[i * stride_cos + col_idx]; - const auto sin = givens_sin[i * stride_sin + col_idx]; - hessenberg_iter[i * stride_hessenberg + col_idx] = - cos * this_hess + sin * next_hess; - this_hess = conj(cos) * next_hess - conj(sin) * this_hess; - next_hess = hessenberg_iter[(i + 2) * stride_hessenberg + col_idx]; - } - // for j in 0:iter - 1 - // temp = cos(j)*hessenberg(j) + - // sin(j)*hessenberg(j+1) - // hessenberg(j+1) = -sin(j)*hessenberg(j) + - // cos(j)*hessenberg(j+1) - // hessenberg(j) = temp; - // end - - ValueType register_sin; - ValueType register_cos; - calculate_sin_and_cos_kernel(col_idx, num_cols, iter, this_hess, next_hess, - givens_sin, stride_sin, givens_cos, stride_cos, - register_sin, register_cos); - // Calculate sin and cos on hessenberg(iter) and hessenberg(iter+1) - - hessenberg_iter[iter * stride_hessenberg + col_idx] = - register_cos * this_hess + register_sin * next_hess; - hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = - zero(); - // hessenberg(iter) = cos(iter)*hessenberg(iter) + - // sin(iter)*hessenberg(iter+1) - // hessenberg(iter+1) = 0 - - calculate_residual_norm_kernel( - col_idx, num_cols, 
iter, register_sin, register_cos, residual_norm, - residual_norm_collection, stride_residual_norm_collection); - // Calculate residual norm -} - - -// Must be called with at least `num_rhs` threads in total. -template -__global__ __launch_bounds__(block_size) void solve_upper_triangular_kernel( - size_type num_cols, size_type num_rhs, - const ValueType *__restrict__ residual_norm_collection, - size_type stride_residual_norm_collection, - const ValueType *__restrict__ hessenberg, size_type stride_hessenberg, - ValueType *__restrict__ y, size_type stride_y, - const size_type *__restrict__ final_iter_nums) -{ - const auto col_idx = thread::get_thread_id_flat(); - - if (col_idx >= num_rhs) { - return; - } - - for (int i = final_iter_nums[col_idx] - 1; i >= 0; --i) { - auto temp = - residual_norm_collection[i * stride_residual_norm_collection + - col_idx]; - for (size_type j = i + 1; j < final_iter_nums[col_idx]; ++j) { - temp -= hessenberg[i * stride_hessenberg + j * num_rhs + col_idx] * - y[j * stride_y + col_idx]; - } - - y[i * stride_y + col_idx] = - temp / hessenberg[i * stride_hessenberg + i * num_rhs + col_idx]; - } - // Solve upper triangular. - // y = hessenberg \ residual_norm_collection -} - - // Must be called with at least `stride_preconditioner * num_rows` threads in // total. template diff --git a/common/cuda_hip/solver/idr_kernels.hpp.inc b/common/cuda_hip/solver/idr_kernels.hpp.inc new file mode 100644 index 00000000000..52e9a3313a7 --- /dev/null +++ b/common/cuda_hip/solver/idr_kernels.hpp.inc @@ -0,0 +1,345 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +__global__ __launch_bounds__(default_block_size) void initialize_m_kernel( + size_type subspace_dim, size_type nrhs, ValueType *__restrict__ m_values, + size_type m_stride, stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row = global_id / m_stride; + const auto col = global_id % m_stride; + + if (global_id < nrhs) { + stop_status[global_id].reset(); + } + + if (row < subspace_dim && col < nrhs * subspace_dim) { + m_values[row * m_stride + col] = + (row == col / nrhs) ? 
one() : zero(); + } +} + + +template +__global__ + __launch_bounds__(block_size) void orthonormalize_subspace_vectors_kernel( + size_type num_rows, size_type num_cols, ValueType *__restrict__ values, + size_type stride) +{ + const auto tidx = thread::get_thread_id_flat(); + + __shared__ UninitializedArray reduction_helper_array; + // they are not be used in the same time. + ValueType *reduction_helper = reduction_helper_array; + auto reduction_helper_real = + reinterpret_cast *>(reduction_helper); + + for (size_type row = 0; row < num_rows; row++) { + for (size_type i = 0; i < row; i++) { + auto dot = zero(); + for (size_type j = tidx; j < num_cols; j += block_size) { + dot += values[row * stride + j] * conj(values[i * stride + j]); + } + + // Ensure already finish reading this shared memory + __syncthreads(); + reduction_helper[tidx] = dot; + reduce( + group::this_thread_block(), reduction_helper, + [](const ValueType &a, const ValueType &b) { return a + b; }); + __syncthreads(); + + dot = reduction_helper[0]; + for (size_type j = tidx; j < num_cols; j += block_size) { + values[row * stride + j] -= dot * values[i * stride + j]; + } + } + + auto norm = zero>(); + for (size_type j = tidx; j < num_cols; j += block_size) { + norm += squared_norm(values[row * stride + j]); + } + // Ensure already finish reading this shared memory + __syncthreads(); + reduction_helper_real[tidx] = norm; + reduce(group::this_thread_block(), reduction_helper_real, + [](const remove_complex &a, + const remove_complex &b) { return a + b; }); + __syncthreads(); + + norm = sqrt(reduction_helper_real[0]); + for (size_type j = tidx; j < num_cols; j += block_size) { + values[row * stride + j] /= norm; + } + } +} + + +template +__global__ + __launch_bounds__(default_block_size) void solve_lower_triangular_kernel( + size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ m_values, size_type m_stride, + const ValueType *__restrict__ f_values, size_type f_stride, + ValueType 
*__restrict__ c_values, size_type c_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + + if (global_id >= nrhs) { + return; + } + + if (!stop_status[global_id].has_stopped()) { + for (size_type row = 0; row < subspace_dim; row++) { + auto temp = f_values[row * f_stride + global_id]; + for (size_type col = 0; col < row; col++) { + temp -= m_values[row * m_stride + col * nrhs + global_id] * + c_values[col * c_stride + global_id]; + } + c_values[row * c_stride + global_id] = + temp / m_values[row * m_stride + row * nrhs + global_id]; + } + } +} + + +template +__global__ __launch_bounds__(default_block_size) void step_1_kernel( + size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ residual_values, size_type residual_stride, + const ValueType *__restrict__ c_values, size_type c_stride, + const ValueType *__restrict__ g_values, size_type g_stride, + ValueType *__restrict__ v_values, size_type v_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row = global_id / nrhs; + const auto col = global_id % nrhs; + + if (row >= num_rows) { + return; + } + + if (!stop_status[col].has_stopped()) { + auto temp = residual_values[row * residual_stride + col]; + for (size_type j = k; j < subspace_dim; j++) { + temp -= c_values[j * c_stride + col] * + g_values[row * g_stride + j * nrhs + col]; + } + v_values[row * v_stride + col] = temp; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void step_2_kernel( + size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ omega_values, + const ValueType *__restrict__ v_values, size_type v_stride, + const ValueType *__restrict__ c_values, size_type c_stride, + ValueType *__restrict__ u_values, size_type u_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto 
global_id = thread::get_thread_id_flat(); + const auto row = global_id / nrhs; + const auto col = global_id % nrhs; + + if (row >= num_rows) { + return; + } + + if (!stop_status[col].has_stopped()) { + auto temp = omega_values[col] * v_values[row * v_stride + col]; + for (size_type j = k; j < subspace_dim; j++) { + temp += c_values[j * c_stride + col] * + u_values[row * u_stride + j * nrhs + col]; + } + u_values[row * u_stride + k * nrhs + col] = temp; + } +} + + +template +__global__ __launch_bounds__(default_dot_size) void multidot_kernel( + size_type num_rows, size_type nrhs, const ValueType *__restrict__ p_i, + const ValueType *__restrict__ g_k, size_type g_k_stride, + ValueType *__restrict__ alpha, + const stopping_status *__restrict__ stop_status) +{ + const auto tidx = threadIdx.x; + const auto tidy = threadIdx.y; + const auto rhs = blockIdx.x * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, gridDim.y); + const auto start_row = blockIdx.y * num; + const auto end_row = + ((blockIdx.y + 1) * num > num_rows) ? 
num_rows : (blockIdx.y + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + __shared__ + UninitializedArray + reduction_helper_array; + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + if (rhs < nrhs && !stop_status[rhs].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto g_idx = i * g_k_stride + rhs; + local_res += p_i[i] * g_k[g_idx]; + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; + __syncthreads(); + local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = + group::tiled_partition(group::this_thread_block()); + const auto sum = + reduce(tile_block, local_res, + [](const ValueType &a, const ValueType &b) { return a + b; }); + const auto new_rhs = blockIdx.x * default_dot_dim + tidy; + if (tidx == 0 && new_rhs < nrhs && !stop_status[new_rhs].has_stopped()) { + atomic_add(alpha + new_rhs, sum); + } +} + + +template +__global__ __launch_bounds__(block_size) void update_g_k_and_u_kernel( + size_type k, size_type i, size_type size, size_type nrhs, + const ValueType *__restrict__ alpha, const ValueType *__restrict__ m_values, + size_type m_stride, const ValueType *__restrict__ g_values, + size_type g_stride, ValueType *__restrict__ g_k_values, + size_type g_k_stride, ValueType *__restrict__ u_values, size_type u_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto tidx = thread::get_thread_id_flat(); + const auto row = tidx / g_k_stride; + const auto rhs = tidx % g_k_stride; + + if (row >= size || rhs >= nrhs) { + return; + } + + if (!stop_status[rhs].has_stopped()) { + const auto fact = alpha[rhs] / m_values[i * m_stride + i * nrhs + rhs]; + g_k_values[row * g_k_stride + rhs] -= + fact * g_values[row * g_stride + i * nrhs + rhs]; + u_values[row * u_stride + k * nrhs + 
rhs] -= + fact * u_values[row * u_stride + i * nrhs + rhs]; + } +} + + +template +__global__ __launch_bounds__(block_size) void update_g_kernel( + size_type k, size_type size, size_type nrhs, + const ValueType *__restrict__ g_k_values, size_type g_k_stride, + ValueType *__restrict__ g_values, size_type g_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto tidx = thread::get_thread_id_flat(); + const auto row = tidx / g_k_stride; + const auto rhs = tidx % nrhs; + + if (row >= size || rhs >= nrhs) { + return; + } + + if (!stop_status[rhs].has_stopped()) { + g_values[row * g_stride + k * nrhs + rhs] = + g_k_values[row * g_k_stride + rhs]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void update_x_r_and_f_kernel( + size_type k, size_type size, size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ m_values, size_type m_stride, + const ValueType *__restrict__ g_values, size_type g_stride, + const ValueType *__restrict__ u_values, size_type u_stride, + ValueType *__restrict__ f_values, size_type f_stride, + ValueType *__restrict__ r_values, size_type r_stride, + ValueType *__restrict__ x_values, size_type x_stride, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row = global_id / x_stride; + const auto col = global_id % x_stride; + + if (row >= size || col >= nrhs) { + return; + } + + if (!stop_status[col].has_stopped()) { + const auto beta = f_values[k * f_stride + col] / + m_values[k * m_stride + k * nrhs + col]; + r_values[row * r_stride + col] -= + beta * g_values[row * g_stride + k * nrhs + col]; + x_values[row * x_stride + col] += + beta * u_values[row * u_stride + k * nrhs + col]; + + if (k < row && k + 1 < subspace_dim && row < subspace_dim) { + f_values[row * f_stride + col] -= + beta * m_values[row * m_stride + k * nrhs + col]; + } + } +} + + +template +__global__ __launch_bounds__(config::warp_size) void 
compute_omega_kernel( + size_type nrhs, const remove_complex kappa, + const ValueType *__restrict__ tht, + const remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ omega, + const stopping_status *__restrict__ stop_status) +{ + const auto global_id = thread::get_thread_id_flat(); + + if (global_id >= nrhs) { + return; + } + + if (!stop_status[global_id].has_stopped()) { + auto thr = omega[global_id]; + omega[global_id] /= tht[global_id]; + auto absrho = + abs(thr / (sqrt(real(tht[global_id])) * residual_norm[global_id])); + + if (absrho < kappa) { + omega[global_id] *= kappa / absrho; + } + } +} diff --git a/common/matrix/diagonal_kernels.hpp.inc b/common/matrix/diagonal_kernels.hpp.inc deleted file mode 100644 index 6e29ca2cd5e..00000000000 --- a/common/matrix/diagonal_kernels.hpp.inc +++ /dev/null @@ -1,147 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -namespace kernel { - - -template -__global__ __launch_bounds__(default_block_size) void apply_to_dense( - size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag, - size_type source_stride, const ValueType *__restrict__ source_values, - size_type result_stride, ValueType *__restrict__ result_values) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / num_cols; - const auto col = tidx % num_cols; - - if (row < num_rows) { - result_values[row * result_stride + col] = - source_values[row * source_stride + col] * diag[row]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void right_apply_to_dense( - size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag, - size_type source_stride, const ValueType *__restrict__ source_values, - size_type result_stride, ValueType *__restrict__ result_values) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / num_cols; - const auto col = tidx % num_cols; - - if (row < num_rows) { - result_values[row * result_stride + col] = - source_values[row * source_stride + col] * diag[col]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void apply_to_csr( - size_type num_rows, const ValueType *__restrict__ diag, - const IndexType *__restrict__ row_ptrs, - ValueType *__restrict__ result_values) -{ - constexpr auto warp_size = 
config::warp_size; - auto warp_tile = - group::tiled_partition(group::this_thread_block()); - const auto row = thread::get_subwarp_id_flat(); - const auto tid_in_warp = warp_tile.thread_rank(); - - if (row >= num_rows) { - return; - } - - const auto diag_val = diag[row]; - - for (size_type idx = row_ptrs[row] + tid_in_warp; idx < row_ptrs[row + 1]; - idx += warp_size) { - result_values[idx] *= diag_val; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void right_apply_to_csr( - size_type num_nnz, const ValueType *__restrict__ diag, - const IndexType *__restrict__ col_idxs, - ValueType *__restrict__ result_values) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx >= num_nnz) { - return; - } - - result_values[tidx] *= diag[col_idxs[tidx]]; -} - - -template -__global__ __launch_bounds__(default_block_size) void convert_to_csr( - size_type size, const ValueType *__restrict__ diag_values, - IndexType *__restrict__ row_ptrs, IndexType *__restrict__ col_idxs, - ValueType *__restrict__ csr_values) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx >= size) { - return; - } - if (tidx == 0) { - row_ptrs[size] = size; - } - - row_ptrs[tidx] = tidx; - col_idxs[tidx] = tidx; - csr_values[tidx] = diag_values[tidx]; -} - - -template -__global__ __launch_bounds__(default_block_size) void conj_transpose( - size_type size, const ValueType *__restrict__ orig_values, - ValueType *__restrict__ trans_values) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx >= size) { - return; - } - - trans_values[tidx] = conj(orig_values[tidx]); -} - - -} // namespace kernel diff --git a/common/solver/bicg_kernels.hpp.inc b/common/solver/bicg_kernels.hpp.inc deleted file mode 100644 index fdb8ee8f3f9..00000000000 --- a/common/solver/bicg_kernels.hpp.inc +++ /dev/null @@ -1,111 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -template -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ b, ValueType *__restrict__ r, - ValueType *__restrict__ z, ValueType *__restrict__ p, - ValueType *__restrict__ q, ValueType *__restrict__ r2, - ValueType *__restrict__ z2, ValueType *__restrict__ p2, - ValueType *__restrict__ q2, ValueType *__restrict__ prev_rho, - ValueType *__restrict__ rho, stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < num_cols) { - rho[tidx] = zero(); - prev_rho[tidx] = one(); - stop_status[tidx].reset(); - } - - if (tidx < num_rows * stride) { - r[tidx] = b[tidx]; - z[tidx] = zero(); - p[tidx] = zero(); - q[tidx] = zero(); - r2[tidx] = b[tidx]; - z2[tidx] = zero(); - p2[tidx] = zero(); - q2[tidx] = zero(); - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_1_kernel( - size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ p, const ValueType *__restrict__ z, - ValueType *__restrict__ p2, const ValueType *__restrict__ z2, - const ValueType *__restrict__ rho, const ValueType *__restrict__ prev_rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - const auto tmp = rho[col] / prev_rho[col]; - - p[tidx] = - prev_rho[col] == zero() ? z[tidx] : z[tidx] + tmp * p[tidx]; - - p2[tidx] = prev_rho[col] == zero() ? 
z2[tidx] - : z2[tidx] + tmp * p2[tidx]; -} - - -template -__global__ __launch_bounds__(default_block_size) void step_2_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, ValueType *__restrict__ x, ValueType *__restrict__ r, - ValueType *__restrict__ r2, const ValueType *__restrict__ p, - const ValueType *__restrict__ q, const ValueType *__restrict__ q2, - const ValueType *__restrict__ beta, const ValueType *__restrict__ rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - - if (col >= num_cols || tidx >= num_rows * num_cols || - stop_status[col].has_stopped()) { - return; - } - if (beta[col] != zero()) { - const auto tmp = rho[col] / beta[col]; - x[row * x_stride + col] += tmp * p[tidx]; - r[tidx] -= tmp * q[tidx]; - r2[tidx] -= tmp * q2[tidx]; - } -} diff --git a/common/solver/bicgstab_kernels.hpp.inc b/common/solver/bicgstab_kernels.hpp.inc deleted file mode 100644 index 03071970fcc..00000000000 --- a/common/solver/bicgstab_kernels.hpp.inc +++ /dev/null @@ -1,168 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -template -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ b, ValueType *__restrict__ r, - ValueType *__restrict__ rr, ValueType *__restrict__ y, - ValueType *__restrict__ s, ValueType *__restrict__ t, - ValueType *__restrict__ z, ValueType *__restrict__ v, - ValueType *__restrict__ p, ValueType *__restrict__ prev_rho, - ValueType *__restrict__ rho, ValueType *__restrict__ alpha, - ValueType *__restrict__ beta, ValueType *__restrict__ gamma, - ValueType *__restrict__ omega, stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < num_cols) { - prev_rho[tidx] = one(); - rho[tidx] = one(); - alpha[tidx] = one(); - beta[tidx] = one(); - gamma[tidx] = one(); - omega[tidx] = one(); - stop_status[tidx].reset(); - } - - if (tidx < num_rows * stride) { - r[tidx] = b[tidx]; - rr[tidx] = zero(); - y[tidx] = zero(); - s[tidx] = zero(); - t[tidx] = zero(); - z[tidx] = zero(); - v[tidx] = zero(); - p[tidx] = zero(); - } -} - - -template -__global__ __launch_bounds__(default_block_size) void 
step_1_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ r, ValueType *__restrict__ p, - const ValueType *__restrict__ v, const ValueType *__restrict__ rho, - const ValueType *__restrict__ prev_rho, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ omega, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - auto res = r[tidx]; - if (prev_rho[col] * omega[col] != zero()) { - const auto tmp = (rho[col] / prev_rho[col]) * (alpha[col] / omega[col]); - res += tmp * (p[tidx] - omega[col] * v[tidx]); - } - p[tidx] = res; -} - - -template -__global__ __launch_bounds__(default_block_size) void step_2_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ r, ValueType *__restrict__ s, - const ValueType *__restrict__ v, const ValueType *__restrict__ rho, - ValueType *__restrict__ alpha, const ValueType *__restrict__ beta, - const stopping_status *__restrict__ stop_status) -{ - const size_type tidx = thread::get_thread_id_flat(); - const size_type col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - auto t_alpha = zero(); - auto t_s = r[tidx]; - if (beta[col] != zero()) { - t_alpha = rho[col] / beta[col]; - t_s -= t_alpha * v[tidx]; - } - alpha[col] = t_alpha; - s[tidx] = t_s; -} - - -template -__global__ __launch_bounds__(default_block_size) void step_3_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, ValueType *__restrict__ x, ValueType *__restrict__ r, - const ValueType *__restrict__ s, const ValueType *__restrict__ t, - const ValueType *__restrict__ y, const ValueType *__restrict__ z, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ beta, - 
const ValueType *__restrict__ gamma, ValueType *__restrict__ omega, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - const auto x_pos = row * x_stride + col; - auto t_omega = zero(); - auto t_x = x[x_pos] + alpha[col] * y[tidx]; - auto t_r = s[tidx]; - if (beta[col] != zero()) { - t_omega = gamma[col] / beta[col]; - t_x += t_omega * z[tidx]; - t_r -= t_omega * t[tidx]; - } - omega[col] = t_omega; - x[x_pos] = t_x; - r[tidx] = t_r; -} - - -template -__global__ __launch_bounds__(default_block_size) void finalize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, ValueType *__restrict__ x, - const ValueType *__restrict__ y, const ValueType *__restrict__ alpha, - stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].is_finalized() || !stop_status[col].has_stopped()) { - return; - } - const auto x_pos = row * x_stride + col; - x[x_pos] = x[x_pos] + alpha[col] * y[tidx]; - stop_status[col].finalize(); -} diff --git a/common/solver/cg_kernels.hpp.inc b/common/solver/cg_kernels.hpp.inc deleted file mode 100644 index d318c30f338..00000000000 --- a/common/solver/cg_kernels.hpp.inc +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -template -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ b, ValueType *__restrict__ r, - ValueType *__restrict__ z, ValueType *__restrict__ p, - ValueType *__restrict__ q, ValueType *__restrict__ prev_rho, - ValueType *__restrict__ rho, stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < num_cols) { - rho[tidx] = zero(); - prev_rho[tidx] = one(); - stop_status[tidx].reset(); - } - - if (tidx < num_rows * stride) { - r[tidx] = b[tidx]; - z[tidx] = zero(); - p[tidx] = zero(); - q[tidx] = zero(); - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_1_kernel( - size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ p, const ValueType *__restrict__ z, - const ValueType *__restrict__ rho, const ValueType *__restrict__ prev_rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - const auto tmp = rho[col] / prev_rho[col]; - p[tidx] = - prev_rho[col] == zero() ? 
z[tidx] : z[tidx] + tmp * p[tidx]; -} - - -template -__global__ __launch_bounds__(default_block_size) void step_2_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, ValueType *__restrict__ x, ValueType *__restrict__ r, - const ValueType *__restrict__ p, const ValueType *__restrict__ q, - const ValueType *__restrict__ beta, const ValueType *__restrict__ rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - - if (col >= num_cols || tidx >= num_rows * num_cols || - stop_status[col].has_stopped()) { - return; - } - if (beta[col] != zero()) { - const auto tmp = rho[col] / beta[col]; - x[row * x_stride + col] += tmp * p[tidx]; - r[tidx] -= tmp * q[tidx]; - } -} diff --git a/common/solver/cgs_kernels.hpp.inc b/common/solver/cgs_kernels.hpp.inc deleted file mode 100644 index d6c3e64cd4c..00000000000 --- a/common/solver/cgs_kernels.hpp.inc +++ /dev/null @@ -1,137 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -template -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ b, ValueType *__restrict__ r, - ValueType *__restrict__ r_tld, ValueType *__restrict__ p, - ValueType *__restrict__ q, ValueType *__restrict__ u, - ValueType *__restrict__ u_hat, ValueType *__restrict__ v_hat, - ValueType *__restrict__ t, ValueType *__restrict__ alpha, - ValueType *__restrict__ beta, ValueType *__restrict__ gamma, - ValueType *__restrict__ rho_prev, ValueType *__restrict__ rho, - stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < num_cols) { - rho[tidx] = zero(); - alpha[tidx] = one(); - beta[tidx] = one(); - gamma[tidx] = one(); - rho_prev[tidx] = one(); - stop_status[tidx].reset(); - } - - if (tidx < num_rows * stride) { - r[tidx] = b[tidx]; - r_tld[tidx] = b[tidx]; - u[tidx] = zero(); - p[tidx] = zero(); - q[tidx] = zero(); - u_hat[tidx] = zero(); - v_hat[tidx] = zero(); - t[tidx] = zero(); - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_1_kernel( - size_type 
num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ r, ValueType *__restrict__ u, - ValueType *__restrict__ p, const ValueType *__restrict__ q, - ValueType *__restrict__ beta, const ValueType *__restrict__ rho, - const ValueType *__restrict__ rho_prev, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - if (rho_prev[col] != zero()) { - beta[col] = rho[col] / rho_prev[col]; - u[tidx] = r[tidx] + beta[col] * q[tidx]; - p[tidx] = u[tidx] + beta[col] * (q[tidx] + beta[col] * p[tidx]); - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_2_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ u, const ValueType *__restrict__ v_hat, - ValueType *__restrict__ q, ValueType *__restrict__ t, - ValueType *__restrict__ alpha, const ValueType *__restrict__ rho, - const ValueType *__restrict__ gamma, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - if (gamma[col] != zero()) { - alpha[col] = rho[col] / gamma[col]; - q[tidx] = u[tidx] - alpha[col] * v_hat[tidx]; - t[tidx] = u[tidx] + q[tidx]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_3_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, const ValueType *__restrict__ t, - const ValueType *__restrict__ v_hat, ValueType *__restrict__ r, - ValueType *__restrict__ x, const ValueType *__restrict__ alpha, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - if 
(col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - const auto x_pos = row * x_stride + col; - auto t_x = x[x_pos] + alpha[col] * v_hat[tidx]; - auto t_r = r[tidx] - alpha[col] * t[tidx]; - x[x_pos] = t_x; - r[tidx] = t_r; -} \ No newline at end of file diff --git a/common/solver/fcg_kernels.hpp.inc b/common/solver/fcg_kernels.hpp.inc deleted file mode 100644 index 2b5b72029a2..00000000000 --- a/common/solver/fcg_kernels.hpp.inc +++ /dev/null @@ -1,104 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -template -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ b, ValueType *__restrict__ r, - ValueType *__restrict__ z, ValueType *__restrict__ p, - ValueType *__restrict__ q, ValueType *__restrict__ t, - ValueType *__restrict__ prev_rho, ValueType *__restrict__ rho, - ValueType *__restrict__ rho_t, stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - - if (tidx < num_cols) { - rho[tidx] = zero(); - prev_rho[tidx] = one(); - rho_t[tidx] = one(); - stop_status[tidx].reset(); - } - - if (tidx < num_rows * stride) { - r[tidx] = b[tidx]; - z[tidx] = zero(); - p[tidx] = zero(); - q[tidx] = zero(); - t[tidx] = b[tidx]; - } -} - - -template -__global__ __launch_bounds__(default_block_size) void step_1_kernel( - size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ p, const ValueType *__restrict__ z, - const ValueType *__restrict__ rho, const ValueType *__restrict__ prev_rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto col = tidx % stride; - if (col >= num_cols || tidx >= num_rows * stride || - stop_status[col].has_stopped()) { - return; - } - const auto tmp = rho[col] / prev_rho[col]; - p[tidx] = - prev_rho[col] == zero() ? 
z[tidx] : z[tidx] + tmp * p[tidx]; -} - - -template -__global__ __launch_bounds__(default_block_size) void step_2_kernel( - size_type num_rows, size_type num_cols, size_type stride, - size_type x_stride, ValueType *__restrict__ x, ValueType *__restrict__ r, - ValueType *__restrict__ t, const ValueType *__restrict__ p, - const ValueType *__restrict__ q, const ValueType *__restrict__ beta, - const ValueType *__restrict__ rho, - const stopping_status *__restrict__ stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); - const auto row = tidx / stride; - const auto col = tidx % stride; - - if (col >= num_cols || tidx >= num_rows * num_cols || - stop_status[col].has_stopped()) { - return; - } - if (beta[col] != zero()) { - const auto tmp = rho[col] / beta[col]; - const auto prev_r = r[tidx]; - x[row * x_stride + col] += tmp * p[tidx]; - r[tidx] -= tmp * q[tidx]; - t[tidx] = r[tidx] - prev_r; - } -} \ No newline at end of file diff --git a/common/unified/base/kernel_launch.hpp b/common/unified/base/kernel_launch.hpp new file mode 100644 index 00000000000..5e5e1914476 --- /dev/null +++ b/common/unified/base/kernel_launch.hpp @@ -0,0 +1,283 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_ +#define GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_ + + +#include + + +#include +#include +#include +#include + + +#if defined(GKO_COMPILING_CUDA) + +#define GKO_DEVICE_NAMESPACE cuda +#define GKO_KERNEL __device__ +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +template +using device_type = typename detail::cuda_type_impl::type; + +template +device_type as_device_type(T value) +{ + return as_cuda_type(value); +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#elif defined(GKO_COMPILING_HIP) + +#define GKO_DEVICE_NAMESPACE hip +#define GKO_KERNEL __device__ +#include "hip/base/types.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +template +using device_type = typename detail::hip_type_impl::type; + +template +device_type as_device_type(T value) +{ + return as_hip_type(value); +} + + +} // namespace hip +} // namespace kernels +} // namespace gko + + +#elif defined(GKO_COMPILING_DPCPP) + +#define GKO_DEVICE_NAMESPACE dpcpp +#define GKO_KERNEL + + +namespace gko { +namespace kernels { +namespace 
dpcpp { + + +template +using device_type = T; + +template +device_type as_device_type(T value) +{ + return value; +} + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#elif defined(GKO_COMPILING_OMP) + +#define GKO_DEVICE_NAMESPACE omp +#define GKO_KERNEL + + +namespace gko { +namespace kernels { +namespace omp { + + +template +using device_type = T; + +template +device_type as_device_type(T value) +{ + return value; +} + + +} // namespace omp +} // namespace kernels +} // namespace gko + + +#else + +#error "This file should only be used inside Ginkgo device compilation" + +#endif + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { + + +/** + * @internal + * A simple row-major accessor as a device representation of gko::matrix::Dense + * objects. + * + * @tparam ValueType the value type of the underlying matrix. + */ +template +struct matrix_accessor { + ValueType *data; + size_type stride; + + /** + * @internal + * Returns a reference to the element at position (row, col). + */ + GKO_INLINE GKO_ATTRIBUTES ValueType &operator()(size_type row, + size_type col) + { + return data[row * stride + col]; + } + + /** + * @internal + * Returns a reference to the element at position idx in the underlying + * storage. + */ + GKO_INLINE GKO_ATTRIBUTES ValueType &operator[](size_type idx) + { + return data[idx]; + } +}; + + +/** + * @internal + * This struct is used to provide mappings from host types like + * gko::matrix::Dense to device representations of the same data, like an + * accessor storing only data pointer and stride. + * + * By default, it only maps std::complex to the corresponding device + * representation of the complex type. There are specializations for dealing + * with gko::Array and gko::matrix::Dense (both const and mutable) that map them + * to plain pointers or matrix_accessor objects. + * + * @tparam T the type being mapped. It will be used based on a + * forwarding-reference, i.e. 
preserve references in the input + * parameter, so special care must be taken to only return types that + * can be passed to the device, i.e. (structs containing) device + * pointers or values. This means that T will be either a r-value or + * l-value reference. + */ +template +struct to_device_type_impl { + using type = std::decay_t>; + static type map_to_device(T in) { return as_device_type(in); } +}; + +template +struct to_device_type_impl *&> { + using type = matrix_accessor>; + static type map_to_device(matrix::Dense *mtx) + { + return {as_device_type(mtx->get_values()), mtx->get_stride()}; + } +}; + +template +struct to_device_type_impl *&> { + using type = matrix_accessor>; + static type map_to_device(const matrix::Dense *mtx) + { + return {as_device_type(mtx->get_const_values()), mtx->get_stride()}; + } +}; + +template +struct to_device_type_impl &> { + using type = device_type *; + static type map_to_device(Array &array) + { + return as_device_type(array.get_data()); + } +}; + +template +struct to_device_type_impl &> { + using type = const device_type *; + static type map_to_device(const Array &array) + { + return as_device_type(array.get_const_data()); + } +}; + + +template +typename to_device_type_impl::type map_to_device(T &¶m) +{ + return to_device_type_impl::map_to_device(param); +} + + +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko + + +// these files include this file again to make inclusion work from both sides, +// this does not lead to issues due to the header guards. 
+#if defined(GKO_COMPILING_CUDA) +#include "cuda/base/kernel_launch.cuh" +#elif defined(GKO_COMPILING_HIP) +#include "hip/base/kernel_launch.hip.hpp" +#elif defined(GKO_COMPILING_DPCPP) +#include "dpcpp/base/kernel_launch.dp.hpp" +#elif defined(GKO_COMPILING_OMP) +#include "omp/base/kernel_launch.hpp" +#endif + + +#endif // GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_ diff --git a/common/unified/base/kernel_launch_solver.hpp b/common/unified/base/kernel_launch_solver.hpp new file mode 100644 index 00000000000..0f859631919 --- /dev/null +++ b/common/unified/base/kernel_launch_solver.hpp @@ -0,0 +1,149 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ +#define GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { + + +/** + * @internal + * Wrapper class used by default_stride(matrix::Dense*) to wrap a + * dense matrix using the default stride. + */ +template +struct default_stride_dense_wrapper { + ValueType *data; +}; + + +/** + * @internal + * Helper that creates a device representation of the input object based on the + * default stride that was passed to run_kernel_solver. + * @see default_stride_dense_wrapper + * @see default_stride(matrix::Dense*) + */ +template +struct device_unpack_solver_impl { + using type = T; + static GKO_INLINE GKO_ATTRIBUTES type unpack(T param, size_type) + { + return param; + } +}; + +template +struct device_unpack_solver_impl> { + using type = matrix_accessor; + static GKO_INLINE GKO_ATTRIBUTES type unpack( + default_stride_dense_wrapper param, size_type default_stride) + { + return {param.data, default_stride}; + } +}; + + +/** + * @internal + * Wraps the given matrix in a wrapper signifying that it has the default stride + * that was provided to run_kernel_solver. This avoids having individual stride + * parameters for all dense matrix parameters. 
+ */ +template +default_stride_dense_wrapper> default_stride( + matrix::Dense *mtx) +{ + return {as_device_type(mtx->get_values())}; +} + +/** + * @internal + * @copydoc default_stride(matrix::Dense*) + */ +template +default_stride_dense_wrapper> default_stride( + const matrix::Dense *mtx) +{ + return {as_device_type(mtx->get_const_values())}; +} + + +/** + * @internal + * Wraps the given matrix in a wrapper signifying that it is a row vector, i.e. + * we don't need to pass a stride parameter, but can access it directly as a + * pointer. + */ +template +device_type *row_vector(matrix::Dense *mtx) +{ + GKO_ASSERT(mtx->get_size()[0] == 1); + return as_device_type(mtx->get_values()); +} + +/** + * @internal + * @copydoc row_vector(matrix::Dense*) + */ +template +const device_type *row_vector(const matrix::Dense *mtx) +{ + GKO_ASSERT(mtx->get_size()[0] == 1); + return as_device_type(mtx->get_const_values()); +} + + +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko + + +#if defined(GKO_COMPILING_CUDA) +#include "cuda/base/kernel_launch_solver.cuh" +#elif defined(GKO_COMPILING_HIP) +#include "hip/base/kernel_launch_solver.hip.hpp" +#elif defined(GKO_COMPILING_DPCPP) +#include "dpcpp/base/kernel_launch_solver.dp.hpp" +#elif defined(GKO_COMPILING_OMP) +#include "omp/base/kernel_launch_solver.hpp" +#endif + + +#endif // GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ diff --git a/omp/components/precision_conversion.cpp b/common/unified/components/precision_conversion.cpp similarity index 87% rename from omp/components/precision_conversion.cpp rename to common/unified/components/precision_conversion.cpp index 4c4553470a8..09ad03125da 100644 --- a/omp/components/precision_conversion.cpp +++ b/common/unified/components/precision_conversion.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,9 +33,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/precision_conversion.hpp" +#include "common/unified/base/kernel_launch.hpp" + + namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { namespace components { @@ -43,16 +46,16 @@ template void convert_precision(std::shared_ptr exec, size_type size, const SourceType *in, TargetType *out) { -#pragma omp parallel for - for (size_type i = 0; i < size; ++i) { - out[i] = in[i]; - } + run_kernel( + exec, + [] GKO_KERNEL(auto idx, auto in, auto out) { out[idx] = in[idx]; }, + size, in, out); } GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL); } // namespace components -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/common/unified/matrix/coo_kernels.cpp b/common/unified/matrix/coo_kernels.cpp new file mode 100644 index 00000000000..095bbdf0e65 --- /dev/null +++ b/common/unified/matrix/coo_kernels.cpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/coo_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The Coo matrix format namespace. 
+ * + * @ingroup coo + */ +namespace coo { + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Coo *orig, + matrix::Diagonal *diag) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto tidx, auto orig_values, auto orig_row_idxs, + auto orig_col_idxs, auto diag) { + if (orig_row_idxs[tidx] == orig_col_idxs[tidx]) { + diag[orig_row_idxs[tidx]] = orig_values[tidx]; + } + }, + orig->get_num_stored_elements(), orig->get_const_values(), + orig->get_const_row_idxs(), orig->get_const_col_idxs(), + diag->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL); + + +} // namespace coo +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp new file mode 100644 index 00000000000..5134e0b88ee --- /dev/null +++ b/common/unified/matrix/csr_kernels.cpp @@ -0,0 +1,108 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/csr_kernels.hpp" + + +#include + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The Csr matrix format namespace. 
+ * + * @ingroup csr + */ +namespace csr { + + +template +void invert_permutation(std::shared_ptr exec, + size_type size, const IndexType *permutation_indices, + IndexType *inv_permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto tid, auto permutation, auto inv_permutation) { + inv_permutation[permutation[tid]] = tid; + }, + size, permutation_indices, inv_permutation); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); + + +template +void inverse_column_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *column_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto nnz = orig->get_num_stored_elements(); + auto size = std::max(num_rows, nnz); + run_kernel( + exec, + [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, + auto permutation, auto in_row_ptrs, auto in_col_idxs, + auto in_vals, auto out_row_ptrs, auto out_col_idxs, + auto out_vals) { + if (tid < num_nonzeros) { + out_col_idxs[tid] = permutation[in_col_idxs[tid]]; + out_vals[tid] = in_vals[tid]; + } + if (tid <= num_rows) { + out_row_ptrs[tid] = in_row_ptrs[tid]; + } + }, + size, num_rows, nnz, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), orig->get_const_values(), + column_permuted->get_row_ptrs(), column_permuted->get_col_idxs(), + column_permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); + + +} // namespace csr +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp new file mode 100644 index 00000000000..034e465014a --- /dev/null +++ b/common/unified/matrix/dense_kernels.cpp @@ -0,0 +1,425 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/dense_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The Dense matrix format namespace. 
+ * + * @ingroup dense + */ +namespace dense { + + +template +void copy(std::shared_ptr exec, + const matrix::Dense *input, + matrix::Dense *output) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto input, auto output) { + output(row, col) = input(row, col); + }, + input->get_size(), input, output); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + +template +void fill(std::shared_ptr exec, + matrix::Dense *mat, ValueType value) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto mat, auto value) { + mat(row, col) = value; + }, + mat->get_size(), mat, value); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); + + +template +void scale(std::shared_ptr exec, + const matrix::Dense *alpha, matrix::Dense *x) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { + x(row, col) *= alpha[col]; + }, + x->get_size(), alpha->get_const_values(), x); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { + x(row, col) *= alpha[0]; + }, + x->get_size(), alpha->get_const_values(), x); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); + + +template +void inv_scale(std::shared_ptr exec, + const matrix::Dense *alpha, + matrix::Dense *x) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { + x(row, col) /= alpha[col]; + }, + x->get_size(), alpha->get_const_values(), x); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x) { + x(row, col) /= alpha[0]; + }, + x->get_size(), alpha->get_const_values(), x); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_INV_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Dense *x, matrix::Dense *y) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + 
exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { + y(row, col) += alpha[col] * x(row, col); + }, + x->get_size(), alpha->get_const_values(), x, y); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { + y(row, col) += alpha[0] * x(row, col); + }, + x->get_size(), alpha->get_const_values(), x, y); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); + + +template +void sub_scaled(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Dense *x, matrix::Dense *y) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { + y(row, col) -= alpha[col] * x(row, col); + }, + x->get_size(), alpha->get_const_values(), x, y); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto alpha, auto x, auto y) { + y(row, col) -= alpha[0] * x(row, col); + }, + x->get_size(), alpha->get_const_values(), x, y); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); + + +template +void add_scaled_diag(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Diagonal *x, + matrix::Dense *y) +{ + const auto diag_values = x->get_const_values(); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto alpha, auto diag, auto y) { + y(i, i) += alpha[0] * diag[i]; + }, + x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); + + +template +void sub_scaled_diag(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Diagonal *x, + matrix::Dense *y) +{ + const auto diag_values = x->get_const_values(); + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto alpha, auto diag, auto y) { + y(i, i) -= alpha[0] * diag[i]; + }, + x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); +} + 
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); + + +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { + permuted(row, col) = orig(perm[row], perm[col]); + }, + orig->get_size(), orig, *permutation_indices, permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { + permuted(perm[row], perm[col]) = orig(row, col); + }, + orig->get_size(), orig, *permutation_indices, permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + +template +void row_gather(std::shared_ptr exec, + const Array *row_indices, + const matrix::Dense *orig, + matrix::Dense *row_gathered) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto rows, auto gathered) { + gathered(row, col) = orig(rows[row], col); + }, + dim<2>{row_indices->get_num_elems(), orig->get_size()[1]}, orig, + *row_indices, row_gathered); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); + + +template +void column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *column_permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { + permuted(row, col) = orig(row, perm[col]); + }, + orig->get_size(), orig, *permutation_indices, column_permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); + + +template +void 
inverse_row_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *row_permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { + permuted(perm[row], col) = orig(row, col); + }, + orig->get_size(), orig, *permutation_indices, row_permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); + + +template +void inverse_column_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *column_permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { + permuted(row, perm[col]) = orig(row, col); + }, + orig->get_size(), orig, *permutation_indices, column_permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Dense *orig, + matrix::Diagonal *diag) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto orig, auto diag) { diag[i] = orig(i, i); }, + diag->get_size()[0], orig, diag->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); + + +template +void inplace_absolute_dense(std::shared_ptr exec, + matrix::Dense *source) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto source) { + source(row, col) = abs(source(row, col)); + }, + source->get_size(), source); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); + + +template +void outplace_absolute_dense(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Dense> *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto source, auto result) { + result(row, col) = abs(source(row, col)); + }, + source->get_size(), source, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); + 
+ +template +void make_complex(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Dense> *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto source, auto result) { + result(row, col) = source(row, col); + }, + source->get_size(), source, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); + + +template +void get_real(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Dense> *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto source, auto result) { + result(row, col) = real(source(row, col)); + }, + source->get_size(), source, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); + + +template +void get_imag(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Dense> *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto source, auto result) { + result(row, col) = imag(source(row, col)); + }, + source->get_size(), source, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_IMAG_KERNEL); + + +} // namespace dense +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/matrix/diagonal_kernels.cpp b/common/unified/matrix/diagonal_kernels.cpp new file mode 100644 index 00000000000..e39bd40e207 --- /dev/null +++ b/common/unified/matrix/diagonal_kernels.cpp @@ -0,0 +1,153 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/diagonal_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The Diagonal matrix format namespace. 
+ * + * @ingroup diagonal + */ +namespace diagonal { + + +template +void apply_to_dense(std::shared_ptr exec, + const matrix::Diagonal *a, + const matrix::Dense *b, + matrix::Dense *c) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto source, auto result) { + result(row, col) = source(row, col) * diag[row]; + }, + b->get_size(), a->get_const_values(), b, c); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); + + +template +void right_apply_to_dense(std::shared_ptr exec, + const matrix::Diagonal *a, + const matrix::Dense *b, + matrix::Dense *c) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto source, auto result) { + result(row, col) = source(row, col) * diag[col]; + }, + b->get_size(), a->get_const_values(), b, c); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL); + + +template +void right_apply_to_csr(std::shared_ptr exec, + const matrix::Diagonal *a, + const matrix::Csr *b, + matrix::Csr *c) +{ + // TODO: combine copy and diag apply together + c->copy_from(b); + run_kernel( + exec, + [] GKO_KERNEL(auto tidx, auto diag, auto result_values, auto col_idxs) { + result_values[tidx] *= diag[col_idxs[tidx]]; + }, + c->get_num_stored_elements(), a->get_const_values(), c->get_values(), + c->get_const_col_idxs()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL); + + +template +void convert_to_csr(std::shared_ptr exec, + const matrix::Diagonal *source, + matrix::Csr *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto tidx, auto size, auto diag_values, auto row_ptrs, + auto col_idxs, auto csr_values) { + row_ptrs[tidx] = tidx; + col_idxs[tidx] = tidx; + csr_values[tidx] = diag_values[tidx]; + if (tidx == size - 1) { + row_ptrs[size] = size; + } + }, + source->get_size()[0], source->get_size()[0], + source->get_const_values(), result->get_row_ptrs(), + result->get_col_idxs(), 
result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Diagonal *orig, + matrix::Diagonal *trans) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto tidx, auto orig_values, auto trans_values) { + trans_values[tidx] = conj(orig_values[tidx]); + }, + orig->get_size()[0], orig->get_const_values(), trans->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL); + + +} // namespace diagonal +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/preconditioner/jacobi_kernels.cpp b/common/unified/preconditioner/jacobi_kernels.cpp new file mode 100644 index 00000000000..b7160a97617 --- /dev/null +++ b/common/unified/preconditioner/jacobi_kernels.cpp @@ -0,0 +1,158 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/preconditioner/jacobi_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The Jacobi preconditioner namespace. + * + * @ingroup jacobi + */ +namespace jacobi { + + +template +void scalar_conj(std::shared_ptr exec, + const Array &diag, Array &conj_diag) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto elem, auto diag, auto conj_diag) { + conj_diag[elem] = conj(diag[elem]); + }, + diag.get_num_elems(), diag, conj_diag); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL); + + +template +void invert_diagonal(std::shared_ptr exec, + const Array &diag, Array &inv_diag) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto elem, auto diag, auto inv_diag) { + inv_diag[elem] = safe_divide(one(diag[elem]), diag[elem]); + }, + diag.get_num_elems(), diag, inv_diag); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); + + +template +void scalar_apply(std::shared_ptr exec, + const Array &diag, + const matrix::Dense *alpha, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *x) +{ + if (alpha->get_size()[1] > 1) { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b, + auto beta, auto x) { + x(row, col) = beta[col] * x(row, col) + + alpha[col] * b(row, col) * 
diag[row]; + }, + x->get_size(), diag, alpha->get_const_values(), b, + beta->get_const_values(), x); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b, + auto beta, auto x) { + x(row, col) = + beta[0] * x(row, col) + alpha[0] * b(row, col) * diag[row]; + }, + x->get_size(), diag, alpha->get_const_values(), b, + beta->get_const_values(), x); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); + + +template +void simple_scalar_apply(std::shared_ptr exec, + const Array &diag, + const matrix::Dense *b, + matrix::Dense *x) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto b, auto x) { + x(row, col) = b(row, col) * diag[row]; + }, + x->get_size(), diag, b, x); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); + + +template +void scalar_convert_to_dense(std::shared_ptr exec, + const Array &blocks, + matrix::Dense *result) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto diag, auto result) { + result(row, col) = zero(diag[row]); + if (row == col) { + result(row, col) = diag[row]; + } + }, + result->get_size(), blocks, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); + + +} // namespace jacobi +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/omp/solver/bicg_kernels.cpp b/common/unified/solver/bicg_kernels.cpp similarity index 57% rename from omp/solver/bicg_kernels.cpp rename to common/unified/solver/bicg_kernels.cpp index d9e2864eedf..3f646b93819 100644 --- a/omp/solver/bicg_kernels.cpp +++ b/common/unified/solver/bicg_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,18 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/bicg_kernels.hpp" -#include +#include -#include -#include -#include -#include +#include "common/unified/base/kernel_launch_solver.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The BICG solver namespace. * @@ -54,7 +51,7 @@ namespace bicg { template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense *b, matrix::Dense *r, matrix::Dense *z, matrix::Dense *p, matrix::Dense *q, matrix::Dense *prev_rho, @@ -63,57 +60,59 @@ void initialize(std::shared_ptr exec, matrix::Dense *q2, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - stop_status->get_data()[j].reset(); - } -#pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - r2->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); - z2->at(i, j) = p2->at(i, j) = q2->at(i, j) = zero(); - } - } + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, + auto q, auto prev_rho, auto rho, auto r2, auto z2, + auto p2, auto q2, auto stop) { + if (row == 0) { + rho[col] = zero(rho[col]); + prev_rho[col] = one(prev_rho[col]); + stop[col].reset(); + } + r(row, col) = b(row, col); + r2(row, col) = b(row, col); + z(row, col) = p(row, col) = q(row, col) = z2(row, col) = + p2(row, col) = q2(row, col) = zero(z(row, col)); + }, + b->get_size(), b->get_stride(), default_stride(b), default_stride(r), + default_stride(z), default_stride(p), default_stride(q), + row_vector(prev_rho), row_vector(rho), default_stride(r2), + default_stride(z2), default_stride(p2), default_stride(q2), + *stop_status); 
} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, +void step_1(std::shared_ptr exec, matrix::Dense *p, const matrix::Dense *z, matrix::Dense *p2, const matrix::Dense *z2, const matrix::Dense *rho, const matrix::Dense *prev_rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto p, auto z, auto p2, auto z2, + auto rho, auto prev_rho, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], prev_rho[col]); + p(row, col) = z(row, col) + tmp * p(row, col); + p2(row, col) = z2(row, col) + tmp * p2(row, col); } - if (prev_rho->at(j) == zero()) { - p->at(i, j) = z->at(i, j); - p2->at(i, j) = z2->at(i, j); - } else { - auto tmp = rho->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); - p2->at(i, j) = z2->at(i, j) + tmp * p2->at(i, j); - } - } - } + }, + p->get_size(), p->get_stride(), default_stride(p), default_stride(z), + default_stride(p2), default_stride(z2), row_vector(rho), + row_vector(prev_rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, +void step_2(std::shared_ptr exec, matrix::Dense *x, matrix::Dense *r, matrix::Dense *r2, const matrix::Dense *p, const matrix::Dense *q, @@ -122,26 +121,26 @@ void step_2(std::shared_ptr exec, const matrix::Dense *rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - auto tmp = rho->at(j) / beta->at(j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); - 
r2->at(i, j) -= tmp * q2->at(i, j); + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto x, auto r, auto r2, auto p, + auto q, auto q2, auto beta, auto rho, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], beta[col]); + x(row, col) += tmp * p(row, col); + r(row, col) -= tmp * q(row, col); + r2(row, col) -= tmp * q2(row, col); } - } - } + }, + x->get_size(), r->get_stride(), x, default_stride(r), + default_stride(r2), default_stride(p), default_stride(q), + default_stride(q2), row_vector(beta), row_vector(rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_2_KERNEL); } // namespace bicg -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/omp/solver/bicgstab_kernels.cpp b/common/unified/solver/bicgstab_kernels.cpp similarity index 51% rename from omp/solver/bicgstab_kernels.cpp rename to common/unified/solver/bicgstab_kernels.cpp index d761fc044cf..a1d98b139e5 100644 --- a/omp/solver/bicgstab_kernels.cpp +++ b/common/unified/solver/bicgstab_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,20 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/bicgstab_kernels.hpp" -#include - - -#include +#include -#include -#include -#include +#include "common/unified/base/kernel_launch_solver.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The BICGSTAB solver namespace. 
* @@ -56,7 +51,7 @@ namespace bicgstab { template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense *b, matrix::Dense *r, matrix::Dense *rr, matrix::Dense *y, matrix::Dense *s, matrix::Dense *t, @@ -67,36 +62,34 @@ void initialize(std::shared_ptr exec, matrix::Dense *omega, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = one(); - prev_rho->at(j) = one(); - alpha->at(j) = one(); - beta->at(j) = one(); - gamma->at(j) = one(); - omega->at(j) = one(); - stop_status->get_data()[j].reset(); - } -#pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - rr->at(i, j) = zero(); - z->at(i, j) = zero(); - v->at(i, j) = zero(); - s->at(i, j) = zero(); - t->at(i, j) = zero(); - y->at(i, j) = zero(); - p->at(i, j) = zero(); - } - } + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto rr, auto y, + auto s, auto t, auto z, auto v, auto p, auto prev_rho, + auto rho, auto alpha, auto beta, auto gamma, auto omega, + auto stop) { + if (row == 0) { + rho[col] = prev_rho[col] = alpha[col] = beta[col] = gamma[col] = + omega[col] = one(rho[col]); + stop[col].reset(); + } + r(row, col) = b(row, col); + rr(row, col) = z(row, col) = v(row, col) = s(row, col) = + t(row, col) = y(row, col) = p(row, col) = zero(rr(row, col)); + }, + b->get_size(), b->get_stride(), default_stride(b), default_stride(r), + default_stride(rr), default_stride(y), default_stride(s), + default_stride(t), default_stride(z), default_stride(v), + default_stride(p), row_vector(prev_rho), row_vector(rho), + row_vector(alpha), row_vector(beta), row_vector(gamma), + row_vector(omega), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, +void step_1(std::shared_ptr exec, const matrix::Dense 
*r, matrix::Dense *p, const matrix::Dense *v, const matrix::Dense *rho, @@ -105,29 +98,27 @@ void step_1(std::shared_ptr exec, const matrix::Dense *omega, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (prev_rho->at(j) * omega->at(j) != zero()) { - const auto tmp = - rho->at(j) / prev_rho->at(j) * alpha->at(j) / omega->at(j); - p->at(i, j) = r->at(i, j) + - tmp * (p->at(i, j) - omega->at(j) * v->at(i, j)); - } else { - p->at(i, j) = r->at(i, j); + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto r, auto p, auto v, auto rho, + auto prev_rho, auto alpha, auto omega, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], prev_rho[col]) * + safe_divide(alpha[col], omega[col]); + p(row, col) = r(row, col) + + tmp * (p(row, col) - omega[col] * v(row, col)); } - } - } + }, + r->get_size(), r->get_stride(), default_stride(r), default_stride(p), + default_stride(v), row_vector(rho), row_vector(prev_rho), + row_vector(alpha), row_vector(omega), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, +void step_2(std::shared_ptr exec, const matrix::Dense *r, matrix::Dense *s, const matrix::Dense *v, const matrix::Dense *rho, @@ -135,30 +126,21 @@ void step_2(std::shared_ptr exec, const matrix::Dense *beta, const Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < s->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - alpha->at(j) = rho->at(j) / beta->at(j); - } else { - alpha->at(j) = zero(); - } - } -#pragma omp parallel for - for (size_type i = 0; i < s->get_size()[0]; ++i) { - for (size_type j = 0; j < s->get_size()[1]; ++j) { - if 
(stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - s->at(i, j) = r->at(i, j) - alpha->at(j) * v->at(i, j); - } else { - s->at(i, j) = r->at(i, j); + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto r, auto s, auto v, auto rho, + auto alpha, auto beta, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], beta[col]); + if (row == 0) { + alpha[col] = tmp; + } + s(row, col) = r(row, col) - tmp * v(row, col); } - } - } + }, + r->get_size(), r->get_stride(), default_stride(r), default_stride(s), + default_stride(v), row_vector(rho), row_vector(alpha), row_vector(beta), + *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); @@ -166,62 +148,59 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); template void step_3( - std::shared_ptr exec, matrix::Dense *x, + std::shared_ptr exec, matrix::Dense *x, matrix::Dense *r, const matrix::Dense *s, const matrix::Dense *t, const matrix::Dense *y, const matrix::Dense *z, const matrix::Dense *alpha, const matrix::Dense *beta, const matrix::Dense *gamma, matrix::Dense *omega, const Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - omega->at(j) = gamma->at(j) / beta->at(j); - } else { - omega->at(j) = zero(); - } - } -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto x, auto r, auto s, auto t, + auto y, auto z, auto alpha, auto beta, auto gamma, + auto omega, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(gamma[col], beta[col]); + if (row == 0) { + omega[col] = tmp; + } + x(row, col) += 
alpha[col] * y(row, col) + tmp * z(row, col); + r(row, col) = s(row, col) - tmp * t(row, col); } - x->at(i, j) += - alpha->at(j) * y->at(i, j) + omega->at(j) * z->at(i, j); - r->at(i, j) = s->at(i, j) - omega->at(j) * t->at(i, j); - } - } + }, + x->get_size(), r->get_stride(), x, default_stride(r), default_stride(s), + default_stride(t), default_stride(y), default_stride(z), + row_vector(alpha), row_vector(beta), row_vector(gamma), + row_vector(omega), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); template -void finalize(std::shared_ptr exec, +void finalize(std::shared_ptr exec, matrix::Dense *x, const matrix::Dense *y, const matrix::Dense *alpha, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped() && - !stop_status->get_const_data()[j].is_finalized()) { - for (size_type i = 0; i < x->get_size()[0]; ++i) { - x->at(i, j) += alpha->at(j) * y->at(i, j); - stop_status->get_data()[j].finalize(); + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto x, auto y, auto alpha, + auto stop) { + if (stop[col].has_stopped() && !stop[col].is_finalized()) { + x(row, col) += alpha[col] * y(row, col); + stop[col].finalize(); } - } - } + }, + x->get_size(), y->get_stride(), x, default_stride(y), row_vector(alpha), + *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL); } // namespace bicgstab -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/omp/solver/cg_kernels.cpp b/common/unified/solver/cg_kernels.cpp similarity index 60% rename from omp/solver/cg_kernels.cpp rename to common/unified/solver/cg_kernels.cpp index b9a88f25761..f47390aa3df 100644 --- a/omp/solver/cg_kernels.cpp +++ b/common/unified/solver/cg_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the 
Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,18 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/cg_kernels.hpp" -#include +#include -#include -#include -#include -#include +#include "common/unified/base/kernel_launch_solver.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The CG solver namespace. * @@ -54,59 +51,58 @@ namespace cg { template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense *b, matrix::Dense *r, matrix::Dense *z, matrix::Dense *p, matrix::Dense *q, matrix::Dense *prev_rho, matrix::Dense *rho, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - stop_status->get_data()[j].reset(); - } -#pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); - } - } + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, + auto q, auto prev_rho, auto rho, auto stop) { + if (row == 0) { + rho[col] = zero(rho[col]); + prev_rho[col] = one(prev_rho[col]); + stop[col].reset(); + } + r(row, col) = b(row, col); + z(row, col) = p(row, col) = q(row, col) = zero(z(row, col)); + }, + b->get_size(), b->get_stride(), b, default_stride(r), default_stride(z), + default_stride(p), default_stride(q), row_vector(prev_rho), + row_vector(rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, +void step_1(std::shared_ptr exec, matrix::Dense *p, const matrix::Dense *z, const matrix::Dense *rho, const matrix::Dense *prev_rho, const Array *stop_status) { 
-#pragma omp parallel for - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto p, auto z, auto rho, + auto prev_rho, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], prev_rho[col]); + p(row, col) = z(row, col) + tmp * p(row, col); } - if (prev_rho->at(j) == zero()) { - p->at(i, j) = z->at(i, j); - } else { - auto tmp = rho->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); - } - } - } + }, + p->get_size(), p->get_stride(), default_stride(p), default_stride(z), + row_vector(rho), row_vector(prev_rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, +void step_2(std::shared_ptr exec, matrix::Dense *x, matrix::Dense *r, const matrix::Dense *p, const matrix::Dense *q, @@ -114,25 +110,24 @@ void step_2(std::shared_ptr exec, const matrix::Dense *rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - auto tmp = rho->at(j) / beta->at(j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto x, auto r, auto p, auto q, + auto beta, auto rho, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho[col], beta[col]); + x(row, col) += tmp * p(row, col); + r(row, col) -= tmp * q(row, col); } - } - } + }, + x->get_size(), r->get_stride(), x, default_stride(r), default_stride(p), + default_stride(q), row_vector(beta), row_vector(rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_2_KERNEL); } // namespace cg -} // 
namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/omp/solver/cgs_kernels.cpp b/common/unified/solver/cgs_kernels.cpp similarity index 52% rename from omp/solver/cgs_kernels.cpp rename to common/unified/solver/cgs_kernels.cpp index a0678788565..948c52864f2 100644 --- a/omp/solver/cgs_kernels.cpp +++ b/common/unified/solver/cgs_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,18 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/cgs_kernels.hpp" -#include +#include -#include -#include -#include -#include +#include "common/unified/base/kernel_launch_solver.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The CGS solver namespace. 
* @@ -54,7 +51,7 @@ namespace cgs { template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense *b, matrix::Dense *r, matrix::Dense *r_tld, matrix::Dense *p, matrix::Dense *q, matrix::Dense *u, @@ -66,65 +63,65 @@ void initialize(std::shared_ptr exec, matrix::Dense *rho, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - alpha->at(j) = one(); - beta->at(j) = one(); - gamma->at(j) = one(); - stop_status->get_data()[j].reset(); - } -#pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - r->at(i, j) = b->at(i, j); - r_tld->at(i, j) = b->at(i, j); - u->at(i, j) = u_hat->at(i, j) = p->at(i, j) = q->at(i, j) = - v_hat->at(i, j) = t->at(i, j) = zero(); - } - } + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto r_tld, auto p, + auto q, auto u, auto u_hat, auto v_hat, auto t, + auto alpha, auto beta, auto gamma, auto prev_rho, + auto rho, auto stop) { + if (row == 0) { + rho[col] = zero(rho[col]); + prev_rho[col] = alpha[col] = beta[col] = gamma[col] = + one(prev_rho[col]); + stop[col].reset(); + } + r(row, col) = r_tld(row, col) = b(row, col); + u(row, col) = u_hat(row, col) = p(row, col) = q(row, col) = + v_hat(row, col) = t(row, col) = zero(u(row, col)); + }, + b->get_size(), b->get_stride(), default_stride(b), default_stride(r), + default_stride(r_tld), default_stride(p), default_stride(q), + default_stride(u), default_stride(u_hat), default_stride(v_hat), + default_stride(t), row_vector(alpha), row_vector(beta), + row_vector(gamma), row_vector(prev_rho), row_vector(rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, +void step_1(std::shared_ptr exec, const matrix::Dense *r, matrix::Dense *u, matrix::Dense *p, const 
matrix::Dense *q, matrix::Dense *beta, const matrix::Dense *rho, - const matrix::Dense *rho_prev, + const matrix::Dense *prev_rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (rho_prev->at(j) != zero()) { - beta->at(j) = rho->at(j) / rho_prev->at(j); - } - } -#pragma omp parallel for - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto r, auto u, auto p, auto q, + auto beta, auto rho, auto prev_rho, auto stop) { + if (!stop[col].has_stopped()) { + auto prev_rho_zero = prev_rho[col] == zero(prev_rho[col]); + auto tmp = prev_rho_zero ? beta[col] : rho[col] / prev_rho[col]; + if (row == 0 && !prev_rho_zero) { + beta[col] = tmp; + } + u(row, col) = r(row, col) + tmp * q(row, col); + p(row, col) = + u(row, col) + tmp * (q(row, col) + tmp * p(row, col)); } - u->at(i, j) = r->at(i, j) + beta->at(j) * q->at(i, j); - p->at(i, j) = - u->at(i, j) + - beta->at(j) * (q->at(i, j) + beta->at(j) * p->at(i, j)); - } - } + }, + r->get_size(), r->get_stride(), default_stride(r), default_stride(u), + default_stride(p), default_stride(q), row_vector(beta), row_vector(rho), + row_vector(prev_rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, +void step_2(std::shared_ptr exec, const matrix::Dense *u, const matrix::Dense *v_hat, matrix::Dense *q, matrix::Dense *t, matrix::Dense *alpha, @@ -132,25 +129,23 @@ void step_2(std::shared_ptr exec, const matrix::Dense *gamma, const Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < u->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (gamma->at(j) != zero()) { - 
alpha->at(j) = rho->at(j) / gamma->at(j); - } - } -#pragma omp parallel for - for (size_type i = 0; i < u->get_size()[0]; ++i) { - for (size_type j = 0; j < u->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto u, auto v_hat, auto q, auto t, + auto alpha, auto rho, auto gamma, auto stop) { + if (!stop[col].has_stopped()) { + auto gamma_is_zero = gamma[col] == zero(gamma[col]); + auto tmp = gamma_is_zero ? alpha[col] : rho[col] / gamma[col]; + if (row == 0 && !gamma_is_zero) { + alpha[col] = tmp; + } + q(row, col) = u(row, col) - tmp * v_hat(row, col); + t(row, col) = u(row, col) + q(row, col); } - q->at(i, j) = u->at(i, j) - alpha->at(j) * v_hat->at(i, j); - t->at(i, j) = u->at(i, j) + q->at(i, j); - } - } + }, + u->get_size(), u->get_stride(), default_stride(u), + default_stride(v_hat), default_stride(q), default_stride(t), + row_vector(alpha), row_vector(rho), row_vector(gamma), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); @@ -162,22 +157,24 @@ void step_3(std::shared_ptr exec, matrix::Dense *x, const matrix::Dense *alpha, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto t, auto u_hat, auto r, auto x, + auto alpha, auto stop) { + if (!stop[col].has_stopped()) { + x(row, col) += alpha[col] * u_hat(row, col); + r(row, col) -= alpha[col] * t(row, col); } - x->at(i, j) += alpha->at(j) * u_hat->at(i, j); - r->at(i, j) -= alpha->at(j) * t->at(i, j); - } - } + }, + t->get_size(), t->get_stride(), default_stride(t), + default_stride(u_hat), default_stride(r), x, row_vector(alpha), + *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_3_KERNEL); } // namespace 
cgs -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/omp/solver/fcg_kernels.cpp b/common/unified/solver/fcg_kernels.cpp similarity index 58% rename from omp/solver/fcg_kernels.cpp rename to common/unified/solver/fcg_kernels.cpp index b8b69ee6d91..ba9587e350d 100644 --- a/omp/solver/fcg_kernels.cpp +++ b/common/unified/solver/fcg_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,17 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/fcg_kernels.hpp" -#include +#include -#include -#include -#include +#include "common/unified/base/kernel_launch_solver.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The FCG solver namespace. 
* @@ -53,7 +51,7 @@ namespace fcg { template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense *b, matrix::Dense *r, matrix::Dense *z, matrix::Dense *p, matrix::Dense *q, matrix::Dense *t, @@ -61,53 +59,53 @@ void initialize(std::shared_ptr exec, matrix::Dense *rho, matrix::Dense *rho_t, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < b->get_size()[1]; ++j) { - rho->at(j) = zero(); - prev_rho->at(j) = one(); - rho_t->at(j) = one(); - stop_status->get_data()[j].reset(); - } -#pragma omp parallel for - for (size_type i = 0; i < b->get_size()[0]; ++i) { - for (size_type j = 0; j < b->get_size()[1]; ++j) { - t->at(i, j) = r->at(i, j) = b->at(i, j); - z->at(i, j) = p->at(i, j) = q->at(i, j) = zero(); - } - } + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto b, auto r, auto z, auto p, + auto q, auto t, auto prev_rho, auto rho, auto rho_t, + auto stop) { + if (row == 0) { + rho[col] = zero(rho[col]); + prev_rho[col] = rho_t[col] = one(prev_rho[col]); + stop[col].reset(); + } + t(row, col) = r(row, col) = b(row, col); + z(row, col) = p(row, col) = q(row, col) = zero(z(row, col)); + }, + b->get_size(), b->get_stride(), default_stride(b), default_stride(r), + default_stride(z), default_stride(p), default_stride(q), + default_stride(t), row_vector(prev_rho), row_vector(rho), + row_vector(rho_t), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, +void step_1(std::shared_ptr exec, matrix::Dense *p, const matrix::Dense *z, const matrix::Dense *rho_t, const matrix::Dense *prev_rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < p->get_size()[0]; ++i) { - for (size_type j = 0; j < p->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto p, auto z, auto rho_t, + auto 
prev_rho, auto stop) { + if (!stop[col].has_stopped()) { + auto tmp = safe_divide(rho_t[col], prev_rho[col]); + p(row, col) = z(row, col) + tmp * p(row, col); } - if (prev_rho->at(j) == zero()) { - p->at(i, j) = z->at(i, j); - } else { - auto tmp = rho_t->at(j) / prev_rho->at(j); - p->at(i, j) = z->at(i, j) + tmp * p->at(i, j); - } - } - } + }, + p->get_size(), p->get_stride(), default_stride(p), default_stride(z), + row_vector(rho_t), row_vector(prev_rho), *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, +void step_2(std::shared_ptr exec, matrix::Dense *x, matrix::Dense *r, matrix::Dense *t, const matrix::Dense *p, const matrix::Dense *q, @@ -115,27 +113,27 @@ void step_2(std::shared_ptr exec, const matrix::Dense *rho, const Array *stop_status) { -#pragma omp parallel for - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - if (stop_status->get_const_data()[j].has_stopped()) { - continue; - } - if (beta->at(j) != zero()) { - auto tmp = rho->at(j) / beta->at(j); - auto prev_r = r->at(i, j); - x->at(i, j) += tmp * p->at(i, j); - r->at(i, j) -= tmp * q->at(i, j); - t->at(i, j) = r->at(i, j) - prev_r; + run_kernel_solver( + exec, + [] GKO_KERNEL(auto row, auto col, auto x, auto r, auto t, auto p, + auto q, auto beta, auto rho, auto stop) { + if (!stop[col].has_stopped() && beta[col] != zero(beta[col])) { + auto tmp = rho[col] / beta[col]; + auto prev_r = r(row, col); + x(row, col) += tmp * p(row, col); + r(row, col) -= tmp * q(row, col); + t(row, col) = r(row, col) - prev_r; } - } - } + }, + x->get_size(), r->get_stride(), x, default_stride(r), default_stride(t), + default_stride(p), default_stride(q), row_vector(beta), row_vector(rho), + *stop_status); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_2_KERNEL); } // namespace fcg -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace 
gko diff --git a/omp/solver/ir_kernels.cpp b/common/unified/solver/ir_kernels.cpp similarity index 83% rename from omp/solver/ir_kernels.cpp rename to common/unified/solver/ir_kernels.cpp index ba68c407e95..3ba7c957ee3 100644 --- a/omp/solver/ir_kernels.cpp +++ b/common/unified/solver/ir_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,12 +33,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/ir_kernels.hpp" -#include +#include "common/unified/base/kernel_launch.hpp" namespace gko { namespace kernels { -namespace omp { +namespace GKO_DEVICE_NAMESPACE { /** * @brief The IR solver namespace. * @@ -47,17 +47,16 @@ namespace omp { namespace ir { -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, Array *stop_status) { -#pragma omp parallel for - for (size_type j = 0; j < stop_status->get_num_elems(); ++j) { - stop_status->get_data()[j].reset(); - } + run_kernel( + exec, [] GKO_KERNEL(auto i, auto stop) { stop[i].reset(); }, + stop_status->get_num_elems(), *stop_status); } } // namespace ir -} // namespace omp +} // namespace GKO_DEVICE_NAMESPACE } // namespace kernels } // namespace gko diff --git a/contributors.txt b/contributors.txt index 5631bd6cc43..047fe37d792 100644 --- a/contributors.txt +++ b/contributors.txt @@ -1,8 +1,10 @@ I hereby place all my contributions in this codebase under a BSD-3-Clause license, as specified in the repository's LICENSE file. +Aliaga José I. 
Universitat Jaume I Anzt Hartwig Karlsruhe Institute of Technology, The University of Tennessee Knoxville Boman Erik Sandia National Laboratories +Castelli Fabian Karlsruhe Institute of Technology Chen Yenchen National Taiwan University Cojean Terry Karlsruhe Institute of Technology Drzaic Jelena University of Zagreb @@ -13,8 +15,11 @@ Grützmacher Thomas Karlsruhe Institute of Technology Heroux Mike Sandia National Laboratories Hoemmen Mark Sandia National Laboratories Holeksa Claudius Karlsruhe Institute of Technology +Kashi Aditya Karlsruhe Institute of Technology +Koch Marcel Karlsruhe Institute of Technology Maier Matthias Texas A&M University Nayak Pratik Karlsruhe Institute of Technology Olenik Gregor HPSim Ribizel Tobias Karlsruhe Institute of Technology +Riemer Lukas Karlsruhe Institute of Technology Tsai Yuhsiang National Taiwan University diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index f5cff2e407d..f5388785d6a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(devices) # basic device functionalities, always compiled add_subdirectory(device_hooks) # placeholders for disabled modules add_library(ginkgo "") @@ -11,7 +10,9 @@ target_sources(ginkgo base/mtx_io.cpp base/perturbation.cpp base/version.cpp + factorization/ic.cpp factorization/ilu.cpp + factorization/par_ic.cpp factorization/par_ict.cpp factorization/par_ilu.cpp factorization/par_ilut.cpp @@ -24,19 +25,24 @@ target_sources(ginkgo matrix/dense.cpp matrix/diagonal.cpp matrix/ell.cpp + matrix/fbcsr.cpp matrix/hybrid.cpp matrix/identity.cpp matrix/permutation.cpp matrix/sellp.cpp matrix/sparsity_csr.cpp + multigrid/amgx_pgm.cpp preconditioner/isai.cpp preconditioner/jacobi.cpp + reorder/rcm.cpp solver/bicg.cpp solver/bicgstab.cpp solver/cg.cpp solver/cgs.cpp solver/fcg.cpp solver/gmres.cpp + solver/cb_gmres.cpp + solver/idr.cpp solver/ir.cpp solver/lower_trs.cpp solver/upper_trs.cpp @@ -57,16 +63,27 @@ target_compile_options(ginkgo PRIVATE 
"${GINKGO_COMPILER_FLAGS}") # regardless of whether it is installed or added as a subdirectory add_library(Ginkgo::ginkgo ALIAS ginkgo) target_link_libraries(ginkgo - PUBLIC ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip) + PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp) # The PAPI dependency needs to be exposed to the user. +set(GKO_RPATH_ADDITIONS "") if (GINKGO_HAVE_PAPI_SDE) target_link_libraries(ginkgo PUBLIC PAPI::PAPI) + list(GET PAPI_LIBRARIES 0 PAPI_FIRST_LIB) + get_filename_component(GKO_PAPI_LIBDIR "${PAPI_FIRST_LIB}" DIRECTORY) + list(APPEND GKO_RPATH_ADDITIONS "${GKO_PAPI_LIBDIR}") endif() + +# Since we have a public dependency on HIP, this dependency appears +# here as well +if (GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + list(APPEND GKO_RPATH_ADDITIONS "${HIP_PATH}/lib") +endif() + ginkgo_default_includes(ginkgo) -ginkgo_install_library(ginkgo core) +ginkgo_install_library(ginkgo "${GKO_RPATH_ADDITIONS}") if (GINKGO_CHECK_CIRCULAR_DEPS) - ginkgo_check_headers(ginkgo) + ginkgo_check_headers(ginkgo "") endif() if(GINKGO_BUILD_TESTS) diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp index 0c62f5deccb..791e525e037 100644 --- a/core/base/allocator.hpp +++ b/core/base/allocator.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -86,7 +86,7 @@ class ExecutorAllocator { * @tparam U the element type of the allocator to be constructed. 
*/ template - explicit ExecutorAllocator(const ExecutorAllocator &other) + ExecutorAllocator(const ExecutorAllocator &other) : exec_{other.get_executor()} {} @@ -172,4 +172,4 @@ using unordered_map = } // namespace gko -#endif // GKO_CORE_BASE_ALLOCATOR_HPP_ \ No newline at end of file +#endif // GKO_CORE_BASE_ALLOCATOR_HPP_ diff --git a/core/base/array.cpp b/core/base/array.cpp index 21d8b5f3326..0f31b3d9ba7 100644 --- a/core/base/array.cpp +++ b/core/base/array.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/fill_array.hpp" #include "core/components/precision_conversion.hpp" @@ -49,6 +50,15 @@ GKO_REGISTER_OPERATION(convert, components::convert_precision); } // namespace conversion +namespace array { + + +GKO_REGISTER_OPERATION(fill_array, components::fill_array); + + +} // namespace array + + namespace detail { @@ -68,4 +78,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_ARRAY_CONVERSION); } // namespace detail + + +template +void Array::fill(const ValueType value) +{ + this->get_executor()->run( + array::make_fill_array(this->get_data(), this->get_num_elems(), value)); +} + + +#define GKO_DECLARE_ARRAY_FILL(_type) void Array<_type>::fill(const _type value) + +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_ARRAY_FILL); + + } // namespace gko diff --git a/core/base/combination.cpp b/core/base/combination.cpp index dd95298858e..5051f43565a 100644 --- a/core/base/combination.cpp +++ b/core/base/combination.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -103,10 +104,16 @@ void Combination::apply_impl(const LinOp *b, LinOp *x) const { initialize_scalars(this->get_executor(), cache_.zero, cache_.one); - operators_[0]->apply(lend(coefficients_[0]), b, lend(cache_.zero), x); - for (size_type i = 1; i < operators_.size(); ++i) { - operators_[i]->apply(lend(coefficients_[i]), b, lend(cache_.one), x); - } + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + operators_[0]->apply(lend(coefficients_[0]), dense_b, + lend(cache_.zero), dense_x); + for (size_type i = 1; i < operators_.size(); ++i) { + operators_[i]->apply(lend(coefficients_[i]), dense_b, + lend(cache_.one), dense_x); + } + }, + b, x); } @@ -114,14 +121,17 @@ template void Combination::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - if (cache_.intermediate_x == nullptr || - cache_.intermediate_x->get_size() != x->get_size()) { - cache_.intermediate_x = x->clone(); - } - this->apply_impl(b, lend(cache_.intermediate_x)); - auto dense_x = as>(x); - dense_x->scale(beta); - dense_x->add_scaled(alpha, lend(cache_.intermediate_x)); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + if (cache_.intermediate_x == nullptr || + cache_.intermediate_x->get_size() != dense_x->get_size()) { + cache_.intermediate_x = dense_x->clone(); + } + this->apply_impl(dense_b, lend(cache_.intermediate_x)); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, lend(cache_.intermediate_x)); + }, + alpha, b, beta, x); } diff --git a/core/base/composition.cpp b/core/base/composition.cpp index 6fb0171b56e..d383b0174b6 100644 --- a/core/base/composition.cpp +++ b/core/base/composition.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 
2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -164,12 +165,17 @@ std::unique_ptr Composition::conj_transpose() const template void Composition::apply_impl(const LinOp *b, LinOp *x) const { - if (operators_.size() > 1) { - operators_[0]->apply( - lend(apply_inner_operators(operators_, storage_, b)), x); - } else { - operators_[0]->apply(b, x); - } + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + if (operators_.size() > 1) { + operators_[0]->apply( + lend(apply_inner_operators(operators_, storage_, dense_b)), + dense_x); + } else { + operators_[0]->apply(dense_b, dense_x); + } + }, + b, x); } @@ -177,13 +183,18 @@ template void Composition::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - if (operators_.size() > 1) { - operators_[0]->apply( - alpha, lend(apply_inner_operators(operators_, storage_, b)), beta, - x); - } else { - operators_[0]->apply(alpha, b, beta, x); - } + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + if (operators_.size() > 1) { + operators_[0]->apply( + dense_alpha, + lend(apply_inner_operators(operators_, storage_, dense_b)), + dense_beta, dense_x); + } else { + operators_[0]->apply(dense_alpha, dense_b, dense_beta, dense_x); + } + }, + alpha, b, beta, x); } diff --git a/core/base/executor.cpp b/core/base/executor.cpp index 9d80ad818f0..109dc6601ed 100644 --- a/core/base/executor.cpp +++ b/core/base/executor.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -53,6 +53,10 @@ void Operation::run(std::shared_ptr executor) const GKO_NOT_IMPLEMENTED; +void Operation::run(std::shared_ptr executor) const + GKO_NOT_IMPLEMENTED; + + void Operation::run(std::shared_ptr executor) const { this->run(static_cast>(executor)); diff --git a/core/base/extended_float.hpp b/core/base/extended_float.hpp index 8ebf836641b..34b3eed0d59 100644 --- a/core/base/extended_float.hpp +++ b/core/base/extended_float.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_BASE_EXTENDED_FLOAT_HPP_ +#include #include @@ -70,18 +71,17 @@ template struct uint_of_impl {}; template -struct uint_of_impl>> { +struct uint_of_impl> { using type = uint16; }; template -struct uint_of_impl>> { +struct uint_of_impl> { using type = uint32; }; template -struct uint_of_impl>> { +struct uint_of_impl> { using type = uint64; }; @@ -551,6 +551,31 @@ class complex> { }; +template <> +struct is_scalar : std::true_type {}; + + +template <> +struct numeric_limits { + static constexpr bool is_specialized{true}; + static constexpr bool is_signed{true}; + static constexpr bool is_integer{false}; + static constexpr bool is_exact{false}; + static constexpr bool is_bounded{true}; + static constexpr bool is_modulo{false}; + static constexpr int digits{ + gko::detail::float_traits::significand_bits + 1}; + // 3/10 is approx. log_10(2) + static constexpr int digits10{digits * 3 / 10}; + + // Note: gko::half can't return gko::half here because it does not have + // a constexpr constructor. 
+ static constexpr float epsilon() + { + return gko::detail::float_traits::eps; + } +}; + } // namespace std diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index b7efd21dfe0..0f5450fb907 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -110,7 +110,7 @@ class IteratorFactory { ~Reference() {} - Reference(IteratorFactory &parent, array_index_type array_index) + Reference(IteratorFactory *parent, array_index_type array_index) : parent_(parent), arr_index_(array_index) {} @@ -143,10 +143,10 @@ class IteratorFactory { { // In C++11, it is legal for a nested class to access private // members of the parent class. - parent_.dominant_values_[arr_index_] = - std::move(other.parent_.dominant_values_[other.arr_index_]); - parent_.secondary_values_[arr_index_] = - std::move(other.parent_.secondary_values_[other.arr_index_]); + parent_->dominant_values_[arr_index_] = + std::move(other.parent_->dominant_values_[other.arr_index_]); + parent_->secondary_values_[arr_index_] = + std::move(other.parent_->secondary_values_[other.arr_index_]); return *this; } @@ -174,25 +174,25 @@ class IteratorFactory { return left.dominant < right.dominant(); } - ToSortType &dominant() { return parent_.dominant_values_[arr_index_]; } + ToSortType &dominant() { return parent_->dominant_values_[arr_index_]; } const ToSortType &dominant() const { - return parent_.dominant_values_[arr_index_]; + return parent_->dominant_values_[arr_index_]; } SecondaryType &secondary() { - return parent_.secondary_values_[arr_index_]; + return parent_->secondary_values_[arr_index_]; } const SecondaryType &secondary() const { - return parent_.secondary_values_[arr_index_]; + return 
parent_->secondary_values_[arr_index_]; } private: - IteratorFactory &parent_; + IteratorFactory *parent_; array_index_type arr_index_; }; @@ -214,9 +214,11 @@ class IteratorFactory { using reference = Reference; using iterator_category = std::random_access_iterator_tag; + Iterator() = default; + ~Iterator() {} - Iterator(IteratorFactory &parent, difference_type array_index) + Iterator(IteratorFactory *parent, difference_type array_index) : parent_(parent), arr_index_(array_index) {} @@ -298,12 +300,12 @@ class IteratorFactory { } // Comparable operators - bool operator==(const Iterator &other) + bool operator==(const Iterator &other) const { return arr_index_ == other.arr_index_; } - bool operator!=(const Iterator &other) + bool operator!=(const Iterator &other) const { return arr_index_ != other.arr_index_; } @@ -329,8 +331,8 @@ class IteratorFactory { } private: - IteratorFactory &parent_; - difference_type arr_index_; + IteratorFactory *parent_{}; + difference_type arr_index_{}; }; public: @@ -363,7 +365,7 @@ class IteratorFactory { * Creates an iterator pointing to the beginning of both arrays * @returns an iterator pointing to the beginning of both arrays */ - Iterator begin() { return {*this, 0}; } + Iterator begin() { return {this, 0}; } /** * Creates an iterator pointing to the (excluding) end of both arrays @@ -371,7 +373,7 @@ class IteratorFactory { */ Iterator end() { - return {*this, static_cast(size_)}; + return {this, static_cast(size_)}; } private: diff --git a/core/base/mixed_precision_types.hpp b/core/base/mixed_precision_types.hpp new file mode 100644 index 00000000000..d725eff82bf --- /dev/null +++ b/core/base/mixed_precision_types.hpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_MIXED_PRECISION_TYPES_HPP_ +#define GKO_CORE_BASE_MIXED_PRECISION_TYPES_HPP_ + + +#include +#include + + +#ifdef GINKGO_MIXED_PRECISION +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) 
\ + template _macro(float, float, float, __VA_ARGS__); \ + template _macro(float, float, double, __VA_ARGS__); \ + template _macro(float, double, float, __VA_ARGS__); \ + template _macro(float, double, double, __VA_ARGS__); \ + template _macro(double, float, float, __VA_ARGS__); \ + template _macro(double, float, double, __VA_ARGS__); \ + template _macro(double, double, float, __VA_ARGS__); \ + template _macro(double, double, double, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__) +#else +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) 
\ + template _macro(float, float, float, __VA_ARGS__); \ + template _macro(double, double, double, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__) +#endif + + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int32); \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int64) + + +#endif // GKO_CORE_BASE_MIXED_PRECISION_TYPES_HPP_ diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp index b6f4d6b35c5..fea768d8c88 100644 --- a/core/base/mtx_io.cpp +++ b/core/base/mtx_io.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/base/perturbation.cpp b/core/base/perturbation.cpp index a7a6a0b004b..c7fead043d6 100644 --- a/core/base/perturbation.cpp +++ b/core/base/perturbation.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include @@ -46,15 +47,18 @@ void Perturbation::apply_impl(const LinOp *b, LinOp *x) const // temp = projector * b : projector->apply(b, temp) // x = b : x->copy_from(b) // x = 1 * x + scalar * basis * temp : basis->apply(scalar, temp, 1, x) - using vec = gko::matrix::Dense; - auto exec = this->get_executor(); - auto intermediate_size = - gko::dim<2>(projector_->get_size()[0], b->get_size()[1]); - cache_.allocate(exec, intermediate_size); - projector_->apply(b, lend(cache_.intermediate)); - x->copy_from(b); - basis_->apply(lend(scalar_), lend(cache_.intermediate), lend(cache_.one), - x); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + auto exec = this->get_executor(); + auto intermediate_size = + gko::dim<2>(projector_->get_size()[0], dense_b->get_size()[1]); + cache_.allocate(exec, intermediate_size); + projector_->apply(dense_b, lend(cache_.intermediate)); + dense_x->copy_from(dense_b); + basis_->apply(lend(scalar_), lend(cache_.intermediate), + lend(cache_.one), dense_x); + }, + b, x); } @@ -69,18 +73,20 @@ void Perturbation::apply_impl(const LinOp *alpha, const LinOp *b, // x->add_scaled(alpha, b) // x = x + alpha * scalar * basis * temp // : basis->apply(alpha * scalar, temp, 1, x) - using vec = gko::matrix::Dense; - auto exec = this->get_executor(); - auto intermediate_size = - gko::dim<2>(projector_->get_size()[0], b->get_size()[1]); - cache_.allocate(exec, intermediate_size); - projector_->apply(b, lend(cache_.intermediate)); - auto vec_x = as(x); - vec_x->scale(beta); - vec_x->add_scaled(alpha, b); - alpha->apply(lend(scalar_), lend(cache_.alpha_scalar)); - basis_->apply(lend(cache_.alpha_scalar), lend(cache_.intermediate), - lend(cache_.one), vec_x); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto exec = this->get_executor(); + auto intermediate_size = + gko::dim<2>(projector_->get_size()[0], dense_b->get_size()[1]); + cache_.allocate(exec, 
intermediate_size); + projector_->apply(dense_b, lend(cache_.intermediate)); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, dense_b); + dense_alpha->apply(lend(scalar_), lend(cache_.alpha_scalar)); + basis_->apply(lend(cache_.alpha_scalar), lend(cache_.intermediate), + lend(cache_.one), dense_x); + }, + alpha, b, beta, x); } diff --git a/core/base/types.hpp b/core/base/types.hpp new file mode 100644 index 00000000000..a820ddfd91e --- /dev/null +++ b/core/base/types.hpp @@ -0,0 +1,209 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_TYPES_HPP_ +#define GKO_CORE_BASE_TYPES_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace detail { + + +/** + * mask gives the integer with Size activated bits in the end + * + * @tparam Size the number of activated bits + * @tparam ValueType the type of mask, which uses std::uint32_t as default + * + * @return the ValueType with Size activated bits in the end + */ +template +constexpr std::enable_if_t<(Size < sizeof(ValueType) * 8), ValueType> mask() +{ + return (ValueType{1} << Size) - 1; +} + +/** + * @copydoc mask() + * + * @note this is special case for the Size = the number of bits of ValueType + */ +template +constexpr std::enable_if_t mask() +{ + return ~ValueType{}; +} + + +/** + * shift calculates the number of bits for shifting + * + * @tparam current_shift the current position of shifting + * @tparam num_groups the number of elements in array + * + * @return the number of shifting bits + * + * @note this is the last case of nested template + */ +template +constexpr std::enable_if_t<(num_groups == current_shift + 1), int> shift( + const std::array &bits) +{ + return 0; +} + +/** + * @copydoc shift(const std::array) + * + * @note this is the usual case of nested template + */ +template +constexpr std::enable_if_t<(num_groups > current_shift + 1), int> shift( + const std::array &bits) +{ + return bits[current_shift + 1] + + 
shift<(current_shift + 1), num_groups>(bits); +} + + +} // namespace detail + + +/** + * ConfigSet is a way to embed several pieces of information into one integer + * by assigning each of them a fixed number of bits. + * + * The usage will be the following + * Set the method with bits Cfg = ConfigSet<b_0, b_1, ..., b_k> + * Encode the given information encoded = Cfg::encode(x_0, x_1, ..., x_k) + * Decode the specific position information x_t = Cfg::decode<t>(encoded) + * The encoded result will use 32 bits to record + * rrrrr0..01....1...k..k, where 0/1/.../k marks the bits that store the + * information for position 0/1/.../k and r marks the remaining unused bits. + * + * Denote $B_t = \sum_{i = t+1}^k b_i$ and $F(X) = Cfg::encode(x_0, ..., x_k)$. + * Have $F(X) = \sum_{i = 0}^k (x_i << B_i) = \sum_{i = 0}^k (x_i * 2^{B_i})$. + * For all i, we have $0 <= x_i < 2^{b_i}$. + * $x_i$, $2^{B_i}$ are non-negative, so + * $F(X) = 0$ <=> $X = \{0\}$, $x_i = 0$ for all i. + * The term $x_i * 2^{B_i}$ only occupies the bits $[B_i, B_i + b_i)$ of $F(X)$. + * Because $B_{i-1} = B_i + b_i$, these bit ranges do not overlap, so every + * $x_i$ is uniquely determined by $F(X)$: $x_i = (F(X) >> B_i) % 2^{b_i}$. + * Assume $F(X) = F(Y)$, then $x_i = y_i$ for all i -> $X = Y$. + * F is a one-to-one function if $0 <= x_i < 2^{b_i}$ for all i. + * For any encoded result R, we can use the following to get the decoded series. + * for i = k to 0; + * $x_i = R % 2^{b_i}$; + * $R = R / 2^{b_i}$; + * endfor; + * For any R in the range $[0, 2^{b_0 + B_0})$, we have X such that $F(X) = R$. + * F is an onto function. + * Thus, F is a bijection. + * + * @tparam num_bits... the number of bits for each position.
+ * + * @note each entry of num_bits must be at least $ceil(log_2(maxval + 1))$, + * where maxval is the largest value stored at that position + */ +template +class ConfigSet { +public: + static constexpr unsigned num_groups = sizeof...(num_bits); + static constexpr std::array bits{num_bits...}; + + /** + * Decodes the `position` information from encoded + * + * @tparam position the position of desired information + * + * @param encoded the encoded integer + * + * @return the decoded information at position + */ + template + static constexpr std::uint32_t decode(std::uint32_t encoded) + { + static_assert(position < num_groups, + "This position is over the bounds."); + constexpr int shift = detail::shift(bits); + constexpr auto mask = detail::mask(); + return (encoded >> shift) & mask; + } + + /** + * Encodes the information with given bit set to encoded integer. + * + * @note the last case of nested template. + */ + template + static constexpr std::enable_if_t<(current_iter == num_groups), + std::uint32_t> + encode() + { + return 0; + } + + /** + * Encodes the information with given bit set to encoded integer. + * + * @tparam current_iter the encoded place + * @tparam Rest... the rest type + * + * @param first the current encoded information + * @param rest... the rest of other information waiting for encoding + * + * @return the encoded integer + */ + template + static constexpr std::enable_if_t<(current_iter < num_groups), + std::uint32_t> + encode(std::uint32_t first, Rest &&...
rest) + { + constexpr int shift = detail::shift(bits); + if (current_iter == 0) { + static_assert( + bits[current_iter] + shift <= sizeof(std::uint32_t) * 8, + "the total bits usage is larger than std::uint32_t bits"); + } + return (first << shift) | + encode(std::forward(rest)...); + } +}; + + +} // namespace gko + +#endif // GKO_CORE_BASE_TYPES_HPP_ diff --git a/core/base/utils.hpp b/core/base/utils.hpp index 4e6fbc1dfce..bf09996ed3b 100644 --- a/core/base/utils.hpp +++ b/core/base/utils.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,10 +30,20 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_INTERNAL_CORE_BASE_UTILS_HPP_ -#define GKO_INTERNAL_CORE_BASE_UTILS_HPP_ +#ifndef GKO_CORE_BASE_UTILS_HPP_ +#define GKO_CORE_BASE_UTILS_HPP_ + +#include + + +#include +#include + + +#include #include +#include namespace gko { @@ -50,7 +60,167 @@ GKO_ATTRIBUTES GKO_INLINE ValueType checked_load(const ValueType *p, } // namespace kernels + + +namespace detail { + + +template +struct conversion_sort_helper {}; + +template +struct conversion_sort_helper> { + using mtx_type = matrix::Csr; + template + static std::unique_ptr get_sorted_conversion( + std::shared_ptr &exec, Source *source) + { + auto editable_mtx = mtx_type::create(exec); + as>(source)->convert_to(lend(editable_mtx)); + editable_mtx->sort_by_column_index(); + return editable_mtx; + } +}; + + +template +std::unique_ptr> convert_to_with_sorting_impl( + std::shared_ptr &exec, Source *obj, bool skip_sorting) +{ + if (skip_sorting) { + return copy_and_convert_to(exec, obj); + } else { + using decay_dest = std::decay_t; + auto sorted_mtx = + 
detail::conversion_sort_helper::get_sorted_conversion( + exec, obj); + return {sorted_mtx.release(), std::default_delete()}; + } +} + +template +std::shared_ptr convert_to_with_sorting_impl( + std::shared_ptr &exec, std::shared_ptr obj, + bool skip_sorting) +{ + if (skip_sorting) { + return copy_and_convert_to(exec, obj); + } else { + using decay_dest = std::decay_t; + auto sorted_mtx = + detail::conversion_sort_helper::get_sorted_conversion( + exec, obj.get()); + return {std::move(sorted_mtx)}; + } +} + + +} // namespace detail + + +/** + * @internal + * + * Helper function that converts the given matrix to the Dest format with + * additional sorting if requested. + * + * If the given matrix was already sorted, is on the same executor and with a + * dynamic type of `Dest`, the same pointer is returned with an empty + * deleter. + * In all other cases, a new matrix is created, which stores the converted + * matrix. + * + * @tparam Dest the type to which the object should be converted + * @tparam Source the type of the source object + * + * @param exec the executor where the result should be placed + * @param obj the source object that should be converted + * @param skip_sorting indicator if the resulting matrix should be sorted or + * not + */ +template +std::unique_ptr> convert_to_with_sorting( + std::shared_ptr exec, Source *obj, bool skip_sorting) +{ + return detail::convert_to_with_sorting_impl(exec, obj, skip_sorting); +} + +/** + * @copydoc convert_to_with_sorting(std::shared_ptr, + * Source *, bool) + * + * @note This version adds the const qualifier for the result since the input is + * also const + */ +template +std::unique_ptr> +convert_to_with_sorting(std::shared_ptr exec, const Source *obj, + bool skip_sorting) +{ + return detail::convert_to_with_sorting_impl(exec, obj, + skip_sorting); +} + +/** + * @copydoc convert_to_with_sorting(std::shared_ptr, + * Source *, bool) + * + * @note This version has a unique_ptr as the source instead of a plain pointer 
+ */ +template +std::unique_ptr> convert_to_with_sorting( + std::shared_ptr exec, const std::unique_ptr &obj, + bool skip_sorting) +{ + return detail::convert_to_with_sorting_impl(exec, obj.get(), + skip_sorting); +} + +/** + * @internal + * + * Helper function that converts the given matrix to the Dest format with + * additional sorting if requested. + * + * If the given matrix was already sorted, is on the same executor and with a + * dynamic type of `Dest`, the same pointer is returned. + * In all other cases, a new matrix is created, which stores the converted + * matrix. + * + * @tparam Dest the type to which the object should be converted + * @tparam Source the type of the source object + * + * @param exec the executor where the result should be placed + * @param obj the source object that should be converted + * @param skip_sorting indicator if the resulting matrix should be sorted or + * not + */ +template +std::shared_ptr convert_to_with_sorting( + std::shared_ptr exec, std::shared_ptr obj, + bool skip_sorting) +{ + return detail::convert_to_with_sorting_impl(exec, obj, skip_sorting); +} + +/** + * @copydoc convert_to_with_sorting(std::shared_ptr, + * std::shared_ptr, bool) + * + * @note This version adds the const qualifier for the result since the input is + * also const + */ +template +std::shared_ptr convert_to_with_sorting( + std::shared_ptr exec, std::shared_ptr obj, + bool skip_sorting) +{ + return detail::convert_to_with_sorting_impl(exec, obj, + skip_sorting); +} + + } // namespace gko -#endif // GKO_INTERNAL_CORE_BASE_UTILS_HPP_ \ No newline at end of file +#endif // GKO_CORE_BASE_UTILS_HPP_ diff --git a/core/base/version.cpp b/core/base/version.cpp index 7993cee5cae..1cc7a8c849b 100644 --- a/core/base/version.cpp +++ b/core/base/version.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -64,8 +64,10 @@ std::ostream &operator<<(std::ostream &os, const version_info &ver_info) print_version(os, ver_info.omp_version); os << "\n the CUDA module is "; print_version(os, ver_info.cuda_version); - os << "\n the HIP module is "; + os << "\n the HIP module is "; print_version(os, ver_info.hip_version); + os << "\n the DPCPP module is "; + print_version(os, ver_info.dpcpp_version); return os; } diff --git a/core/components/absolute_array.hpp b/core/components/absolute_array.hpp new file mode 100644 index 00000000000..5f66c89254f --- /dev/null +++ b/core/components/absolute_array.hpp @@ -0,0 +1,118 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_COMPONENTS_ABSOLUTE_ARRAY_HPP_ +#define GKO_CORE_COMPONENTS_ABSOLUTE_ARRAY_HPP_ + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) \ + void inplace_absolute_array(std::shared_ptr exec, \ + ValueType *data, size_type num_entries) + +#define GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) \ + void outplace_absolute_array(std::shared_ptr exec, \ + const ValueType *in, size_type num_entries, \ + remove_complex *out) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) + + +namespace omp { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace omp + + +namespace cuda { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace cuda + + +namespace reference { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace reference + + +namespace hip { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace hip + + +namespace dpcpp { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace dpcpp + + +#undef 
GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_COMPONENTS_ABSOLUTE_ARRAY_HPP_ diff --git a/core/components/fill_array.hpp b/core/components/fill_array.hpp index 7bafb8aecb4..0845f03701f 100644 --- a/core/components/fill_array.hpp +++ b/core/components/fill_array.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -49,10 +49,16 @@ namespace kernels { void fill_array(std::shared_ptr exec, \ ValueType *data, size_type num_entries, ValueType val) +#define GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(ValueType) \ + void fill_seq_array(std::shared_ptr exec, \ + ValueType *data, size_type num_entries) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_FILL_ARRAY_KERNEL(IndexType) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_FILL_ARRAY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(ValueType) namespace omp { @@ -91,6 +97,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/components/precision_conversion.hpp b/core/components/precision_conversion.hpp index 719c596c34e..9f21b59d2dd 100644 --- a/core/components/precision_conversion.hpp +++ b/core/components/precision_conversion.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -93,6 +93,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/components/prefix_sum.hpp b/core/components/prefix_sum.hpp index d171be831aa..b0cdf34018d 100644 --- a/core/components/prefix_sum.hpp +++ b/core/components/prefix_sum.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,6 +45,22 @@ namespace gko { namespace kernels { +/** + * \fn prefix_sum + * Computes an exclusive prefix sum or exclusive scan of the input array. + * + * As with the standard definition of exclusive scan, the last entry of the + * input array is not read at all, but is written to. + * If the input is [3,4,1,9,100], it will be replaced by + * [0,3,7,8,17]. + * + * \tparam IndexType Type of entries to be scanned (summed). + * + * \param exec Executor on which to run the scan operation + * \param counts The input/output array to be scanned with the sum operation + * \param num_entries Size of the array, equal to one more than the number + * of entries to be summed. 
+ */ #define GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) \ void prefix_sum(std::shared_ptr exec, \ IndexType *counts, size_type num_entries) @@ -91,6 +107,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace components { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace components +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index 94dfc8ab9f0..fcb370a81a0 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -2,10 +2,30 @@ if(NOT GINKGO_BUILD_CUDA) add_library(ginkgo_cuda $ cuda_hooks.cpp) + target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_cuda) - target_link_libraries(ginkgo_cuda PUBLIC ginkgo_hip) ginkgo_default_includes(ginkgo_cuda) - ginkgo_install_library(ginkgo_cuda cuda) + ginkgo_install_library(ginkgo_cuda) +endif() + +if (NOT GINKGO_BUILD_DPCPP) + add_library(ginkgo_dpcpp + $ + dpcpp_hooks.cpp) + target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device) + ginkgo_compile_features(ginkgo_dpcpp) + ginkgo_default_includes(ginkgo_dpcpp) + ginkgo_install_library(ginkgo_dpcpp) +endif() + +if(NOT GINKGO_BUILD_HIP) + add_library(ginkgo_hip + $ + hip_hooks.cpp) + target_link_libraries(ginkgo_hip PUBLIC ginkgo_device) + ginkgo_compile_features(ginkgo_hip) + ginkgo_default_includes(ginkgo_hip) + ginkgo_install_library(ginkgo_hip) endif() if (NOT GINKGO_BUILD_OMP) @@ -13,26 +33,20 @@ if (NOT GINKGO_BUILD_OMP) $ omp_hooks.cpp) ginkgo_compile_features(ginkgo_omp) - target_link_libraries(ginkgo_omp PUBLIC ginkgo_cuda) - target_link_libraries(ginkgo_omp PUBLIC ginkgo_hip) + target_link_libraries(ginkgo_omp PRIVATE ginkgo_cuda) + target_link_libraries(ginkgo_omp PRIVATE ginkgo_hip) + target_link_libraries(ginkgo_omp PRIVATE ginkgo_dpcpp) + target_link_libraries(ginkgo_omp PUBLIC ginkgo_device) ginkgo_default_includes(ginkgo_omp) - ginkgo_install_library(ginkgo_omp omp) + 
ginkgo_install_library(ginkgo_omp) endif() if (NOT GINKGO_BUILD_REFERENCE) add_library(ginkgo_reference $ reference_hooks.cpp) + target_link_libraries(ginkgo_reference PUBLIC ginkgo_device) ginkgo_compile_features(ginkgo_reference) ginkgo_default_includes(ginkgo_reference) - ginkgo_install_library(ginkgo_reference reference) -endif() - -if(NOT GINKGO_BUILD_HIP) - add_library(ginkgo_hip - $ - hip_hooks.cpp) - ginkgo_compile_features(ginkgo_hip) - ginkgo_default_includes(ginkgo_hip) - ginkgo_install_library(ginkgo_hip hip) + ginkgo_install_library(ginkgo_reference) endif() diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 6adb0df0445..b9400e90b3f 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,11 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/mixed_precision_types.hpp" +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/components/precision_conversion.hpp" #include "core/components/prefix_sum.hpp" #include "core/factorization/factorization_kernels.hpp" +#include "core/factorization/ic_kernels.hpp" #include "core/factorization/ilu_kernels.hpp" +#include "core/factorization/par_ic_kernels.hpp" #include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" @@ -46,17 +50,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/dense_kernels.hpp" #include "core/matrix/diagonal_kernels.hpp" #include "core/matrix/ell_kernels.hpp" +#include "core/matrix/fbcsr_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" #include "core/matrix/sellp_kernels.hpp" #include "core/matrix/sparsity_csr_kernels.hpp" +#include "core/multigrid/amgx_pgm_kernels.hpp" #include "core/preconditioner/isai_kernels.hpp" #include "core/preconditioner/jacobi_kernels.hpp" +#include "core/reorder/rcm_kernels.hpp" #include "core/solver/bicg_kernels.hpp" #include "core/solver/bicgstab_kernels.hpp" +#include "core/solver/cb_gmres_kernels.hpp" #include "core/solver/cg_kernels.hpp" #include "core/solver/cgs_kernels.hpp" #include "core/solver/fcg_kernels.hpp" #include "core/solver/gmres_kernels.hpp" +#include "core/solver/idr_kernels.hpp" #include "core/solver/ir_kernels.hpp" #include "core/solver/lower_trs_kernels.hpp" #include "core/solver/upper_trs_kernels.hpp" @@ -90,9 +99,22 @@ template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type); template GKO_DECLARE_FILL_ARRAY_KERNEL(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -template GKO_DECLARE_FILL_ARRAY_KERNEL(size_type); +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); + +template +GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL); + +template +GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); + +template +GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL); } // namespace components @@ -111,26 +133,57 @@ GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType) 
GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + +template +GKO_DECLARE_DENSE_FILL_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); + template GKO_DECLARE_DENSE_SCALE_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); +template +GKO_DECLARE_DENSE_INV_SCALE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_INV_SCALE_KERNEL); + template GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); +template +GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); + template GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); +template +GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); + template GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +template +GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); + template GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -196,42 +249,80 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( 
GKO_DECLARE_DENSE_CALCULATE_TOTAL_COLS_KERNEL); template -GKO_DECLARE_TRANSPOSE_KERNEL(ValueType) +GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template -GKO_DECLARE_CONJ_TRANSPOSE_KERNEL(ValueType) +GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CONJ_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + +template +GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); template -GKO_DECLARE_ROW_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); template -GKO_DECLARE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); template -GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); template 
-GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); template -GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL(ValueType) +GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); + +template +GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); + +template +GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); + +template +GKO_DECLARE_MAKE_COMPLEX_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); + +template +GKO_DECLARE_GET_REAL_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); + +template +GKO_DECLARE_GET_IMAG_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_IMAG_KERNEL); } // namespace dense @@ -426,6 +517,38 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL); } // namespace bicgstab +namespace idr { + + +template +GKO_DECLARE_IDR_INITIALIZE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); + +template +GKO_DECLARE_IDR_STEP_1_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); + +template +GKO_DECLARE_IDR_STEP_2_KERNEL(ValueType) 
+GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); + +template +GKO_DECLARE_IDR_STEP_3_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); + +template +GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); + + +} // namespace idr + + namespace cgs { @@ -480,6 +603,35 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_2_KERNEL); } // namespace gmres +namespace cb_gmres { + + +template +GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); + +template +GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL(ValueType, ValueTypeKrylovBases) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( + GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL); + +template +GKO_DECLARE_CB_GMRES_STEP_1_KERNEL(ValueType, ValueTypeKrylovBases) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); + +template +GKO_DECLARE_CB_GMRES_STEP_2_KERNEL(ValueType, ValueTypeKrylovBases) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( + GKO_DECLARE_CB_GMRES_STEP_2_KERNEL); + + +} // namespace cb_gmres + + namespace ir { @@ -617,6 +769,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +template +GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + template GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -624,10 +782,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( 
GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template -GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); template GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) @@ -635,11 +793,10 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); -template -GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) +template +GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); template GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) @@ -674,6 +831,78 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); } // namespace csr +namespace fbcsr { + + +template +GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + +template +GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, 
IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + +template +GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + +template +GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr + + namespace coo { @@ -724,15 +953,20 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( namespace ell { -template -GKO_DECLARE_ELL_SPMV_KERNEL(ValueType, IndexType) +template +GKO_DECLARE_ELL_SPMV_KERNEL(InputValueType, MatrixValueType, OutputValueType, + IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ELL_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_SPMV_KERNEL); -template -GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(ValueType, IndexType) +template 
+GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(InputValueType, MatrixValueType, + OutputValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL); template @@ -862,6 +1096,17 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL); +template +GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); + +template +GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL); + template GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -874,6 +1119,22 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); +template +GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL); + +template +GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL); + +template +GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); + template GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -896,12 +1157,25 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL); + +template +GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); 
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL); + template GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL); +template +GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL); + template GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -949,6 +1223,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( } // namespace factorization +namespace ic_factorization { + + +template +GKO_DECLARE_IC_COMPUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); + + +} // namespace ic_factorization + + namespace ilu_factorization { @@ -962,6 +1248,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( } // namespace ilu_factorization +namespace par_ic_factorization { + + +template +GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); + +template +GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); + + +} // namespace par_ic_factorization + + namespace par_ict_factorization { @@ -1029,6 +1334,56 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( } // namespace par_ilut_factorization +namespace rcm { + + +template +GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); + + +template 
+GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); + + +} // namespace rcm + + +namespace amgx_pgm { + + +template +GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); + +template +GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); + +template +GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL(IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); + +template +GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); + +template +GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); + + +} // namespace amgx_pgm namespace set_all_statuses { @@ -1052,6 +1407,18 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( } // namespace residual_norm + + +namespace implicit_residual_norm { + + +template +GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(ValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); + + +} // namespace implicit_residual_norm } // namespace GKO_HOOK_MODULE } // namespace kernels } // namespace gko diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index d41d77d24d9..a58ef92e5ab 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, 
the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -55,10 +55,17 @@ version version_info::get_cuda_version() noexcept std::shared_ptr CudaExecutor::create( - int device_id, std::shared_ptr master, bool device_reset) + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode) { - return std::shared_ptr( - new CudaExecutor(device_id, std::move(master), device_reset)); + return std::shared_ptr(new CudaExecutor( + device_id, std::move(master), device_reset, alloc_mode)); +} + + +void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + // This method is always called, so cannot throw when not compiled. } @@ -93,6 +100,11 @@ void CudaExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, GKO_NOT_COMPILED(cuda); +void CudaExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(cuda); + + void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); @@ -115,6 +127,12 @@ std::string CublasError::get_error(int64) } +std::string CurandError::get_error(int64) +{ + return "ginkgo CUDA module is not compiled"; +} + + std::string CusparseError::get_error(int64) { return "ginkgo CUDA module is not compiled"; diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp new file mode 100644 index 00000000000..65ca04dd840 --- /dev/null +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -0,0 +1,149 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include + + +#include +#include +#include +#include + + +namespace gko { + + +version version_info::get_dpcpp_version() noexcept +{ + // We just return the version with a special "not compiled" tag in + // placeholder modules. + return {GKO_VERSION_STR, "not compiled"}; +} + + +std::shared_ptr DpcppExecutor::create( + int device_id, std::shared_ptr master, std::string device_type) +{ + return std::shared_ptr( + new DpcppExecutor(device_id, std::move(master), device_type)); +} + + +void DpcppExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + // This method is always called, so cannot throw when not compiled. 
+} + + +void OmpExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(dpcpp); + + +bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + // Dummy check + auto dev_type = dest_exec->get_device_type(); + return dev_type == "cpu" || dev_type == "host"; +} + + +void DpcppExecutor::raw_free(void *ptr) const noexcept +{ + // Free must never fail, as it can be called in destructors. + // If the DPC++ module was not compiled, the library couldn't have + // allocated the memory, so there is no need to deallocate it. +} + + +void *DpcppExecutor::raw_alloc(size_type num_bytes) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::synchronize() const GKO_NOT_COMPILED(dpcpp); + + +void DpcppExecutor::run(const Operation &op) const +{ + op.run(std::static_pointer_cast( + this->shared_from_this())); +} + + +int DpcppExecutor::get_num_devices(std::string) { return 0; } + + +void DpcppExecutor::set_device_property() {} + + +bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +{ + // Dummy check + return this->get_device_type() == "cpu" || + this->get_device_type() == "host"; +} + +bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + // Dummy check + return dest_exec->get_device_type() == this->get_device_type() && +
dest_exec->get_device_id() == this->get_device_id(); +} + + +} // namespace gko + + +#define GKO_HOOK_MODULE dpcpp +#include "core/device_hooks/common_kernels.inc.cpp" +#undef GKO_HOOK_MODULE diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index a2e288b4157..232a86b803b 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -52,10 +52,17 @@ version version_info::get_hip_version() noexcept std::shared_ptr HipExecutor::create( - int device_id, std::shared_ptr master, bool device_reset) + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode) { - return std::shared_ptr( - new HipExecutor(device_id, std::move(master), device_reset)); + return std::shared_ptr(new HipExecutor( + device_id, std::move(master), device_reset, alloc_mode)); +} + + +void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + // This method is always called, so cannot throw when not compiled. 
} @@ -90,6 +97,11 @@ void HipExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, GKO_NOT_COMPILED(hip); +void HipExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(hip); + + void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); @@ -112,6 +124,12 @@ std::string HipblasError::get_error(int64) } +std::string HiprandError::get_error(int64) +{ + return "ginkgo HIP module is not compiled"; +} + + std::string HipsparseError::get_error(int64) { return "ginkgo HIP module is not compiled"; diff --git a/core/device_hooks/omp_hooks.cpp b/core/device_hooks/omp_hooks.cpp index 131fa51a4d8..d4589755f01 100644 --- a/core/device_hooks/omp_hooks.cpp +++ b/core/device_hooks/omp_hooks.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/device_hooks/reference_hooks.cpp b/core/device_hooks/reference_hooks.cpp index ea7742776c8..33713edbed8 100644 --- a/core/device_hooks/reference_hooks.cpp +++ b/core/device_hooks/reference_hooks.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/devices/CMakeLists.txt b/core/devices/CMakeLists.txt deleted file mode 100644 index 2a5626c0018..00000000000 --- a/core/devices/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -function(ginkgo_add_object_library name) - add_library(${name} OBJECT ${ARGN}) - ginkgo_compile_features(${name}) - ginkgo_default_includes(${name}) - target_include_directories(${name} PUBLIC ${Ginkgo_SOURCE_DIR}) - set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) -endfunction() - -add_subdirectory(omp) -add_subdirectory(cuda) -add_subdirectory(hip) -add_subdirectory(reference) diff --git a/core/devices/cuda/CMakeLists.txt b/core/devices/cuda/CMakeLists.txt deleted file mode 100644 index 1b164165305..00000000000 --- a/core/devices/cuda/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -ginkgo_add_object_library(ginkgo_cuda_device - executor.cpp) - diff --git a/core/devices/hip/CMakeLists.txt b/core/devices/hip/CMakeLists.txt deleted file mode 100644 index 7f855b3e2e9..00000000000 --- a/core/devices/hip/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -ginkgo_add_object_library(ginkgo_hip_device - executor.cpp) - diff --git a/core/factorization/factorization_kernels.hpp b/core/factorization/factorization_kernels.hpp index f7c25964dde..02af1ed270d 100644 --- a/core/factorization/factorization_kernels.hpp +++ b/core/factorization/factorization_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -132,6 +132,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace factorization +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/factorization/ic.cpp b/core/factorization/ic.cpp new file mode 100644 index 00000000000..3c8b6dd29ba --- /dev/null +++ b/core/factorization/ic.cpp @@ -0,0 +1,127 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/factorization/factorization_kernels.hpp" +#include "core/factorization/ic_kernels.hpp" + + +namespace gko { +namespace factorization { +namespace ic_factorization { + + +GKO_REGISTER_OPERATION(compute, ic_factorization::compute); +GKO_REGISTER_OPERATION(add_diagonal_elements, + factorization::add_diagonal_elements); +GKO_REGISTER_OPERATION(initialize_row_ptrs_l, + factorization::initialize_row_ptrs_l); +GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); + + +} // namespace ic_factorization + + +template +std::unique_ptr> Ic::generate( + const std::shared_ptr &system_matrix, bool skip_sorting, + bool both_factors) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); + + const auto exec = this->get_executor(); + + // Converts the system matrix to CSR. + // Throws an exception if it is not convertible. 
+ auto local_system_matrix = matrix_type::create(exec); + as>(system_matrix.get()) + ->convert_to(local_system_matrix.get()); + + if (!skip_sorting) { + local_system_matrix->sort_by_column_index(); + } + + // Add explicit diagonal zero elements if they are missing + exec->run(ic_factorization::make_add_diagonal_elements( + local_system_matrix.get(), false)); + + // Compute IC factorization + exec->run(ic_factorization::make_compute(local_system_matrix.get())); + + // Extract lower factor: compute non-zeros + const auto matrix_size = local_system_matrix->get_size(); + const auto num_rows = matrix_size[0]; + Array l_row_ptrs{exec, num_rows + 1}; + exec->run(ic_factorization::make_initialize_row_ptrs_l( + local_system_matrix.get(), l_row_ptrs.get_data())); + + // Get nnz from device memory + auto l_nnz = static_cast( + exec->copy_val_to_host(l_row_ptrs.get_data() + num_rows)); + + // Init arrays + Array l_col_idxs{exec, l_nnz}; + Array l_vals{exec, l_nnz}; + std::shared_ptr l_factor = matrix_type::create( + exec, matrix_size, std::move(l_vals), std::move(l_col_idxs), + std::move(l_row_ptrs), parameters_.l_strategy); + + // Extract lower factor: columns and values + exec->run(ic_factorization::make_initialize_l(local_system_matrix.get(), + l_factor.get(), false)); + + if (both_factors) { + auto lh_factor = l_factor->conj_transpose(); + return Composition::create(std::move(l_factor), + std::move(lh_factor)); + } else { + return Composition::create(std::move(l_factor)); + } +} + + +#define GKO_DECLARE_IC(ValueType, IndexType) class Ic +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC); + + +} // namespace factorization +} // namespace gko diff --git a/core/factorization/ic_kernels.hpp b/core/factorization/ic_kernels.hpp new file mode 100644 index 00000000000..4efcfe1565a --- /dev/null +++ b/core/factorization/ic_kernels.hpp @@ -0,0 +1,112 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All
rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_FACTORIZATION_IC_KERNELS_HPP_ +#define GKO_CORE_FACTORIZATION_IC_KERNELS_HPP_ + + +#include + + +#include + + +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_IC_COMPUTE_KERNEL(ValueType, IndexType) \ + void compute(std::shared_ptr exec, \ + matrix::Csr *system_matrix) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_IC_COMPUTE_KERNEL(ValueType, IndexType) + + +namespace omp { +namespace ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ic_factorization +} // namespace omp + + +namespace cuda { +namespace ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ic_factorization +} // namespace cuda + + +namespace reference { +namespace ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ic_factorization +} // namespace reference + + +namespace hip { +namespace ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ic_factorization +} // namespace hip + + +namespace dpcpp { +namespace ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ic_factorization +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_FACTORIZATION_IC_KERNELS_HPP_ diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index c2f397151d3..1518853b9b8 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ GKO_REGISTER_OPERATION(initialize_l_u, factorization::initialize_l_u); template std::unique_ptr> Ilu::generate_l_u( - const std::shared_ptr &system_matrix) const + const std::shared_ptr &system_matrix, bool skip_sorting) const { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); @@ -75,6 +75,10 @@ std::unique_ptr> Ilu::generate_l_u( as>(system_matrix.get()) ->convert_to(local_system_matrix.get()); + if (!skip_sorting) { + local_system_matrix->sort_by_column_index(); + } + // Add explicit diagonal zero elements if they are missing exec->run(ilu_factorization::make_add_diagonal_elements( local_system_matrix.get(), false)); diff --git a/core/factorization/ilu_kernels.hpp b/core/factorization/ilu_kernels.hpp index 17602ac4ab4..50ef5fa831b 100644 --- a/core/factorization/ilu_kernels.hpp +++ b/core/factorization/ilu_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -95,6 +95,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace ilu_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ilu_factorization +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/factorization/par_ic.cpp b/core/factorization/par_ic.cpp new file mode 100644 index 00000000000..33e40d85bb0 --- /dev/null +++ b/core/factorization/par_ic.cpp @@ -0,0 +1,157 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/factorization/factorization_kernels.hpp" +#include "core/factorization/par_ic_kernels.hpp" +#include "core/matrix/csr_kernels.hpp" + + +namespace gko { +namespace factorization { +namespace par_ic_factorization { + + +GKO_REGISTER_OPERATION(add_diagonal_elements, + factorization::add_diagonal_elements); +GKO_REGISTER_OPERATION(initialize_row_ptrs_l, + factorization::initialize_row_ptrs_l); +GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); +GKO_REGISTER_OPERATION(init_factor, par_ic_factorization::init_factor); +GKO_REGISTER_OPERATION(compute_factor, par_ic_factorization::compute_factor); +GKO_REGISTER_OPERATION(csr_transpose, csr::transpose); +GKO_REGISTER_OPERATION(convert_to_coo, csr::convert_to_coo); + + +} // namespace par_ic_factorization + + +template +std::unique_ptr> ParIc::generate( + const std::shared_ptr &system_matrix, bool skip_sorting, + bool both_factors) const +{ + using CsrMatrix = matrix::Csr; + using CooMatrix = matrix::Coo; + + GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); + + const auto exec = this->get_executor(); + + // Converts the system matrix to CSR. + // Throws an exception if it is not convertible. 
+ auto csr_system_matrix = CsrMatrix::create(exec); + as>(system_matrix.get()) + ->convert_to(csr_system_matrix.get()); + // If necessary, sort it + if (!skip_sorting) { + csr_system_matrix->sort_by_column_index(); + } + + // Add explicit diagonal zero elements if they are missing + exec->run(par_ic_factorization::make_add_diagonal_elements( + csr_system_matrix.get(), true)); + + const auto matrix_size = csr_system_matrix->get_size(); + const auto number_rows = matrix_size[0]; + Array l_row_ptrs{exec, number_rows + 1}; + exec->run(par_ic_factorization::make_initialize_row_ptrs_l( + csr_system_matrix.get(), l_row_ptrs.get_data())); + + // Get nnz from device memory + auto l_nnz = static_cast( + exec->copy_val_to_host(l_row_ptrs.get_data() + number_rows)); + + // Since `row_ptrs` of L is already created, the matrix can be + // directly created with it + Array l_col_idxs{exec, l_nnz}; + Array l_vals{exec, l_nnz}; + std::shared_ptr l_factor = matrix_type::create( + exec, matrix_size, std::move(l_vals), std::move(l_col_idxs), + std::move(l_row_ptrs), parameters_.l_strategy); + + exec->run(par_ic_factorization::make_initialize_l(csr_system_matrix.get(), + l_factor.get(), false)); + + // build COO representation of lower factor + Array l_row_idxs{exec, l_nnz}; + // copy values from l_factor, which are the lower triangular values of A + auto l_vals_view = + Array::view(exec, l_nnz, l_factor->get_values()); + auto a_vals = Array{exec, l_vals_view}; + auto a_row_idxs = + Array::view(exec, l_nnz, l_factor->get_col_idxs()); + auto a_col_idxs = Array{exec, l_nnz}; + auto a_lower_coo = + CooMatrix::create(exec, matrix_size, std::move(a_vals), + std::move(a_row_idxs), std::move(a_col_idxs)); + exec->run(par_ic_factorization::make_convert_to_coo(l_factor.get(), + a_lower_coo.get())); + + // compute sqrt of diagonal entries + exec->run(par_ic_factorization::make_init_factor(l_factor.get())); + + // execute sweeps + exec->run(par_ic_factorization::make_compute_factor( + 
parameters_.iterations, a_lower_coo.get(), l_factor.get())); + + if (both_factors) { + auto lh_factor = l_factor->conj_transpose(); + return Composition::create(std::move(l_factor), + std::move(lh_factor)); + } else { + return Composition::create(std::move(l_factor)); + } +} + + +#define GKO_DECLARE_PAR_IC(ValueType, IndexType) \ + class ParIc +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_IC); + + +} // namespace factorization +} // namespace gko diff --git a/core/factorization/par_ic_kernels.hpp b/core/factorization/par_ic_kernels.hpp new file mode 100644 index 00000000000..4611cf88fa3 --- /dev/null +++ b/core/factorization/par_ic_kernels.hpp @@ -0,0 +1,122 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_FACTORIZATION_PAR_IC_KERNELS_HPP_ +#define GKO_CORE_FACTORIZATION_PAR_IC_KERNELS_HPP_ + + +#include + + +#include + + +#include +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL(ValueType, IndexType) \ + void init_factor(std::shared_ptr exec, \ + matrix::Csr *l_factor) + +#define GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL(ValueType, IndexType) \ + void compute_factor( \ + std::shared_ptr exec, size_type iterations, \ + const matrix::Coo *lower_system_matrix, \ + matrix::Csr *l_factor) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL(ValueType, IndexType) + + +namespace omp { +namespace par_ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ic_factorization +} // namespace omp + + +namespace cuda { +namespace par_ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ic_factorization +} // namespace cuda + + +namespace reference { +namespace par_ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ic_factorization +} // namespace reference + + +namespace hip { +namespace par_ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ic_factorization +} // namespace hip + + +namespace dpcpp { 
+namespace par_ic_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ic_factorization +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_FACTORIZATION_PAR_IC_KERNELS_HPP_ diff --git a/core/factorization/par_ict.cpp b/core/factorization/par_ict.cpp index a1a1408fb79..69025ecd305 100644 --- a/core/factorization/par_ict.cpp +++ b/core/factorization/par_ict.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/utils.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" @@ -109,9 +110,9 @@ struct ParIctState { // current lower factor L std::unique_ptr l; // current upper factor L^H - std::unique_ptr lt; + std::unique_ptr lh; // current product L * L^H - std::unique_ptr llt; + std::unique_ptr llh; // temporary lower factor L' before filtering std::unique_ptr l_new; // lower factor L currently being updated with asynchronous iterations @@ -123,14 +124,14 @@ struct ParIctState { // strategy to be used by the lower factor std::shared_ptr l_strategy; // strategy to be used by the upper factor - std::shared_ptr lt_strategy; + std::shared_ptr lh_strategy; ParIctState(std::shared_ptr exec_in, const CsrMatrix *system_matrix_in, std::unique_ptr l_in, IndexType l_nnz_limit, bool use_approx_select, std::shared_ptr l_strategy_, - std::shared_ptr lt_strategy_) + std::shared_ptr lh_strategy_) : exec{std::move(exec_in)}, l_nnz_limit{l_nnz_limit}, use_approx_select{use_approx_select}, @@ -139,22 +140,22 @@ struct ParIctState { selection_tmp{exec}, 
selection_tmp2{exec}, l_strategy{std::move(l_strategy_)}, - lt_strategy{std::move(lt_strategy_)} + lh_strategy{std::move(lh_strategy_)} { auto mtx_size = system_matrix->get_size(); auto l_nnz = l->get_num_stored_elements(); - lt = CsrMatrix::create(exec, mtx_size, l_nnz); - llt = CsrMatrix::create(exec, mtx_size); + lh = CsrMatrix::create(exec, mtx_size, l_nnz); + llh = CsrMatrix::create(exec, mtx_size); l_new = CsrMatrix::create(exec, mtx_size); l_coo = CooMatrix::create(exec, mtx_size); - exec->run(make_csr_conj_transpose(l.get(), lt.get())); + exec->run(make_csr_conj_transpose(l.get(), lh.get())); } std::unique_ptr> to_factors() && { l->set_strategy(l_strategy); - lt->set_strategy(lt_strategy); - return Composition::create(std::move(l), std::move(lt)); + lh->set_strategy(lh_strategy); + return Composition::create(std::move(l), std::move(lh)); } void iterate(); @@ -175,30 +176,14 @@ ParIct::generate_l_lt( const auto exec = this->get_executor(); // convert and/or sort the matrix if necessary - std::unique_ptr csr_system_matrix_unique_ptr{}; - auto csr_system_matrix = - dynamic_cast(system_matrix.get()); - if (csr_system_matrix == nullptr || - csr_system_matrix->get_executor() != exec) { - csr_system_matrix_unique_ptr = CsrMatrix::create(exec); - as>(system_matrix.get()) - ->convert_to(csr_system_matrix_unique_ptr.get()); - csr_system_matrix = csr_system_matrix_unique_ptr.get(); - } - if (!parameters_.skip_sorting) { - if (csr_system_matrix_unique_ptr == nullptr) { - csr_system_matrix_unique_ptr = CsrMatrix::create(exec); - csr_system_matrix_unique_ptr->copy_from(csr_system_matrix); - } - csr_system_matrix_unique_ptr->sort_by_column_index(); - csr_system_matrix = csr_system_matrix_unique_ptr.get(); - } + auto csr_system_matrix = convert_to_with_sorting( + exec, system_matrix, parameters_.skip_sorting); // initialize the L matrix data structures const auto num_rows = csr_system_matrix->get_size()[0]; Array l_row_ptrs_array{exec, num_rows + 1}; auto l_row_ptrs = 
l_row_ptrs_array.get_data(); - exec->run(make_initialize_row_ptrs_l(csr_system_matrix, l_row_ptrs)); + exec->run(make_initialize_row_ptrs_l(csr_system_matrix.get(), l_row_ptrs)); auto l_nnz = static_cast(exec->copy_val_to_host(l_row_ptrs + num_rows)); @@ -209,14 +194,14 @@ ParIct::generate_l_lt( std::move(l_row_ptrs_array)); // initialize L - exec->run(make_initialize_l(csr_system_matrix, l.get(), true)); + exec->run(make_initialize_l(csr_system_matrix.get(), l.get(), true)); // compute limit #nnz for L auto l_nnz_limit = static_cast(l_nnz * parameters_.fill_in_limit); ParIctState state{exec, - csr_system_matrix, + csr_system_matrix.get(), std::move(l), l_nnz_limit, parameters_.approximate_select, @@ -235,11 +220,11 @@ template void ParIctState::iterate() { // compute L * L^H - exec->run(make_spgemm(l.get(), lt.get(), llt.get())); + exec->run(make_spgemm(l.get(), lh.get(), llh.get())); // add new candidates to L' factor exec->run( - make_add_candidates(llt.get(), system_matrix, l.get(), l_new.get())); + make_add_candidates(llh.get(), system_matrix, l.get(), l_new.get())); // update L(COO), L'^H sizes and pointers { @@ -288,11 +273,11 @@ void ParIctState::iterate() // convert L to L^H { auto l_nnz = l->get_num_stored_elements(); - CsrBuilder lt_builder{lt.get()}; + CsrBuilder lt_builder{lh.get()}; lt_builder.get_col_idx_array().resize_and_reset(l_nnz); lt_builder.get_value_array().resize_and_reset(l_nnz); } - exec->run(make_csr_conj_transpose(l.get(), lt.get())); + exec->run(make_csr_conj_transpose(l.get(), lh.get())); } diff --git a/core/factorization/par_ict_kernels.hpp b/core/factorization/par_ict_kernels.hpp index f02b6ac7bb6..04d0af80e0c 100644 --- a/core/factorization/par_ict_kernels.hpp +++ b/core/factorization/par_ict_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -52,7 +52,7 @@ namespace kernels { #define GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL(ValueType, IndexType) \ void add_candidates(std::shared_ptr exec, \ - const matrix::Csr *llt, \ + const matrix::Csr *llh, \ const matrix::Csr *a, \ const matrix::Csr *l, \ matrix::Csr *l_new) @@ -106,6 +106,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace par_ict_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ict_factorization +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/factorization/par_ilu.cpp b/core/factorization/par_ilu.cpp index d61a27747af..3b5bf605a4c 100644 --- a/core/factorization/par_ilu.cpp +++ b/core/factorization/par_ilu.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -83,10 +83,9 @@ ParIlu::generate_l_u( // Converts the system matrix to CSR. // Throws an exception if it is not convertible. 
- auto csr_system_matrix_unique_ptr = CsrMatrix::create(exec); + auto csr_system_matrix = CsrMatrix::create(exec); as>(system_matrix.get()) - ->convert_to(csr_system_matrix_unique_ptr.get()); - auto csr_system_matrix = csr_system_matrix_unique_ptr.get(); + ->convert_to(csr_system_matrix.get()); // If necessary, sort it if (!skip_sorting) { csr_system_matrix->sort_by_column_index(); @@ -94,14 +93,14 @@ ParIlu::generate_l_u( // Add explicit diagonal zero elements if they are missing exec->run(par_ilu_factorization::make_add_diagonal_elements( - csr_system_matrix, true)); + csr_system_matrix.get(), true)); const auto matrix_size = csr_system_matrix->get_size(); const auto number_rows = matrix_size[0]; Array l_row_ptrs{exec, number_rows + 1}; Array u_row_ptrs{exec, number_rows + 1}; exec->run(par_ilu_factorization::make_initialize_row_ptrs_l_u( - csr_system_matrix, l_row_ptrs.get_data(), u_row_ptrs.get_data())); + csr_system_matrix.get(), l_row_ptrs.get_data(), u_row_ptrs.get_data())); // Get nnz from device memory auto l_nnz = static_cast( @@ -123,7 +122,7 @@ ParIlu::generate_l_u( std::move(u_row_ptrs), u_strategy); exec->run(par_ilu_factorization::make_initialize_l_u( - csr_system_matrix, l_factor.get(), u_factor.get())); + csr_system_matrix.get(), l_factor.get(), u_factor.get())); // We use `transpose()` here to convert the Csr format to Csc. auto u_factor_transpose_lin_op = u_factor->transpose(); @@ -140,18 +139,10 @@ ParIlu::generate_l_u( // If it was not, and we already own a CSR `system_matrix`, // we can move the Csr matrix to Coo, which has very little overhead. - // Otherwise, we convert from the Csr matrix, since it is the conversion - // with the least overhead. - // We also have to convert / move from the CSR matrix if it was not already - // sorted (in which case we definitively own a CSR `system_matrix`). + // We also have to move from the CSR matrix if it was not already sorted. 
if (!skip_sorting || coo_system_matrix_ptr == nullptr) { coo_system_matrix_unique_ptr = CooMatrix::create(exec); - if (csr_system_matrix_unique_ptr == nullptr) { - csr_system_matrix->convert_to(coo_system_matrix_unique_ptr.get()); - } else { - csr_system_matrix_unique_ptr->move_to( - coo_system_matrix_unique_ptr.get()); - } + csr_system_matrix->move_to(coo_system_matrix_unique_ptr.get()); coo_system_matrix_ptr = coo_system_matrix_unique_ptr.get(); } diff --git a/core/factorization/par_ilu_kernels.hpp b/core/factorization/par_ilu_kernels.hpp index 09bc1dd2596..ea612d66ad9 100644 --- a/core/factorization/par_ilu_kernels.hpp +++ b/core/factorization/par_ilu_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -97,6 +97,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace par_ilu_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ilu_factorization +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/factorization/par_ilut.cpp b/core/factorization/par_ilut.cpp index 1eb3dfeb950..69370659cc3 100644 --- a/core/factorization/par_ilut.cpp +++ b/core/factorization/par_ilut.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/utils.hpp" #include "core/factorization/factorization_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" @@ -191,24 +192,8 @@ ParIlut::generate_l_u( const auto exec = this->get_executor(); // convert and/or sort the matrix if necessary - std::unique_ptr csr_system_matrix_unique_ptr{}; - auto csr_system_matrix = - dynamic_cast(system_matrix.get()); - if (csr_system_matrix == nullptr || - csr_system_matrix->get_executor() != exec) { - csr_system_matrix_unique_ptr = CsrMatrix::create(exec); - as>(system_matrix.get()) - ->convert_to(csr_system_matrix_unique_ptr.get()); - csr_system_matrix = csr_system_matrix_unique_ptr.get(); - } - if (!parameters_.skip_sorting) { - if (csr_system_matrix_unique_ptr == nullptr) { - csr_system_matrix_unique_ptr = CsrMatrix::create(exec); - csr_system_matrix_unique_ptr->copy_from(csr_system_matrix); - } - csr_system_matrix_unique_ptr->sort_by_column_index(); - csr_system_matrix = csr_system_matrix_unique_ptr.get(); - } + auto csr_system_matrix = convert_to_with_sorting( + exec, system_matrix, parameters_.skip_sorting); // initialize the L and U matrix data structures const auto num_rows = csr_system_matrix->get_size()[0]; @@ -216,7 +201,7 @@ ParIlut::generate_l_u( Array u_row_ptrs_array{exec, num_rows + 1}; auto l_row_ptrs = l_row_ptrs_array.get_data(); auto u_row_ptrs = u_row_ptrs_array.get_data(); - exec->run(make_initialize_row_ptrs_l_u(csr_system_matrix, l_row_ptrs, + exec->run(make_initialize_row_ptrs_l_u(csr_system_matrix.get(), l_row_ptrs, u_row_ptrs)); auto l_nnz = @@ -233,7 +218,7 @@ ParIlut::generate_l_u( std::move(u_row_ptrs_array)); // initialize L and U - exec->run(make_initialize_l_u(csr_system_matrix, l.get(), u.get())); + exec->run(make_initialize_l_u(csr_system_matrix.get(), l.get(), u.get())); // compute limit #nnz for L and U auto l_nnz_limit = @@ -242,7 +227,7 @@ ParIlut::generate_l_u( static_cast(u_nnz * 
parameters_.fill_in_limit); ParIlutState state{exec, - csr_system_matrix, + csr_system_matrix.get(), std::move(l), std::move(u), l_nnz_limit, @@ -352,4 +337,4 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PAR_ILUT); } // namespace factorization -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/core/factorization/par_ilut_kernels.hpp b/core/factorization/par_ilut_kernels.hpp index 9bb19596c3f..c484caaf81d 100644 --- a/core/factorization/par_ilut_kernels.hpp +++ b/core/factorization/par_ilut_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -143,6 +143,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace par_ilut_factorization { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace par_ilut_factorization +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/log/convergence.cpp b/core/log/convergence.cpp index 9947e40fc60..7db17ebb531 100644 --- a/core/log/convergence.cpp +++ b/core/log/convergence.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -46,16 +46,30 @@ namespace log { template void Convergence::on_criterion_check_completed( const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, + const LinOp *residual, const LinOp *residual_norm, + const LinOp *implicit_sq_resnorm, const LinOp *solution, const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &oneChanged, - const bool &converged) const + const Array *status, const bool &one_changed, + const bool &stopped) const { - if (converged) { + if (stopped) { + Array tmp(status->get_executor()->get_master(), + *status); + this->convergence_status_ = true; + for (int i = 0; i < status->get_num_elems(); i++) { + if (!tmp.get_data()[i].has_converged()) { + this->convergence_status_ = false; + break; + } + } this->num_iterations_ = num_iterations; if (residual != nullptr) { this->residual_.reset(residual->clone().release()); } + if (implicit_sq_resnorm != nullptr) { + this->implicit_sq_resnorm_.reset( + implicit_sq_resnorm->clone().release()); + } if (residual_norm != nullptr) { this->residual_norm_.reset(residual_norm->clone().release()); } else if (residual != nullptr) { @@ -70,6 +84,20 @@ void Convergence::on_criterion_check_completed( } +template +void Convergence::on_criterion_check_completed( + const stop::Criterion *criterion, const size_type &num_iterations, + const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, + const uint8 &stopping_id, const bool &set_finalized, + const Array *status, const bool &one_changed, + const bool &stopped) const +{ + this->on_criterion_check_completed( + criterion, num_iterations, residual, residual_norm, nullptr, solution, + stopping_id, set_finalized, status, one_changed, stopped); +} + + #define GKO_DECLARE_CONVERGENCE(_type) class Convergence<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CONVERGENCE); 
diff --git a/core/log/logger.cpp b/core/log/logger.cpp index 46ee98b2895..0dc36f98948 100644 --- a/core/log/logger.cpp +++ b/core/log/logger.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/log/papi.cpp b/core/log/papi.cpp index 1c8a17419fa..bfb5a16d00b 100644 --- a/core/log/papi.cpp +++ b/core/log/papi.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -256,6 +256,17 @@ void Papi::on_iteration_complete(const LinOp *solver, const LinOp *residual, const LinOp *solution, const LinOp *residual_norm) const +{ + this->on_iteration_complete(solver, num_iterations, residual, solution, + residual_norm, nullptr); +} + + +template +void Papi::on_iteration_complete( + const LinOp *solver, const size_type &num_iterations, const LinOp *residual, + const LinOp *solution, const LinOp *residual_norm, + const LinOp *implicit_sq_residual_norm) const { iteration_complete.get_counter(solver) = num_iterations; } diff --git a/core/log/record.cpp b/core/log/record.cpp index 48026c1563b..ba249edf3e6 100644 --- a/core/log/record.cpp +++ b/core/log/record.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -244,7 +244,8 @@ void Record::on_criterion_check_started( void Record::on_criterion_check_completed( const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, + const LinOp *residual, const LinOp *residual_norm, + const LinOp *implicit_residual_norm_sq, const LinOp *solution, const uint8 &stopping_id, const bool &set_finalized, const Array *status, const bool &oneChanged, const bool &converged) const @@ -257,15 +258,40 @@ void Record::on_criterion_check_completed( } +void Record::on_criterion_check_completed( + const stop::Criterion *criterion, const size_type &num_iterations, + const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, + const uint8 &stopping_id, const bool &set_finalized, + const Array *status, const bool &oneChanged, + const bool &converged) const +{ + this->on_criterion_check_completed( + criterion, num_iterations, residual, residual_norm, nullptr, solution, + stopping_id, set_finalized, status, oneChanged, converged); +} + + void Record::on_iteration_complete(const LinOp *solver, const size_type &num_iterations, const LinOp *residual, const LinOp *solution, const LinOp *residual_norm) const +{ + this->on_iteration_complete(solver, num_iterations, residual, solution, + residual_norm, nullptr); +} + + +void Record::on_iteration_complete(const LinOp *solver, + const size_type &num_iterations, + const LinOp *residual, const LinOp *solution, + const LinOp *residual_norm, + const LinOp *implicit_sq_residual_norm) const { append_deque( data_.iteration_completed, (std::unique_ptr(new iteration_complete_data{ - solver, num_iterations, residual, solution, residual_norm}))); + solver, num_iterations, residual, solution, residual_norm, + implicit_sq_residual_norm}))); } diff --git a/core/log/stream.cpp b/core/log/stream.cpp index 3cad7421aee..bc0c32a7c48 100644 --- a/core/log/stream.cpp 
+++ b/core/log/stream.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -428,12 +428,24 @@ void Stream::on_iteration_complete(const LinOp *solver, const LinOp *residual, const LinOp *solution, const LinOp *residual_norm) const +{ + this->on_iteration_complete(solver, num_iterations, residual, solution, + residual_norm, nullptr); +} + + +template +void Stream::on_iteration_complete( + const LinOp *solver, const size_type &num_iterations, const LinOp *residual, + const LinOp *solution, const LinOp *residual_norm, + const LinOp *implicit_sq_residual_norm) const { os_ << prefix_ << "iteration " << num_iterations << " completed with solver " << demangle_name(solver) << " with residual " << demangle_name(residual) << ", solution " - << demangle_name(solution) << " and residual_norm " - << demangle_name(residual_norm) << std::endl; + << demangle_name(solution) << ", residual_norm " + << demangle_name(residual_norm) << " and implicit_sq_residual_norm " + << demangle_name(implicit_sq_residual_norm) << std::endl; if (verbose_) { os_ << demangle_name(residual) << as>(residual) << std::endl; @@ -446,6 +458,11 @@ void Stream::on_iteration_complete(const LinOp *solver, << as>(residual_norm) << std::endl; } + if (implicit_sq_residual_norm != nullptr) { + os_ << demangle_name(implicit_sq_residual_norm) + << as>(implicit_sq_residual_norm) + << std::endl; + } } } diff --git a/core/matrix/coo.cpp b/core/matrix/coo.cpp index dd5a4dda553..efa2b160a92 100644 --- a/core/matrix/coo.cpp +++ b/core/matrix/coo.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -40,11 +40,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/coo_kernels.hpp" @@ -64,6 +66,10 @@ GKO_REGISTER_OPERATION(convert_to_csr, coo::convert_to_csr); GKO_REGISTER_OPERATION(convert_to_dense, coo::convert_to_dense); GKO_REGISTER_OPERATION(extract_diagonal, coo::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace coo @@ -72,8 +78,11 @@ GKO_REGISTER_OPERATION(fill_array, components::fill_array); template void Coo::apply_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(coo::make_spmv(this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run(coo::make_spmv(this, dense_b, dense_x)); + }, + b, x); } @@ -81,18 +90,23 @@ template void Coo::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(coo::make_advanced_spmv( - as(alpha), this, as(b), as(beta), as(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + this->get_executor()->run(coo::make_advanced_spmv( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); } template void Coo::apply2_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run( - coo::make_spmv2(this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run(coo::make_spmv2(this, dense_b, dense_x)); + }, + 
b, x); } @@ -100,9 +114,12 @@ template void Coo::apply2_impl(const LinOp *alpha, const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(coo::make_advanced_spmv2( - as(alpha), this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_x) { + this->get_executor()->run( + coo::make_advanced_spmv2(dense_alpha, this, dense_b, dense_x)); + }, + alpha, b, x); } @@ -233,6 +250,35 @@ Coo::extract_diagonal() const } +template +void Coo::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(coo::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Coo::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_coo = absolute_type::create(exec, this->get_size(), + this->get_num_stored_elements()); + + abs_coo->col_idxs_ = col_idxs_; + abs_coo->row_idxs_ = row_idxs_; + exec->run(coo::make_outplace_absolute_array(this->get_const_values(), + this->get_num_stored_elements(), + abs_coo->get_values())); + + return abs_coo; +} + + #define GKO_DECLARE_COO_MATRIX(ValueType, IndexType) \ class Coo GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_MATRIX); diff --git a/core/matrix/coo_builder.hpp b/core/matrix/coo_builder.hpp index de323ad42b6..7c4ce38cb89 100644 --- a/core/matrix/coo_builder.hpp +++ b/core/matrix/coo_builder.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/coo_kernels.hpp b/core/matrix/coo_kernels.hpp index 752798c4e2e..2f2134cf183 100644 --- a/core/matrix/coo_kernels.hpp +++ b/core/matrix/coo_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -140,6 +140,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace coo { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace coo +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 1863eea2959..cd09474cf63 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/csr_kernels.hpp" @@ -68,10 +70,11 @@ GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell); GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); +GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); GKO_REGISTER_OPERATION(row_permute, csr::row_permute); -GKO_REGISTER_OPERATION(column_permute, csr::column_permute); GKO_REGISTER_OPERATION(inverse_row_permute, csr::inverse_row_permute); GKO_REGISTER_OPERATION(inverse_column_permute, csr::inverse_column_permute); +GKO_REGISTER_OPERATION(invert_permutation, csr::invert_permutation); GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, csr::calculate_max_nnz_per_row); GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, @@ -81,6 +84,10 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, csr::is_sorted_by_column_index); GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace csr @@ -89,16 +96,19 @@ GKO_REGISTER_OPERATION(fill_array, components::fill_array); template void Csr::apply_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; + using ComplexDense = Dense>; using TCsr = Csr; if (auto b_csr = dynamic_cast(b)) { // if b is a CSR matrix, we compute a SpGeMM auto x_csr = as(x); this->get_executor()->run(csr::make_spgemm(this, b_csr, x_csr)); } else { - // otherwise we assume that b is dense and compute a SpMV/SpMM - this->get_executor()->run( - csr::make_spmv(this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run( + 
csr::make_spmv(this, dense_b, dense_x)); + }, + b, x); } } @@ -107,26 +117,31 @@ template void Csr::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - using Dense = Dense; + using ComplexDense = Dense>; + using RealDense = Dense>; using TCsr = Csr; if (auto b_csr = dynamic_cast(b)) { // if b is a CSR matrix, we compute a SpGeMM auto x_csr = as(x); auto x_copy = x_csr->clone(); - this->get_executor()->run( - csr::make_advanced_spgemm(as(alpha), this, b_csr, - as(beta), x_copy.get(), x_csr)); + this->get_executor()->run(csr::make_advanced_spgemm( + as>(alpha), this, b_csr, + as>(beta), x_copy.get(), x_csr)); } else if (dynamic_cast *>(b)) { // if b is an identity matrix, we compute an SpGEAM auto x_csr = as(x); auto x_copy = x_csr->clone(); - this->get_executor()->run(csr::make_spgeam( - as(alpha), this, as(beta), lend(x_copy), x_csr)); - } else { - // otherwise we assume that b is dense and compute a SpMV/SpMM this->get_executor()->run( - csr::make_advanced_spmv(as(alpha), this, as(b), - as(beta), as(x))); + csr::make_spgeam(as>(alpha), this, + as>(beta), lend(x_copy), x_csr)); + } else { + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, + auto dense_x) { + this->get_executor()->run(csr::make_advanced_spmv( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); } } @@ -379,6 +394,48 @@ std::unique_ptr Csr::conj_transpose() const } +template +std::unique_ptr Csr::permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = + Csr::create(exec, this->get_size(), this->get_num_stored_elements(), + this->get_strategy()); + Array inv_permutation(exec, this->get_size()[1]); + + exec->run(csr::make_invert_permutation( + this->get_size()[1], + make_temporary_clone(exec, permutation_indices)->get_const_data(), + 
inv_permutation.get_data())); + exec->run(csr::make_inv_symm_permute(inv_permutation.get_const_data(), this, + permute_cpy.get())); + permute_cpy->make_srow(); + return std::move(permute_cpy); +} + + +template +std::unique_ptr Csr::inverse_permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = + Csr::create(exec, this->get_size(), this->get_num_stored_elements(), + this->get_strategy()); + + exec->run(csr::make_inv_symm_permute( + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + permute_cpy.get())); + permute_cpy->make_srow(); + return std::move(permute_cpy); +} + + template std::unique_ptr Csr::row_permute( const Array *permutation_indices) const @@ -389,8 +446,9 @@ std::unique_ptr Csr::row_permute( Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); - exec->run( - csr::make_row_permute(permutation_indices, this, permute_cpy.get())); + exec->run(csr::make_row_permute( + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + permute_cpy.get())); permute_cpy->make_srow(); return std::move(permute_cpy); } @@ -405,27 +463,33 @@ std::unique_ptr Csr::column_permute( auto permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); - - exec->run( - csr::make_column_permute(permutation_indices, this, permute_cpy.get())); + Array inv_permutation(exec, this->get_size()[1]); + + exec->run(csr::make_invert_permutation( + this->get_size()[1], + make_temporary_clone(exec, permutation_indices)->get_const_data(), + inv_permutation.get_data())); + exec->run(csr::make_inverse_column_permute(inv_permutation.get_const_data(), + this, permute_cpy.get())); permute_cpy->make_srow(); + permute_cpy->sort_by_column_index(); return std::move(permute_cpy); } template std::unique_ptr 
Csr::inverse_row_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); auto inverse_permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); - exec->run(csr::make_inverse_row_permute(inverse_permutation_indices, this, - inverse_permute_cpy.get())); + exec->run(csr::make_inverse_row_permute( + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); return std::move(inverse_permute_cpy); } @@ -433,18 +497,19 @@ std::unique_ptr Csr::inverse_row_permute( template std::unique_ptr Csr::inverse_column_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); auto inverse_permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); exec->run(csr::make_inverse_column_permute( - inverse_permutation_indices, this, inverse_permute_cpy.get())); + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); + inverse_permute_cpy->sort_by_column_index(); return std::move(inverse_permute_cpy); } @@ -482,6 +547,36 @@ Csr::extract_diagonal() const } +template +void Csr::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(csr::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Csr::compute_absolute() const +{ + auto exec = this->get_executor(); + 
+ auto abs_csr = absolute_type::create(exec, this->get_size(), + this->get_num_stored_elements()); + + abs_csr->col_idxs_ = col_idxs_; + abs_csr->row_ptrs_ = row_ptrs_; + exec->run(csr::make_outplace_absolute_array(this->get_const_values(), + this->get_num_stored_elements(), + abs_csr->get_values())); + + convert_strategy_helper(abs_csr.get()); + return abs_csr; +} + + #define GKO_DECLARE_CSR_MATRIX(ValueType, IndexType) \ class Csr GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_MATRIX); diff --git a/core/matrix/csr_builder.hpp b/core/matrix/csr_builder.hpp index 73f892dc3a8..37b4ac76f24 100644 --- a/core/matrix/csr_builder.hpp +++ b/core/matrix/csr_builder.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 92d9f462dfd..89cb7fd7e7c 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -129,31 +129,36 @@ namespace kernels { const matrix::Csr *orig, \ matrix::Csr *trans) +#define GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_symm_permute(std::shared_ptr exec, \ + const IndexType *permutation_indices, \ + const matrix::Csr *orig, \ + matrix::Csr *permuted) + #define GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void row_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *row_permuted) -#define GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ - void column_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *column_permuted) - #define GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_row_permute(std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *row_permuted) #define GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_column_permute( \ std::shared_ptr exec, \ - const Array *permutation_indices, \ + const IndexType *permutation_indices, \ const matrix::Csr *orig, \ matrix::Csr *column_permuted) +#define GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) \ + void invert_permutation( \ + std::shared_ptr exec, size_type size, \ + const IndexType *permutation_indices, IndexType *inv_permutation) + #define GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) \ void calculate_max_nnz_per_row( \ std::shared_ptr exec, \ @@ -208,13 +213,15 @@ namespace kernels { template \ GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - 
GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType); \ template \ GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ template \ @@ -263,6 +270,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace csr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace csr +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index aab1d2e598a..abf0fe57ac5 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -41,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include @@ -61,10 +63,16 @@ namespace dense { GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply); GKO_REGISTER_OPERATION(apply, dense::apply); +GKO_REGISTER_OPERATION(copy, dense::copy); +GKO_REGISTER_OPERATION(fill, dense::fill); GKO_REGISTER_OPERATION(scale, dense::scale); +GKO_REGISTER_OPERATION(inv_scale, dense::inv_scale); GKO_REGISTER_OPERATION(add_scaled, dense::add_scaled); +GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled); GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag); +GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag); GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot); +GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2); GKO_REGISTER_OPERATION(count_nonzeros, dense::count_nonzeros); GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, @@ -74,7 +82,9 @@ GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, GKO_REGISTER_OPERATION(calculate_total_cols, dense::calculate_total_cols); GKO_REGISTER_OPERATION(transpose, dense::transpose); GKO_REGISTER_OPERATION(conj_transpose, dense::conj_transpose); -GKO_REGISTER_OPERATION(row_permute, dense::row_permute); +GKO_REGISTER_OPERATION(symm_permute, dense::symm_permute); +GKO_REGISTER_OPERATION(inv_symm_permute, dense::inv_symm_permute); +GKO_REGISTER_OPERATION(row_gather, dense::row_gather); GKO_REGISTER_OPERATION(column_permute, dense::column_permute); GKO_REGISTER_OPERATION(inverse_row_permute, dense::inverse_row_permute); GKO_REGISTER_OPERATION(inverse_column_permute, dense::inverse_column_permute); @@ -85,6 +95,11 @@ GKO_REGISTER_OPERATION(convert_to_hybrid, dense::convert_to_hybrid); GKO_REGISTER_OPERATION(convert_to_sellp, dense::convert_to_sellp); GKO_REGISTER_OPERATION(convert_to_sparsity_csr, dense::convert_to_sparsity_csr); GKO_REGISTER_OPERATION(extract_diagonal, dense::extract_diagonal); 
+GKO_REGISTER_OPERATION(inplace_absolute_dense, dense::inplace_absolute_dense); +GKO_REGISTER_OPERATION(outplace_absolute_dense, dense::outplace_absolute_dense); +GKO_REGISTER_OPERATION(make_complex, dense::make_complex); +GKO_REGISTER_OPERATION(get_real, dense::get_real); +GKO_REGISTER_OPERATION(get_imag, dense::get_imag); } // namespace dense @@ -213,8 +228,12 @@ inline void conversion_helper(SparsityCsr *result, template void Dense::apply_impl(const LinOp *b, LinOp *x) const { - this->get_executor()->run(dense::make_simple_apply( - this, as>(b), as>(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run( + dense::make_simple_apply(this, dense_b, dense_x)); + }, + b, x); } @@ -222,9 +241,33 @@ template void Dense::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - this->get_executor()->run(dense::make_apply( - as>(alpha), this, as>(b), - as>(beta), as>(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + this->get_executor()->run(dense::make_apply( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); +} + + +template +void Dense::fill(const ValueType value) +{ + this->get_executor()->run(dense::make_fill(this, value)); +} + + +template +void Dense::inv_scale_impl(const LinOp *alpha) +{ + GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); + if (alpha->get_size()[1] != 1) { + // different alpha for each column + GKO_ASSERT_EQUAL_COLS(this, alpha); + } + auto exec = this->get_executor(); + exec->run(dense::make_inv_scale( + make_temporary_conversion(alpha).get(), this)); } @@ -237,7 +280,8 @@ void Dense::scale_impl(const LinOp *alpha) GKO_ASSERT_EQUAL_COLS(this, alpha); } auto exec = this->get_executor(); - exec->run(dense::make_scale(as>(alpha), this)); + exec->run(dense::make_scale( + make_temporary_conversion(alpha).get(), this)); } @@ -256,11 +300,34 @@ void Dense::add_scaled_impl(const LinOp 
*alpha, const LinOp *b) exec->run(dense::make_add_scaled_diag( as>(alpha), dynamic_cast *>(b), this)); - return; + } else { + exec->run(dense::make_add_scaled( + make_temporary_conversion(alpha).get(), + make_temporary_conversion(b).get(), this)); } +} + - exec->run(dense::make_add_scaled(as>(alpha), - as>(b), this)); +template +void Dense::sub_scaled_impl(const LinOp *alpha, const LinOp *b) +{ + GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); + if (alpha->get_size()[1] != 1) { + // different alpha for each column + GKO_ASSERT_EQUAL_COLS(this, alpha); + } + GKO_ASSERT_EQUAL_DIMENSIONS(this, b); + auto exec = this->get_executor(); + + if (dynamic_cast *>(b)) { + exec->run(dense::make_sub_scaled_diag( + as>(alpha), + dynamic_cast *>(b), this)); + } else { + exec->run(dense::make_sub_scaled( + make_temporary_conversion(alpha).get(), + make_temporary_conversion(b).get(), this)); + } } @@ -270,19 +337,65 @@ void Dense::compute_dot_impl(const LinOp *b, LinOp *result) const GKO_ASSERT_EQUAL_DIMENSIONS(this, b); GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); - exec->run(dense::make_compute_dot(this, as>(b), - as>(result))); + auto dense_b = make_temporary_conversion(b); + auto dense_res = make_temporary_conversion(result); + exec->run(dense::make_compute_dot(this, dense_b.get(), dense_res.get())); +} + + +template +void Dense::compute_conj_dot_impl(const LinOp *b, + LinOp *result) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(this, b); + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + auto dense_b = make_temporary_conversion(b); + auto dense_res = make_temporary_conversion(result); + exec->run( + dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); } template void Dense::compute_norm2_impl(LinOp *result) const { - using NormVector = Dense>; GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); - 
exec->run(dense::make_compute_norm2(as>(this), - as(result))); + auto dense_res = + make_temporary_conversion>(result); + exec->run(dense::make_compute_norm2(this, dense_res.get())); +} + + +template +void Dense::convert_to(Dense *result) const +{ + if (this->get_size() && result->get_size() == this->get_size()) { + // we need to create a executor-local clone of the target data, that + // will be copied back later. + auto exec = this->get_executor(); + auto result_array = make_temporary_output_clone(exec, &result->values_); + // create a (value, not pointer to avoid allocation overhead) view + // matrix on the array to avoid special-casing cross-executor copies + auto tmp_result = + Dense{exec, result->get_size(), + Array::view(exec, result_array->get_num_elems(), + result_array->get_data()), + result->get_stride()}; + exec->run(dense::make_copy(this, &tmp_result)); + } else { + result->values_ = this->values_; + result->stride_ = this->stride_; + result->set_size(this->get_size()); + } +} + + +template +void Dense::move_to(Dense *result) +{ + this->convert_to(result); } @@ -290,9 +403,15 @@ template void Dense::convert_to( Dense> *result) const { - result->values_ = this->values_; - result->stride_ = this->stride_; - result->set_size(this->get_size()); + if (result->get_size() == this->get_size()) { + auto exec = this->get_executor(); + exec->run(dense::make_copy( + this, make_temporary_output_clone(exec, result).get())); + } else { + result->values_ = this->values_; + result->stride_ = this->stride_; + result->set_size(this->get_size()); + } } @@ -596,53 +715,229 @@ void Dense::write(mat_data32 &data) const template std::unique_ptr Dense::transpose() const { + auto result = + Dense::create(this->get_executor(), gko::transpose(this->get_size())); + this->transpose(result.get()); + return result; +} + + +template +std::unique_ptr Dense::conj_transpose() const +{ + auto result = + Dense::create(this->get_executor(), gko::transpose(this->get_size())); + 
this->conj_transpose(result.get()); + return result; +} + + +template +void Dense::transpose(Dense *output) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); - auto trans_cpy = Dense::create(exec, gko::transpose(this->get_size())); + exec->run(dense::make_transpose( + this, make_temporary_output_clone(exec, output).get())); +} - exec->run(dense::make_transpose(this, trans_cpy.get())); - return std::move(trans_cpy); +template +void Dense::conj_transpose(Dense *output) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); + auto exec = this->get_executor(); + exec->run(dense::make_conj_transpose( + this, make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::conj_transpose() const +template +void Dense::permute_impl(const Array *permutation_indices, + Dense *output) const { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); - auto trans_cpy = Dense::create(exec, gko::transpose(this->get_size())); - exec->run(dense::make_conj_transpose(this, trans_cpy.get())); - return std::move(trans_cpy); + exec->run(dense::make_symm_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::row_permute( - const Array *permutation_indices) const +template +void Dense::inverse_permute_impl( + const Array *permutation_indices, Dense *output) const { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run( - dense::make_row_permute(permutation_indices, this, permute_cpy.get())); + exec->run(dense::make_inv_symm_permute( + 
make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); +} - return std::move(permute_cpy); + +template +template +void Dense::row_permute_impl( + const Array *permutation_indices, Dense *output) const +{ + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + auto exec = this->get_executor(); + + exec->run(dense::make_row_gather( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::column_permute( - const Array *permutation_indices) const +template +void Dense::row_gather_impl(const Array *row_indices, + Dense *row_gathered) const +{ + auto exec = this->get_executor(); + dim<2> expected_dim{row_indices->get_num_elems(), this->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_gathered); + + exec->run(dense::make_row_gather( + make_temporary_clone(exec, row_indices).get(), this, + make_temporary_output_clone(exec, row_gathered).get())); +} + + +template +template +void Dense::column_permute_impl( + const Array *permutation_indices, Dense *output) const +{ + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + auto exec = this->get_executor(); + + exec->run(dense::make_column_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); +} + + +template +template +void Dense::inverse_row_permute_impl( + const Array *permutation_indices, Dense *output) const +{ + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + auto exec = this->get_executor(); + + exec->run(dense::make_inverse_row_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); +} + + +template +template +void 
Dense::inverse_column_permute_impl( + const Array *permutation_indices, Dense *output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_column_permute(permutation_indices, this, - permute_cpy.get())); + exec->run(dense::make_inverse_column_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); +} + + +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::permute(const Array *permutation_indices, + Dense *output) const +{ + this->permute_impl(permutation_indices, output); +} + + +template +void Dense::permute(const Array *permutation_indices, + Dense *output) const +{ + this->permute_impl(permutation_indices, output); +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::inverse_permute(const Array *permutation_indices, + Dense *output) const +{ + this->inverse_permute_impl(permutation_indices, output); +} 
- return std::move(permute_cpy); + +template +void Dense::inverse_permute(const Array *permutation_indices, + Dense *output) const +{ + this->inverse_permute_impl(permutation_indices, output); +} + + +template +std::unique_ptr Dense::row_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->row_permute(permutation_indices, result.get()); + return result; } @@ -650,14 +945,75 @@ template std::unique_ptr Dense::row_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->row_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::row_permute(const Array *permutation_indices, + Dense *output) const +{ + this->row_permute_impl(permutation_indices, output); +} + + +template +void Dense::row_permute(const Array *permutation_indices, + Dense *output) const +{ + this->row_permute_impl(permutation_indices, output); +} + + +template +std::unique_ptr> Dense::row_gather( + const Array *row_indices) const +{ auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); + dim<2> out_dim{row_indices->get_num_elems(), this->get_size()[1]}; + auto result = Dense::create(exec, out_dim); + this->row_gather(row_indices, result.get()); + return result; +} + + +template +std::unique_ptr> Dense::row_gather( + const Array *row_indices) const +{ + auto exec = this->get_executor(); + dim<2> out_dim{row_indices->get_num_elems(), this->get_size()[1]}; + auto result = Dense::create(exec, out_dim); + this->row_gather(row_indices, result.get()); + return result; +} - exec->run( - dense::make_row_permute(permutation_indices, this, permute_cpy.get())); - return std::move(permute_cpy); +template +void Dense::row_gather(const Array *row_indices, + Dense *row_gathered) const +{ + 
this->row_gather_impl(row_indices, row_gathered); +} + + +template +void Dense::row_gather(const Array *row_indices, + Dense *row_gathered) const +{ + this->row_gather_impl(row_indices, row_gathered); +} + + +template +std::unique_ptr Dense::column_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->column_permute(permutation_indices, result.get()); + return result; } @@ -665,90 +1021,215 @@ template std::unique_ptr Dense::column_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->column_permute(permutation_indices, result.get()); + return result; +} - exec->run(dense::make_column_permute(permutation_indices, this, - permute_cpy.get())); - return std::move(permute_cpy); +template +void Dense::column_permute(const Array *permutation_indices, + Dense *output) const +{ + this->column_permute_impl(permutation_indices, output); +} + + +template +void Dense::column_permute(const Array *permutation_indices, + Dense *output) const +{ + this->column_permute_impl(permutation_indices, output); } template std::unique_ptr Dense::inverse_row_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_row_permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::inverse_row_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + 
this->inverse_row_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::inverse_row_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_row_permute_impl(permutation_indices, output); +} - exec->run(dense::make_inverse_row_permute(inverse_permutation_indices, this, - inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +void Dense::inverse_row_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_row_permute_impl(permutation_indices, output); } template std::unique_ptr Dense::inverse_column_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_column_permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::inverse_column_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_column_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::inverse_column_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_column_permute_impl(permutation_indices, output); +} + + +template +void Dense::inverse_column_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_column_permute_impl(permutation_indices, output); +} + + +template +void Dense::extract_diagonal(Diagonal *output) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + GKO_ASSERT_EQ(output->get_size()[0], diag_size); + + exec->run(dense::make_extract_diagonal( + this, make_temporary_output_clone(exec, output).get())); 
+} - exec->run(dense::make_inverse_column_permute( - inverse_permutation_indices, this, inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +std::unique_ptr> Dense::extract_diagonal() const +{ + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + auto diag = Diagonal::create(this->get_executor(), diag_size); + this->extract_diagonal(diag.get()); + return diag; } template -std::unique_ptr Dense::inverse_row_permute( - const Array *inverse_permutation_indices) const +void Dense::compute_absolute_inplace() { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); + this->get_executor()->run(dense::make_inplace_absolute_dense(this)); +} + + +template +std::unique_ptr::absolute_type> +Dense::compute_absolute() const +{ + // do not inherit the stride + auto result = absolute_type::create(this->get_executor(), this->get_size()); + this->compute_absolute(result.get()); + return result; +} + + +template +void Dense::compute_absolute( + Dense::absolute_type *output) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_inverse_row_permute(inverse_permutation_indices, this, - inverse_permute_cpy.get())); + exec->run(dense::make_outplace_absolute_dense( + this, make_temporary_output_clone(exec, output).get())); +} + - return std::move(inverse_permute_cpy); +template +std::unique_ptr::complex_type> +Dense::make_complex() const +{ + auto result = complex_type::create(this->get_executor(), this->get_size()); + this->make_complex(result.get()); + return result; } template -std::unique_ptr Dense::inverse_column_permute( - const Array *inverse_permutation_indices) const +void Dense::make_complex( + typename Dense::complex_type *result) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, result); auto 
exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_inverse_column_permute( - inverse_permutation_indices, this, inverse_permute_cpy.get())); + exec->run(dense::make_make_complex( + this, make_temporary_output_clone(exec, result).get())); +} - return std::move(inverse_permute_cpy); + +template +std::unique_ptr::real_type> +Dense::get_real() const +{ + auto result = real_type::create(this->get_executor(), this->get_size()); + this->get_real(result.get()); + return result; } template -std::unique_ptr> Dense::extract_diagonal() const +void Dense::get_real( + typename Dense::real_type *result) const { + GKO_ASSERT_EQUAL_DIMENSIONS(this, result); auto exec = this->get_executor(); - const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); - auto diag = Diagonal::create(exec, diag_size); - exec->run(dense::make_extract_diagonal(this, lend(diag))); - return diag; + exec->run(dense::make_get_real( + this, make_temporary_output_clone(exec, result).get())); +} + + +template +std::unique_ptr::real_type> +Dense::get_imag() const +{ + auto result = real_type::create(this->get_executor(), this->get_size()); + this->get_imag(result.get()); + return result; +} + + +template +void Dense::get_imag( + typename Dense::real_type *result) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(this, result); + auto exec = this->get_executor(); + + exec->run(dense::make_get_imag( + this, make_temporary_output_clone(exec, result).get())); } @@ -757,6 +1238,4 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_MATRIX); } // namespace matrix - - } // namespace gko diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 8edeec6878e..74633826cac 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All 
rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include #include #include @@ -57,27 +60,57 @@ namespace kernels { const matrix::Dense<_type> *a, const matrix::Dense<_type> *b, \ const matrix::Dense<_type> *beta, matrix::Dense<_type> *c) +#define GKO_DECLARE_DENSE_COPY_KERNEL(_intype, _outtype) \ + void copy(std::shared_ptr exec, \ + const matrix::Dense<_intype> *input, \ + matrix::Dense<_outtype> *output) + +#define GKO_DECLARE_DENSE_FILL_KERNEL(_type) \ + void fill(std::shared_ptr exec, \ + matrix::Dense<_type> *mat, _type value) + #define GKO_DECLARE_DENSE_SCALE_KERNEL(_type) \ void scale(std::shared_ptr exec, \ const matrix::Dense<_type> *alpha, matrix::Dense<_type> *x) +#define GKO_DECLARE_DENSE_INV_SCALE_KERNEL(_type) \ + void inv_scale(std::shared_ptr exec, \ + const matrix::Dense<_type> *alpha, matrix::Dense<_type> *x) + #define GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(_type) \ void add_scaled(std::shared_ptr exec, \ const matrix::Dense<_type> *alpha, \ const matrix::Dense<_type> *x, matrix::Dense<_type> *y) +#define GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(_type) \ + void sub_scaled(std::shared_ptr exec, \ + const matrix::Dense<_type> *alpha, \ + const matrix::Dense<_type> *x, matrix::Dense<_type> *y) + #define GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(_type) \ void add_scaled_diag(std::shared_ptr exec, \ const matrix::Dense<_type> *alpha, \ const matrix::Diagonal<_type> *x, \ matrix::Dense<_type> *y) +#define GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(_type) \ + void sub_scaled_diag(std::shared_ptr exec, \ + const matrix::Dense<_type> *alpha, \ + const matrix::Diagonal<_type> *x, \ + matrix::Dense<_type> *y) + #define GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ const matrix::Dense<_type> *x, \ const matrix::Dense<_type> *y, \ matrix::Dense<_type> *result) +#define 
GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ + void compute_conj_dot(std::shared_ptr exec, \ + const matrix::Dense<_type> *x, \ + const matrix::Dense<_type> *y, \ + matrix::Dense<_type> *result) + #define GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(_type) \ void compute_norm2(std::shared_ptr exec, \ const matrix::Dense<_type> *x, \ @@ -133,59 +166,109 @@ namespace kernels { size_type *result, size_type stride_factor, \ size_type slice_size) -#define GKO_DECLARE_TRANSPOSE_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(_type) \ void transpose(std::shared_ptr exec, \ const matrix::Dense<_type> *orig, \ matrix::Dense<_type> *trans) -#define GKO_DECLARE_CONJ_TRANSPOSE_KERNEL(_type) \ +#define GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(_type) \ void conj_transpose(std::shared_ptr exec, \ const matrix::Dense<_type> *orig, \ matrix::Dense<_type> *trans) -#define GKO_DECLARE_ROW_PERMUTE_KERNEL(_vtype, _itype) \ - void row_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *row_permuted) +#define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void symm_permute(std::shared_ptr exec, \ + const Array<_itype> *permutation_indices, \ + const matrix::Dense<_vtype> *orig, \ + matrix::Dense<_vtype> *permuted) + +#define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_symm_permute(std::shared_ptr exec, \ + const Array<_itype> *permutation_indices, \ + const matrix::Dense<_vtype> *orig, \ + matrix::Dense<_vtype> *permuted) + +#define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(_vtype, _itype) \ + void row_gather(std::shared_ptr exec, \ + const Array<_itype> *gather_indices, \ + const matrix::Dense<_vtype> *orig, \ + matrix::Dense<_vtype> *row_gathered) -#define GKO_DECLARE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ +#define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ void column_permute(std::shared_ptr exec, \ const Array<_itype> 
*permutation_indices, \ const matrix::Dense<_vtype> *orig, \ matrix::Dense<_vtype> *column_permuted) -#define GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL(_vtype, _itype) \ +#define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(_vtype, _itype) \ void inverse_row_permute(std::shared_ptr exec, \ const Array<_itype> *permutation_indices, \ const matrix::Dense<_vtype> *orig, \ matrix::Dense<_vtype> *row_permuted) -#define GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ +#define GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ void inverse_column_permute(std::shared_ptr exec, \ const Array<_itype> *permutation_indices, \ const matrix::Dense<_vtype> *orig, \ matrix::Dense<_vtype> *column_permuted) -#define GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL(_vtype) \ +#define GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(_vtype) \ void extract_diagonal(std::shared_ptr exec, \ const matrix::Dense<_vtype> *orig, \ matrix::Diagonal<_vtype> *diag) +#define GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ + void inplace_absolute_dense(std::shared_ptr exec, \ + matrix::Dense<_vtype> *source) + +#define GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ + void outplace_absolute_dense( \ + std::shared_ptr exec, \ + const matrix::Dense<_vtype> *source, \ + matrix::Dense> *result) + +#define GKO_DECLARE_MAKE_COMPLEX_KERNEL(_vtype) \ + void make_complex(std::shared_ptr exec, \ + const matrix::Dense<_vtype> *source, \ + matrix::Dense> *result) + +#define GKO_DECLARE_GET_REAL_KERNEL(_vtype) \ + void get_real(std::shared_ptr exec, \ + const matrix::Dense<_vtype> *source, \ + matrix::Dense> *result) + +#define GKO_DECLARE_GET_IMAG_KERNEL(_vtype) \ + void get_imag(std::shared_ptr exec, \ + const matrix::Dense<_vtype> *source, \ + matrix::Dense> *result) + + #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType); \ + 
template \ + GKO_DECLARE_DENSE_FILL_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_SCALE_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_INV_SCALE_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ @@ -208,19 +291,33 @@ namespace kernels { template \ GKO_DECLARE_DENSE_CALCULATE_TOTAL_COLS_KERNEL(ValueType); \ template \ - GKO_DECLARE_TRANSPOSE_KERNEL(ValueType); \ + GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(ValueType); \ template \ - GKO_DECLARE_CONJ_TRANSPOSE_KERNEL(ValueType); \ + GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ - GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ template \ - GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL(ValueType) + 
GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_MAKE_COMPLEX_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GET_REAL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GET_IMAG_KERNEL(ValueType) namespace omp { @@ -259,6 +356,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace dense +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/diagonal.cpp b/core/matrix/diagonal.cpp index 04130cd4604..8a6d36a8759 100644 --- a/core/matrix/diagonal.cpp +++ b/core/matrix/diagonal.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,10 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include +#include "core/components/absolute_array.hpp" #include "core/matrix/diagonal_kernels.hpp" @@ -52,6 +54,10 @@ GKO_REGISTER_OPERATION(apply_to_csr, diagonal::apply_to_csr); GKO_REGISTER_OPERATION(right_apply_to_csr, diagonal::right_apply_to_csr); GKO_REGISTER_OPERATION(convert_to_csr, diagonal::convert_to_csr); GKO_REGISTER_OPERATION(conj_transpose, diagonal::conj_transpose); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace diagonal @@ -62,12 +68,8 @@ void Diagonal::apply_impl(const LinOp *b, LinOp *x) const { auto exec = this->get_executor(); - if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { - exec->run(diagonal::make_apply_to_dense(this, as>(b), - as>(x))); - } else if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + if (dynamic_cast *>(b) && + dynamic_cast *>(x)) { exec->run(diagonal::make_apply_to_csr( this, as>(b), 
as>(x))); } else if (dynamic_cast *>(b) && @@ -75,7 +77,12 @@ void Diagonal::apply_impl(const LinOp *b, LinOp *x) const exec->run(diagonal::make_apply_to_csr( this, as>(b), as>(x))); } else { - GKO_NOT_IMPLEMENTED; + precision_dispatch_real_complex( + [this, &exec](auto dense_b, auto dense_x) { + exec->run( + diagonal::make_apply_to_dense(this, dense_b, dense_x)); + }, + b, x); } } @@ -85,12 +92,8 @@ void Diagonal::rapply_impl(const LinOp *b, LinOp *x) const { auto exec = this->get_executor(); - if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { - exec->run(diagonal::make_right_apply_to_dense( - this, as>(b), as>(x))); - } else if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + if (dynamic_cast *>(b) && + dynamic_cast *>(x)) { exec->run(diagonal::make_right_apply_to_csr( this, as>(b), as>(x))); } else if (dynamic_cast *>(b) && @@ -98,7 +101,14 @@ void Diagonal::rapply_impl(const LinOp *b, LinOp *x) const exec->run(diagonal::make_right_apply_to_csr( this, as>(b), as>(x))); } else { - GKO_NOT_IMPLEMENTED; + // no real-to-complex conversion, as this would require doubling the + // diagonal entries for the complex-to-real columns + precision_dispatch( + [this, &exec](auto dense_b, auto dense_x) { + exec->run(diagonal::make_right_apply_to_dense(this, dense_b, + dense_x)); + }, + b, x); } } @@ -107,14 +117,14 @@ template void Diagonal::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { - auto dense_x = as>(x); - dense_x->scale(beta); - dense_x->add_scaled(alpha, b); - } else { - GKO_NOT_IMPLEMENTED; - } + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } @@ -136,6 +146,22 @@ std::unique_ptr Diagonal::conj_transpose() const } +template +void 
Diagonal::convert_to( + Diagonal> *result) const +{ + result->values_ = this->values_; + result->set_size(this->get_size()); +} + + +template +void Diagonal::move_to(Diagonal> *result) +{ + this->convert_to(result); +} + + template void Diagonal::convert_to(Csr *result) const { @@ -261,9 +287,52 @@ void Diagonal::write(mat_data32 &data) const } +template +void Diagonal::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(diagonal::make_inplace_absolute_array(this->get_values(), + this->get_size()[0])); +} + + +template +std::unique_ptr::absolute_type> +Diagonal::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_diagonal = absolute_type::create(exec, this->get_size()[0]); + + exec->run(diagonal::make_outplace_absolute_array( + this->get_const_values(), this->get_size()[0], + abs_diagonal->get_values())); + + return abs_diagonal; +} + + #define GKO_DECLARE_DIAGONAL_MATRIX(value_type) class Diagonal GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_MATRIX); } // namespace matrix + + +// Implement DiagonalExtractable for LinOp when Diagonal is complete class +template +std::unique_ptr DiagonalExtractable::extract_diagonal_linop() + const +{ + return this->extract_diagonal(); +} + + +#define GKO_DECLARE_DIAGONAL_EXTRACTABLE(value_type) \ + std::unique_ptr \ + DiagonalExtractable::extract_diagonal_linop() const +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_EXTRACTABLE); + + } // namespace gko diff --git a/core/matrix/diagonal_kernels.hpp b/core/matrix/diagonal_kernels.hpp index bb8d4c14e7d..3db4c42478b 100644 --- a/core/matrix/diagonal_kernels.hpp +++ b/core/matrix/diagonal_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -135,6 +135,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace diagonal { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace diagonal +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/ell.cpp b/core/matrix/ell.cpp index 5f7591421e1..fcb198106cd 100644 --- a/core/matrix/ell.cpp +++ b/core/matrix/ell.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,11 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/ell_kernels.hpp" @@ -62,6 +64,10 @@ GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, ell::calculate_nonzeros_per_row); GKO_REGISTER_OPERATION(extract_diagonal, ell::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace ell @@ -96,8 +102,11 @@ size_type calculate_max_nnz_per_row( template void Ell::apply_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(ell::make_spmv(this, as(b), as(x))); + mixed_precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run(ell::make_spmv(this, dense_b, dense_x)); + }, + b, x); } @@ -105,9 +114,15 @@ template void Ell::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(ell::make_advanced_spmv( - as(alpha), this, 
as(b), as(beta), as(x))); + mixed_precision_dispatch_real_complex( + [this, alpha, beta](auto dense_b, auto dense_x) { + auto dense_alpha = make_temporary_conversion(alpha); + auto dense_beta = make_temporary_conversion< + typename std::decay_t::value_type>(beta); + this->get_executor()->run(ell::make_advanced_spmv( + dense_alpha.get(), this, dense_b, dense_beta.get(), dense_x)); + }, + b, x); } @@ -251,6 +266,35 @@ Ell::extract_diagonal() const } +template +void Ell::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(ell::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Ell::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_ell = absolute_type::create( + exec, this->get_size(), this->get_num_stored_elements_per_row(), + this->get_stride()); + + abs_ell->col_idxs_ = col_idxs_; + exec->run(ell::make_outplace_absolute_array(this->get_const_values(), + this->get_num_stored_elements(), + abs_ell->get_values())); + + return abs_ell; +} + + #define GKO_DECLARE_ELL_MATRIX(ValueType, IndexType) \ class Ell GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ELL_MATRIX); diff --git a/core/matrix/ell_kernels.hpp b/core/matrix/ell_kernels.hpp index 049ed280b66..7cbe54b3720 100644 --- a/core/matrix/ell_kernels.hpp +++ b/core/matrix/ell_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -46,18 +46,21 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_ELL_SPMV_KERNEL(ValueType, IndexType) \ - void spmv(std::shared_ptr exec, \ - const matrix::Ell *a, \ - const matrix::Dense *b, matrix::Dense *c) +#define GKO_DECLARE_ELL_SPMV_KERNEL(InputValueType, MatrixValueType, \ + OutputValueType, IndexType) \ + void spmv(std::shared_ptr exec, \ + const matrix::Ell *a, \ + const matrix::Dense *b, \ + matrix::Dense *c) -#define GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ - void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Ell *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) +#define GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(InputValueType, MatrixValueType, \ + OutputValueType, IndexType) \ + void advanced_spmv(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Ell *a, \ + const matrix::Dense *b, \ + const matrix::Dense *beta, \ + matrix::Dense *c) #define GKO_DECLARE_ELL_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ @@ -87,10 +90,14 @@ namespace kernels { matrix::Diagonal *diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_ELL_SPMV_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_ELL_SPMV_KERNEL(InputValueType, MatrixValueType, \ + OutputValueType, IndexType); \ + template \ + GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(InputValueType, MatrixValueType, \ + OutputValueType, IndexType); \ template \ GKO_DECLARE_ELL_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ template \ @@ -139,6 +146,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace ell { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ell +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/fbcsr.cpp 
b/core/matrix/fbcsr.cpp new file mode 100644 index 00000000000..9e4a264b3e6 --- /dev/null +++ b/core/matrix/fbcsr.cpp @@ -0,0 +1,515 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/components/absolute_array.hpp" +#include "core/components/fill_array.hpp" +#include "core/matrix/fbcsr_kernels.hpp" + + +namespace gko { +namespace matrix { +namespace fbcsr { + + +GKO_REGISTER_OPERATION(spmv, fbcsr::spmv); +GKO_REGISTER_OPERATION(advanced_spmv, fbcsr::advanced_spmv); +GKO_REGISTER_OPERATION(convert_to_csr, fbcsr::convert_to_csr); +GKO_REGISTER_OPERATION(convert_to_dense, fbcsr::convert_to_dense); +GKO_REGISTER_OPERATION(transpose, fbcsr::transpose); +GKO_REGISTER_OPERATION(conj_transpose, fbcsr::conj_transpose); +GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, + fbcsr::calculate_max_nnz_per_row); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, + fbcsr::calculate_nonzeros_per_row); +GKO_REGISTER_OPERATION(is_sorted_by_column_index, + fbcsr::is_sorted_by_column_index); +GKO_REGISTER_OPERATION(sort_by_column_index, fbcsr::sort_by_column_index); +GKO_REGISTER_OPERATION(extract_diagonal, fbcsr::extract_diagonal); +GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); + + +} // namespace fbcsr + + +namespace detail { + + +/** + * @internal + * A lightweight dynamic block type on the host + * + * Currently used only while reading a FBCSR matrix from matrix_data. 
+ * + * @tparam ValueType The numeric type of entries of the block + */ +template +class DenseBlock final { +public: + using value_type = ValueType; + + DenseBlock() = default; + + DenseBlock(const int num_rows, const int num_cols) + : nrows_{num_rows}, ncols_{num_cols}, vals_(num_rows * num_cols) + {} + + value_type &at(const int row, const int col) + { + return vals_[row + col * nrows_]; + } + + const value_type &at(const int row, const int col) const + { + return vals_[row + col * nrows_]; + } + + value_type &operator()(const int row, const int col) + { + return at(row, col); + } + + const value_type &operator()(const int row, const int col) const + { + return at(row, col); + } + + int size() const { return nrows_ * ncols_; } + + void resize(const int nrows, const int ncols) + { + vals_.resize(nrows * ncols); + nrows_ = nrows; + ncols_ = ncols; + } + + void zero() + { + std::fill(vals_.begin(), vals_.end(), static_cast(0)); + } + +private: + int nrows_ = 0; + int ncols_ = 0; + std::vector vals_; +}; + + +} // namespace detail + + +template +void Fbcsr::apply_impl(const LinOp *const b, + LinOp *const x) const +{ + using Dense = Dense; + if (auto b_fbcsr = dynamic_cast *>(b)) { + // if b is a FBCSR matrix, we need an SpGeMM + GKO_NOT_SUPPORTED(b_fbcsr); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_spmv(this, as(b), as(x))); + } +} + + +template +void Fbcsr::apply_impl(const LinOp *const alpha, + const LinOp *const b, + const LinOp *const beta, + LinOp *const x) const +{ + using Dense = Dense; + if (auto b_fbcsr = dynamic_cast *>(b)) { + // if b is a FBCSR matrix, we need an SpGeMM + GKO_NOT_SUPPORTED(b_fbcsr); + } else if (auto b_ident = dynamic_cast *>(b)) { + // if b is an identity matrix, we need an SpGEAM + GKO_NOT_SUPPORTED(b_ident); + } else { + // otherwise we assume that b is dense and compute a SpMV/SpMM + this->get_executor()->run( + fbcsr::make_advanced_spmv(as(alpha), this, 
as(b), + as(beta), as(x))); + } +} + + +template +void Fbcsr::convert_to( + Fbcsr, IndexType> *const result) const +{ + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->set_size(this->get_size()); + result->bs_ = this->bs_; + result->nbcols_ = this->nbcols_; +} + + +template +void Fbcsr::move_to( + Fbcsr, IndexType> *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + Dense *const result) const +{ + auto exec = this->get_executor(); + auto tmp = Dense::create(exec, this->get_size()); + exec->run(fbcsr::make_convert_to_dense(this, tmp.get())); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to(Dense *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + Csr *const result) const +{ + auto exec = this->get_executor(); + auto tmp = Csr::create( + exec, this->get_size(), this->get_num_stored_elements(), + result->get_strategy()); + exec->run(fbcsr::make_convert_to_csr(this, tmp.get())); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to( + Csr *const result) +{ + this->convert_to(result); +} + + +template +void Fbcsr::convert_to( + SparsityCsr *const result) const +{ + auto exec = this->get_executor(); + auto tmp = SparsityCsr::create( + exec, + gko::dim<2>{static_cast(this->get_num_block_rows()), + static_cast(this->get_num_block_cols())}, + this->get_num_stored_blocks()); + + tmp->col_idxs_ = this->col_idxs_; + tmp->row_ptrs_ = this->row_ptrs_; + tmp->value_ = Array(exec, {one()}); + tmp->move_to(result); +} + + +template +void Fbcsr::move_to( + SparsityCsr *const result) +{ + this->convert_to(result); +} + + +/* + * Currently, this implementation is sequential and has complexity + * O(nnz log(nnz)). + * @note Can this be changed to a parallel O(nnz) implementation? 
+ */ +template +void Fbcsr::read(const mat_data &data) +{ + GKO_ENSURE_IN_BOUNDS(data.nonzeros.size(), + std::numeric_limits::max()); + + const auto nnz = static_cast(data.nonzeros.size()); + const int bs = this->bs_; + + using Block_t = detail::DenseBlock; + + struct FbEntry { + index_type block_row; + index_type block_column; + }; + + struct FbLess { + bool operator()(const FbEntry &a, const FbEntry &b) const + { + if (a.block_row != b.block_row) + return a.block_row < b.block_row; + else + return a.block_column < b.block_column; + } + }; + + auto create_block_map = [nnz, bs](const mat_data &mdata) { + std::map blocks; + for (index_type inz = 0; inz < nnz; inz++) { + const index_type row = mdata.nonzeros[inz].row; + const index_type col = mdata.nonzeros[inz].column; + const value_type val = mdata.nonzeros[inz].value; + + const auto localrow = static_cast(row % bs); + const auto localcol = static_cast(col % bs); + const index_type blockrow = row / bs; + const index_type blockcol = col / bs; + + Block_t &nnzblk = blocks[{blockrow, blockcol}]; + if (nnzblk.size() == 0) { + nnzblk.resize(bs, bs); + nnzblk.zero(); + nnzblk(localrow, localcol) = val; + } else { + // If this does not happen, we re-visited a nonzero + assert(nnzblk(localrow, localcol) == gko::zero()); + nnzblk(localrow, localcol) = val; + } + } + return blocks; + }; + + const std::map blocks = create_block_map(data); + + auto tmp = Fbcsr::create(this->get_executor()->get_master(), data.size, + blocks.size() * bs * bs, bs); + + tmp->row_ptrs_.get_data()[0] = 0; + if (data.nonzeros.size() == 0) { + tmp->move_to(this); + return; + } + + index_type cur_brow = 0; + index_type cur_bnz = 0; + index_type cur_bcol = blocks.begin()->first.block_column; + const index_type num_brows = detail::get_num_blocks(bs, data.size[0]); + + acc::range> values( + std::array{blocks.size(), static_cast(bs), + static_cast(bs)}, + tmp->values_.get_data()); + + for (auto it = blocks.begin(); it != blocks.end(); it++) { + 
GKO_ENSURE_IN_BOUNDS(cur_brow, num_brows); + + tmp->col_idxs_.get_data()[cur_bnz] = it->first.block_column; + for (int ibr = 0; ibr < bs; ibr++) { + for (int jbr = 0; jbr < bs; jbr++) { + values(cur_bnz, ibr, jbr) = it->second(ibr, jbr); + } + } + if (it->first.block_row > cur_brow) { + tmp->row_ptrs_.get_data()[++cur_brow] = cur_bnz; + } else { + assert(cur_brow == it->first.block_row); + assert(cur_bcol <= it->first.block_column); + } + + cur_bcol = it->first.block_column; + cur_bnz++; + } + + tmp->row_ptrs_.get_data()[++cur_brow] = + static_cast(blocks.size()); + + assert(cur_brow == tmp->get_size()[0] / bs); + + tmp->move_to(this); +} + + +template +void Fbcsr::write(mat_data &data) const +{ + std::unique_ptr op{}; + const Fbcsr *tmp{}; + if (this->get_executor()->get_master() != this->get_executor()) { + op = this->clone(this->get_executor()->get_master()); + tmp = static_cast(op.get()); + } else { + tmp = this; + } + + data = {tmp->get_size(), {}}; + + const size_type nbnz = tmp->get_num_stored_blocks(); + const acc::range> vblocks( + std::array{nbnz, static_cast(bs_), + static_cast(bs_)}, + tmp->values_.get_const_data()); + + for (size_type brow = 0; brow < tmp->get_num_block_rows(); ++brow) { + const auto start = tmp->row_ptrs_.get_const_data()[brow]; + const auto end = tmp->row_ptrs_.get_const_data()[brow + 1]; + + for (int ib = 0; ib < bs_; ib++) { + const auto row = brow * bs_ + ib; + for (auto inz = start; inz < end; ++inz) { + for (int jb = 0; jb < bs_; jb++) { + const auto col = + tmp->col_idxs_.get_const_data()[inz] * bs_ + jb; + const auto val = vblocks(inz, ib, jb); + data.nonzeros.emplace_back(row, col, val); + } + } + } + } +} + + +template +std::unique_ptr Fbcsr::transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_); + + exec->run(fbcsr::make_transpose(this, trans_cpy.get())); + return std::move(trans_cpy); +} + + +template 
+std::unique_ptr Fbcsr::conj_transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy = Fbcsr::create(exec, gko::transpose(this->get_size()), + this->get_num_stored_elements(), bs_); + + exec->run(fbcsr::make_conj_transpose(this, trans_cpy.get())); + return std::move(trans_cpy); +} + + +template +void Fbcsr::sort_by_column_index() +{ + auto exec = this->get_executor(); + exec->run(fbcsr::make_sort_by_column_index(this)); +} + + +template +bool Fbcsr::is_sorted_by_column_index() const +{ + auto exec = this->get_executor(); + bool is_sorted; + exec->run(fbcsr::make_is_sorted_by_column_index(this, &is_sorted)); + return is_sorted; +} + + +template +std::unique_ptr> +Fbcsr::extract_diagonal() const +{ + auto exec = this->get_executor(); + + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + auto diag = Diagonal::create(exec, diag_size); + exec->run(fbcsr::make_fill_array(diag->get_values(), diag->get_size()[0], + zero())); + exec->run(fbcsr::make_extract_diagonal(this, lend(diag))); + return diag; +} + + +template +void Fbcsr::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(fbcsr::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Fbcsr::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_fbcsr = absolute_type::create(exec, this->get_size(), + this->get_num_stored_elements(), + this->get_block_size()); + + abs_fbcsr->col_idxs_ = col_idxs_; + abs_fbcsr->row_ptrs_ = row_ptrs_; + exec->run(fbcsr::make_outplace_absolute_array( + this->get_const_values(), this->get_num_stored_elements(), + abs_fbcsr->get_values())); + + return abs_fbcsr; +} + + +#define GKO_DECLARE_FBCSR_MATRIX(ValueType, IndexType) \ + class Fbcsr +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_MATRIX); + + +} // namespace matrix +} // namespace gko diff --git a/core/matrix/fbcsr_builder.hpp 
b/core/matrix/fbcsr_builder.hpp new file mode 100644 index 00000000000..df10c2a3a57 --- /dev/null +++ b/core/matrix/fbcsr_builder.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ +#define GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ + + +#include + + +namespace gko { +namespace matrix { + + +/** + * @internal + * + * Allows intrusive access to the arrays stored within a @ref Fbcsr matrix. + * + * @tparam ValueType the value type of the matrix + * @tparam IndexType the index type of the matrix + */ +template +class FbcsrBuilder { +public: + /** + * @return The column index array of the matrix. + */ + Array &get_col_idx_array() { return matrix_->col_idxs_; } + + /** + * @return The value array of the matrix. + */ + Array &get_value_array() { return matrix_->values_; } + + /** + * @return The (uniform) block size + */ + int get_block_size() const { return matrix_->bs_; } + + /** + * @param matrix An existing FBCSR matrix + * for which intrusive access is needed + */ + explicit FbcsrBuilder(Fbcsr *const matrix) + : matrix_{matrix} + {} + + ~FbcsrBuilder() = default; + + // make this type non-movable + FbcsrBuilder(const FbcsrBuilder &) = delete; + FbcsrBuilder(FbcsrBuilder &&) = delete; + FbcsrBuilder &operator=(const FbcsrBuilder &) = delete; + FbcsrBuilder &operator=(FbcsrBuilder &&) = delete; + +private: + Fbcsr *matrix_; +}; + + +} // namespace matrix +} // namespace gko + +#endif // GKO_CORE_MATRIX_FBCSR_BUILDER_HPP_ diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp new file mode 100644 index 00000000000..5d2492a3b37 --- /dev/null +++ b/core/matrix/fbcsr_kernels.hpp @@ -0,0 +1,189 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ + + +#include + + +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) \ + void spmv(std::shared_ptr exec, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ + void advanced_spmv(std::shared_ptr exec, \ + const matrix::Dense *alpha, \ + const matrix::Fbcsr *a, \ + const matrix::Dense *b, \ + const matrix::Dense *beta, \ + matrix::Dense *c) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ + void convert_to_dense(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Dense *result) + +#define GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ + void convert_to_csr(std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + matrix::Csr *result) + +#define GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ + void conj_transpose(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Fbcsr *trans) + +#define GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_max_nnz_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, size_type *result) + +#define GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ + IndexType) \ + void calculate_nonzeros_per_row( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *source, \ + Array *result) + +#define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ + void sort_by_column_index(std::shared_ptr exec, \ + matrix::Fbcsr *to_sort) + +#define 
GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ + void is_sorted_by_column_index( \ + std::shared_ptr exec, \ + const matrix::Fbcsr *to_check, bool *is_sorted) + +#define GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ + void extract_diagonal(std::shared_ptr exec, \ + const matrix::Fbcsr *orig, \ + matrix::Diagonal *diag) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) + + +namespace omp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace omp + + +namespace cuda { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace cuda + + +namespace reference { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace reference + + +namespace hip { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace hip + + +namespace dpcpp { +namespace fbcsr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fbcsr +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // 
GKO_CORE_MATRIX_FBCSR_KERNELS_HPP_ diff --git a/core/matrix/hybrid.cpp b/core/matrix/hybrid.cpp index d5f012b79c7..f6cf69f2955 100644 --- a/core/matrix/hybrid.cpp +++ b/core/matrix/hybrid.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,10 +39,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/ell_kernels.hpp" @@ -60,6 +62,10 @@ GKO_REGISTER_OPERATION(count_nonzeros, hybrid::count_nonzeros); GKO_REGISTER_OPERATION(extract_coo_diagonal, coo::extract_diagonal); GKO_REGISTER_OPERATION(extract_ell_diagonal, ell::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace hybrid @@ -96,10 +102,14 @@ void get_each_row_nnz(const matrix_data &data, template void Hybrid::apply_impl(const LinOp *b, LinOp *x) const { - auto ell_mtx = this->get_ell(); - auto coo_mtx = this->get_coo(); - ell_mtx->apply(b, x); - coo_mtx->apply2(b, x); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + auto ell_mtx = this->get_ell(); + auto coo_mtx = this->get_coo(); + ell_mtx->apply(dense_b, dense_x); + coo_mtx->apply2(dense_b, dense_x); + }, + b, x); } @@ -108,10 +118,14 @@ void Hybrid::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto ell_mtx = this->get_ell(); - auto coo_mtx = this->get_coo(); - ell_mtx->apply(alpha, b, beta, x); - coo_mtx->apply2(alpha, b, x); + 
precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto ell_mtx = this->get_ell(); + auto coo_mtx = this->get_coo(); + ell_mtx->apply(dense_alpha, dense_b, dense_beta, dense_x); + coo_mtx->apply2(dense_alpha, dense_b, dense_x); + }, + alpha, b, beta, x); } @@ -122,7 +136,8 @@ void Hybrid::convert_to( this->ell_->convert_to(result->ell_.get()); this->coo_->convert_to(result->coo_.get()); // TODO set strategy correctly - // There is no way to correctly clone the strategy like in Csr::convert_to + // There is no way to correctly clone the strategy like in + // Csr::convert_to result->set_size(this->get_size()); } @@ -285,6 +300,34 @@ Hybrid::extract_diagonal() const } +template +void Hybrid::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(hybrid::make_inplace_absolute_array( + this->get_ell_values(), this->get_ell_num_stored_elements())); + exec->run(hybrid::make_inplace_absolute_array( + this->get_coo_values(), this->get_coo_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Hybrid::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_hybrid = absolute_type::create( + exec, this->get_size(), this->get_strategy()); + + abs_hybrid->ell_->copy_from(ell_->compute_absolute()); + abs_hybrid->coo_->copy_from(coo_->compute_absolute()); + + return abs_hybrid; +} + + #define GKO_DECLARE_HYBRID_MATRIX(ValueType, IndexType) \ class Hybrid GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_MATRIX); diff --git a/core/matrix/hybrid_kernels.hpp b/core/matrix/hybrid_kernels.hpp index 788fe66e15b..b0222cba0bd 100644 --- a/core/matrix/hybrid_kernels.hpp +++ b/core/matrix/hybrid_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -104,6 +104,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace hybrid { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace hybrid +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/identity.cpp b/core/matrix/identity.cpp index 884e5781ee8..169a4f4ba68 100644 --- a/core/matrix/identity.cpp +++ b/core/matrix/identity.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include @@ -53,9 +54,12 @@ template void Identity::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - dense_x->scale(beta); - dense_x->add_scaled(alpha, b); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, dense_b); + }, + alpha, b, beta, x); } diff --git a/core/matrix/permutation.cpp b/core/matrix/permutation.cpp index a8b6e5ff139..5e29d88ddd6 100644 --- a/core/matrix/permutation.cpp +++ b/core/matrix/permutation.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/matrix/sellp.cpp b/core/matrix/sellp.cpp index 7164af154f4..07542e3ab40 100644 --- a/core/matrix/sellp.cpp +++ b/core/matrix/sellp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,12 +36,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include #include "core/base/allocator.hpp" +#include "core/components/absolute_array.hpp" #include "core/components/fill_array.hpp" #include "core/matrix/sellp_kernels.hpp" @@ -58,6 +60,10 @@ GKO_REGISTER_OPERATION(convert_to_csr, sellp::convert_to_csr); GKO_REGISTER_OPERATION(count_nonzeros, sellp::count_nonzeros); GKO_REGISTER_OPERATION(extract_diagonal, sellp::extract_diagonal); GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(inplace_absolute_array, + components::inplace_absolute_array); +GKO_REGISTER_OPERATION(outplace_absolute_array, + components::outplace_absolute_array); } // namespace sellp @@ -107,9 +113,11 @@ size_type calculate_total_cols(const matrix_data &data, template void Sellp::apply_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run( - sellp::make_spmv(this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run(sellp::make_spmv(this, dense_b, dense_x)); + }, + b, x); } @@ -117,9 +125,12 @@ template void Sellp::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(sellp::make_advanced_spmv( - as(alpha), this, as(b), as(beta), as(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto 
dense_beta, auto dense_x) { + this->get_executor()->run(sellp::make_advanced_spmv( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); } @@ -304,6 +315,37 @@ Sellp::extract_diagonal() const } +template +void Sellp::compute_absolute_inplace() +{ + auto exec = this->get_executor(); + + exec->run(sellp::make_inplace_absolute_array( + this->get_values(), this->get_num_stored_elements())); +} + + +template +std::unique_ptr::absolute_type> +Sellp::compute_absolute() const +{ + auto exec = this->get_executor(); + + auto abs_sellp = absolute_type::create( + exec, this->get_size(), this->get_slice_size(), + this->get_stride_factor(), this->get_total_cols()); + + abs_sellp->col_idxs_ = col_idxs_; + abs_sellp->slice_lengths_ = slice_lengths_; + abs_sellp->slice_sets_ = slice_sets_; + exec->run(sellp::make_outplace_absolute_array( + this->get_const_values(), this->get_num_stored_elements(), + abs_sellp->get_values())); + + return abs_sellp; +} + + #define GKO_DECLARE_SELLP_MATRIX(ValueType, IndexType) \ class Sellp GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_MATRIX); diff --git a/core/matrix/sellp_kernels.hpp b/core/matrix/sellp_kernels.hpp index 7b9b6fdf551..a2ea6f87682 100644 --- a/core/matrix/sellp_kernels.hpp +++ b/core/matrix/sellp_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -130,6 +130,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace sellp { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace sellp +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/matrix/sparsity_csr.cpp b/core/matrix/sparsity_csr.cpp index 851dcd946a5..d253792c6a3 100644 --- a/core/matrix/sparsity_csr.cpp +++ b/core/matrix/sparsity_csr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -68,9 +69,12 @@ template void SparsityCsr::apply_impl(const LinOp *b, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run( - sparsity_csr::make_spmv(this, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run( + sparsity_csr::make_spmv(this, dense_b, dense_x)); + }, + b, x); } @@ -80,9 +84,12 @@ void SparsityCsr::apply_impl(const LinOp *alpha, const LinOp *beta, LinOp *x) const { - using Dense = Dense; - this->get_executor()->run(sparsity_csr::make_advanced_spmv( - as(alpha), this, as(b), as(beta), as(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + this->get_executor()->run(sparsity_csr::make_advanced_spmv( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); } diff --git a/core/matrix/sparsity_csr_kernels.hpp b/core/matrix/sparsity_csr_kernels.hpp index 58ec58e789f..a9d17cdd133 100644 --- a/core/matrix/sparsity_csr_kernels.hpp +++ b/core/matrix/sparsity_csr_kernels.hpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -144,6 +144,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace sparsity_csr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace sparsity_csr +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp new file mode 100644 index 00000000000..e38d10d27c3 --- /dev/null +++ b/core/multigrid/amgx_pgm.cpp @@ -0,0 +1,171 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/multigrid/amgx_pgm_kernels.hpp" + + +namespace gko { +namespace multigrid { +namespace amgx_pgm { + + +GKO_REGISTER_OPERATION(match_edge, amgx_pgm::match_edge); +GKO_REGISTER_OPERATION(count_unagg, amgx_pgm::count_unagg); +GKO_REGISTER_OPERATION(renumber, amgx_pgm::renumber); +GKO_REGISTER_OPERATION(find_strongest_neighbor, + amgx_pgm::find_strongest_neighbor); +GKO_REGISTER_OPERATION(assign_to_exist_agg, amgx_pgm::assign_to_exist_agg); +GKO_REGISTER_OPERATION(fill_array, components::fill_array); +GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); + + +} // namespace amgx_pgm + + +template +void AmgxPgm::generate() +{ + using matrix_type = matrix::Csr; + using real_type = remove_complex; + using weight_matrix_type = remove_complex; + auto exec = this->get_executor(); + const auto num_rows = this->system_matrix_->get_size()[0]; + Array strongest_neighbor(this->get_executor(), num_rows); + Array intermediate_agg(this->get_executor(), + parameters_.deterministic * num_rows); + // Only support csr matrix currently. 
+ const matrix_type *amgxpgm_op = nullptr; + // Store the csr matrix if needed + auto amgxpgm_op_unique_ptr = matrix_type::create(exec); + amgxpgm_op = dynamic_cast(system_matrix_.get()); + if (!amgxpgm_op) { + // if original matrix is not csr, converting it to csr. + as>(this->system_matrix_.get()) + ->convert_to(amgxpgm_op_unique_ptr.get()); + amgxpgm_op = amgxpgm_op_unique_ptr.get(); + } + + // Initial agg = -1 + exec->run(amgx_pgm::make_fill_array(agg_.get_data(), agg_.get_num_elems(), + -one())); + IndexType num_unagg = num_rows; + IndexType num_unagg_prev = num_rows; + // TODO: if mtx is a hermitian matrix, weight_mtx = abs(mtx) + // compute weight_mtx = (abs(mtx) + abs(mtx'))/2; + auto abs_mtx = amgxpgm_op->compute_absolute(); + // abs_mtx is already real valuetype, so transpose is enough + auto weight_mtx = gko::as(abs_mtx->transpose()); + auto half_scalar = initialize>({0.5}, exec); + auto identity = matrix::Identity::create(exec, num_rows); + // W = (abs_mtx + transpose(abs_mtx))/2 + abs_mtx->apply(lend(half_scalar), lend(identity), lend(half_scalar), + lend(weight_mtx)); + // Extract the diagonal value of matrix + auto diag = weight_mtx->extract_diagonal(); + for (int i = 0; i < parameters_.max_iterations; i++) { + // Find the strongest neighbor of each row + exec->run(amgx_pgm::make_find_strongest_neighbor( + weight_mtx.get(), diag.get(), agg_, strongest_neighbor)); + // Match edges + exec->run(amgx_pgm::make_match_edge(strongest_neighbor, agg_)); + // Get the num_unagg + exec->run(amgx_pgm::make_count_unagg(agg_, &num_unagg)); + // no new match, all match, or the ratio of num_unagg/num is lower + // than parameter.max_unassigned_ratio + if (num_unagg == 0 || num_unagg == num_unagg_prev || + num_unagg < parameters_.max_unassigned_ratio * num_rows) { + break; + } + num_unagg_prev = num_unagg; + } + // Handle the left unassign points + if (num_unagg != 0 && parameters_.deterministic) { + // copy the agg to intermediate_agg + intermediate_agg = agg_; + } + 
if (num_unagg != 0) { + // Assign all left points + exec->run(amgx_pgm::make_assign_to_exist_agg( + weight_mtx.get(), diag.get(), agg_, intermediate_agg)); + } + IndexType num_agg = 0; + // Renumber the index + exec->run(amgx_pgm::make_renumber(agg_, &num_agg)); + + gko::dim<2>::dimension_type coarse_dim = num_agg; + auto fine_dim = system_matrix_->get_size()[0]; + // TODO: prolong_op can be done with lightway format + auto prolong_op = share( + matrix_type::create(exec, gko::dim<2>{fine_dim, coarse_dim}, fine_dim)); + exec->copy_from(exec.get(), agg_.get_num_elems(), agg_.get_const_data(), + prolong_op->get_col_idxs()); + exec->run(amgx_pgm::make_fill_seq_array(prolong_op->get_row_ptrs(), + fine_dim + 1)); + exec->run(amgx_pgm::make_fill_array(prolong_op->get_values(), fine_dim, + one())); + // TODO: implement the restrict_op from aggregation. + auto restrict_op = gko::as(share(prolong_op->transpose())); + + // Construct the coarse matrix + // TODO: use less memory footprint to improve it + auto coarse_matrix = + share(matrix_type::create(exec, gko::dim<2>{coarse_dim, coarse_dim})); + auto tmp = matrix_type::create(exec, gko::dim<2>{fine_dim, coarse_dim}); + amgxpgm_op->apply(prolong_op.get(), tmp.get()); + restrict_op->apply(tmp.get(), coarse_matrix.get()); + + this->set_multigrid_level(prolong_op, coarse_matrix, restrict_op); +} + + +#define GKO_DECLARE_AMGX_PGM(_vtype, _itype) class AmgxPgm<_vtype, _itype> +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_AMGX_PGM); + + +} // namespace multigrid +} // namespace gko diff --git a/core/multigrid/amgx_pgm_kernels.hpp b/core/multigrid/amgx_pgm_kernels.hpp new file mode 100644 index 00000000000..793780ae505 --- /dev/null +++ b/core/multigrid/amgx_pgm_kernels.hpp @@ -0,0 +1,149 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_MULTIGRID_AMGX_PGM_KERNELS_HPP_ +#define GKO_CORE_MULTIGRID_AMGX_PGM_KERNELS_HPP_ + + +#include + + +#include + + +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace amgx_pgm { + + +#define GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL(IndexType) \ + void match_edge(std::shared_ptr exec, \ + const Array &strongest_neighbor, \ + Array &agg) + +#define GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL(IndexType) \ + void count_unagg(std::shared_ptr exec, \ + const Array &agg, IndexType *num_unagg) + +#define GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL(IndexType) \ + void renumber(std::shared_ptr exec, \ + Array &agg, IndexType *num_agg) + +#define GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR(ValueType, IndexType) \ + void find_strongest_neighbor( \ + std::shared_ptr exec, \ + const matrix::Csr *weight_mtx, \ + const matrix::Diagonal *diag, Array &agg, \ + Array &strongest_neighbor) + +#define GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType) \ + void assign_to_exist_agg( \ + std::shared_ptr exec, \ + const matrix::Csr *weight_mtx, \ + const matrix::Diagonal *diag, Array &agg, \ + Array &intermediate_agg) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL(IndexType); \ + template \ + GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL(IndexType); \ + template \ + GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL(IndexType); \ + template \ + GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR(ValueType, IndexType); \ + template \ + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType) + + +} // namespace amgx_pgm + + +namespace omp { +namespace amgx_pgm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace amgx_pgm +} // namespace omp + + +namespace cuda { +namespace amgx_pgm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace amgx_pgm +} // namespace cuda + + +namespace reference { +namespace amgx_pgm { + 
+GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace amgx_pgm +} // namespace reference + + +namespace hip { +namespace amgx_pgm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace amgx_pgm +} // namespace hip + + +namespace dpcpp { +namespace amgx_pgm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace amgx_pgm +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MULTIGRID_AMGX_PGM_KERNELS_HPP_ diff --git a/core/preconditioner/isai.cpp b/core/preconditioner/isai.cpp index 0b8738c5594..7247b9a04e7 100644 --- a/core/preconditioner/isai.cpp +++ b/core/preconditioner/isai.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,10 +42,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include +#include +#include +#include "core/base/utils.hpp" +#include "core/factorization/factorization_kernels.hpp" #include "core/preconditioner/isai_kernels.hpp" @@ -55,55 +61,19 @@ namespace isai { GKO_REGISTER_OPERATION(generate_tri_inverse, isai::generate_tri_inverse); +GKO_REGISTER_OPERATION(generate_general_inverse, + isai::generate_general_inverse); GKO_REGISTER_OPERATION(generate_excess_system, isai::generate_excess_system); +GKO_REGISTER_OPERATION(scale_excess_solution, isai::scale_excess_solution); GKO_REGISTER_OPERATION(scatter_excess_solution, isai::scatter_excess_solution); +GKO_REGISTER_OPERATION(initialize_row_ptrs_l, + factorization::initialize_row_ptrs_l); +GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); } // namespace isai -/** - * @internal - * - * Helper function that converts the given matrix to the (const) CSR format with - * additional sorting. 
- * - * If the given matrix was already sorted, is on the same executor and with a - * dynamic type of `const Csr`, the same pointer is returned with an empty - * deleter. - * In all other cases, a new matrix is created, which stores the converted Csr - * matrix. - * If `skip_sorting` is false, the matrix will be sorted by column index, - * otherwise, it will not be sorted. - */ -template -std::shared_ptr convert_to_csr_and_sort( - std::shared_ptr &exec, std::shared_ptr mtx, - bool skip_sorting) -{ - static_assert( - std::is_same>::value, - "The given `Csr` type must be of type `matrix::Csr`!"); - if (skip_sorting && exec == mtx->get_executor()) { - auto csr_mtx = std::dynamic_pointer_cast(mtx); - if (csr_mtx) { - // Here, we can just forward the pointer with an empty deleter - // since it is already sorted and in the correct format - return csr_mtx; - } - } - auto copy = Csr::create(exec); - as>(mtx)->convert_to(lend(copy)); - // Here, we assume that a sorted matrix converted to CSR will also be - // sorted - if (!skip_sorting) { - copy->sort_by_column_index(); - } - return {std::move(copy)}; -} - - /** * @internal * @@ -149,17 +119,49 @@ std::shared_ptr extend_sparsity(std::shared_ptr &exec, template void Isai::generate_inverse( - std::shared_ptr input, bool skip_sorting, int power) + std::shared_ptr input, bool skip_sorting, int power, + IndexType excess_limit) { using Dense = matrix::Dense; using LowerTrs = solver::LowerTrs; using UpperTrs = solver::UpperTrs; + using Gmres = solver::Gmres; + using Bj = preconditioner::Jacobi; GKO_ASSERT_IS_SQUARE_MATRIX(input); auto exec = this->get_executor(); - auto to_invert = convert_to_csr_and_sort(exec, input, skip_sorting); - auto inverted = extend_sparsity(exec, to_invert, power); - auto num_rows = inverted->get_size()[0]; auto is_lower = IsaiType == isai_type::lower; + auto is_general = IsaiType == isai_type::general; + auto is_spd = IsaiType == isai_type::spd; + auto to_invert = convert_to_with_sorting(exec, input, 
skip_sorting); + auto num_rows = to_invert->get_size()[0]; + std::shared_ptr inverted; + if (!is_spd) { + inverted = extend_sparsity(exec, to_invert, power); + } else { + // Extract lower triangular part: compute non-zeros + Array inverted_row_ptrs{exec, num_rows + 1}; + exec->run(isai::make_initialize_row_ptrs_l( + to_invert.get(), inverted_row_ptrs.get_data())); + + // Get nnz from device memory + auto inverted_nnz = static_cast( + exec->copy_val_to_host(inverted_row_ptrs.get_data() + num_rows)); + + // Init arrays + Array inverted_col_idxs{exec, inverted_nnz}; + Array inverted_vals{exec, inverted_nnz}; + auto inverted_base = share(Csr::create( + exec, dim<2>{num_rows, num_rows}, std::move(inverted_vals), + std::move(inverted_col_idxs), std::move(inverted_row_ptrs))); + + // Extract lower factor: columns and values + exec->run(isai::make_initialize_l(to_invert.get(), inverted_base.get(), + false)); + + inverted = power == 1 + ? std::move(inverted_base) + : extend_sparsity(exec, inverted_base, power); + } // This stores the beginning of the RHS for the sparse block associated with // each row of inverted_l @@ -168,38 +170,95 @@ void Isai::generate_inverse( // system of excess blocks Array excess_row_ptrs_full{exec, num_rows + 1}; - exec->run(isai::make_generate_tri_inverse( - lend(to_invert), lend(inverted), excess_block_ptrs.get_data(), - excess_row_ptrs_full.get_data(), is_lower)); + if (is_general || is_spd) { + exec->run(isai::make_generate_general_inverse( + lend(to_invert), lend(inverted), excess_block_ptrs.get_data(), + excess_row_ptrs_full.get_data(), is_spd)); + } else { + exec->run(isai::make_generate_tri_inverse( + lend(to_invert), lend(inverted), excess_block_ptrs.get_data(), + excess_row_ptrs_full.get_data(), is_lower)); + } - auto excess_dim = - exec->copy_val_to_host(excess_block_ptrs.get_const_data() + num_rows); + auto host_excess_block_ptrs_array = + Array(exec->get_master(), excess_block_ptrs); + auto host_excess_block_ptrs = 
host_excess_block_ptrs_array.get_const_data(); + auto host_excess_row_ptrs_full_array = + Array(exec->get_master(), excess_row_ptrs_full); + auto host_excess_row_ptrs_full = + host_excess_row_ptrs_full_array.get_const_data(); + auto total_excess_dim = host_excess_block_ptrs[num_rows]; + auto excess_lim = excess_limit == 0 ? total_excess_dim : excess_limit; // if we had long rows: - if (excess_dim > 0) { - // build the excess sparse triangular system - auto excess_nnz = exec->copy_val_to_host( - excess_row_ptrs_full.get_const_data() + num_rows); - auto excess_system = - Csr::create(exec, dim<2>(excess_dim, excess_dim), excess_nnz); - auto excess_rhs = Dense::create(exec, dim<2>(excess_dim, 1)); - auto excess_solution = Dense::create(exec, dim<2>(excess_dim, 1)); - exec->run(isai::make_generate_excess_system( - lend(to_invert), lend(inverted), excess_block_ptrs.get_const_data(), - excess_row_ptrs_full.get_const_data(), lend(excess_system), - lend(excess_rhs))); - // solve it after transposing - std::unique_ptr trs_factory; - if (is_lower) { - trs_factory = UpperTrs::build().on(exec); - } else { - trs_factory = LowerTrs::build().on(exec); + if (total_excess_dim > 0) { + bool done = false; + size_type block = 0; + while (block < num_rows) { + // build the excess sparse triangular system + size_type excess_dim; + size_type excess_start = block; + const auto block_offset = host_excess_block_ptrs[block]; + const auto nnz_offset = host_excess_row_ptrs_full[block]; + for (excess_dim = 0; excess_dim < excess_lim && block < num_rows; + excess_dim = host_excess_block_ptrs[block] - block_offset) { + block++; + } + if (excess_dim == 0) { + break; + } + auto excess_nnz = host_excess_row_ptrs_full[block] - nnz_offset; + auto excess_system = + Csr::create(exec, dim<2>(excess_dim, excess_dim), excess_nnz); + excess_system->set_strategy( + std::make_shared()); + auto excess_rhs = Dense::create(exec, dim<2>(excess_dim, 1)); + auto excess_solution = Dense::create(exec, 
dim<2>(excess_dim, 1)); + exec->run(isai::make_generate_excess_system( + lend(to_invert), lend(inverted), + excess_block_ptrs.get_const_data(), + excess_row_ptrs_full.get_const_data(), lend(excess_system), + lend(excess_rhs), excess_start, block)); + // solve it after transposing + auto system_copy = Csr::create(exec->get_master()); + system_copy->copy_from(excess_system.get()); + auto rhs_copy = Dense::create(exec->get_master()); + rhs_copy->copy_from(excess_rhs.get()); + std::shared_ptr excess_solver_factory; + if (parameters_.excess_solver_factory) { + excess_solver_factory = + share(parameters_.excess_solver_factory); + excess_solution->copy_from(excess_rhs.get()); + } else if (is_general || is_spd) { + excess_solver_factory = + Gmres::build() + .with_preconditioner( + Bj::build().with_max_block_size(32u).on(exec)) + .with_criteria( + gko::stop::Iteration::build() + .with_max_iters(excess_dim) + .on(exec), + gko::stop::RelativeResidualNorm::build() + .with_tolerance(remove_complex{1e-6}) + .on(exec)) + .on(exec); + excess_solution->copy_from(excess_rhs.get()); + } else if (is_lower) { + excess_solver_factory = UpperTrs::build().on(exec); + } else { + excess_solver_factory = LowerTrs::build().on(exec); + } + excess_solver_factory->generate(share(excess_system->transpose())) + ->apply(lend(excess_rhs), lend(excess_solution)); + if (is_spd) { + exec->run(isai::make_scale_excess_solution( + excess_block_ptrs.get_const_data(), lend(excess_solution), + excess_start, block)); + } + // and copy the results back to the original ISAI + exec->run(isai::make_scatter_excess_solution( + excess_block_ptrs.get_const_data(), lend(excess_solution), + lend(inverted), excess_start, block)); } - trs_factory->generate(share(excess_system->transpose())) - ->apply(lend(excess_rhs), lend(excess_solution)); - // and copy the results back to the original ISAI - exec->run(isai::make_scatter_excess_solution( - excess_block_ptrs.get_const_data(), lend(excess_solution), - lend(inverted))); } 
approximate_inverse_ = std::move(inverted); @@ -209,11 +268,16 @@ void Isai::generate_inverse( template std::unique_ptr Isai::transpose() const { + auto is_spd = IsaiType == isai_type::spd; + if (is_spd) { + return this->clone(); + } + std::unique_ptr transp{ new transposed_type{this->get_executor()}}; transp->set_size(gko::transpose(this->get_size())); transp->approximate_inverse_ = - share(as(this->get_approximate_inverse()->transpose())); + share(as(this->get_approximate_inverse())->transpose()); return std::move(transp); } @@ -223,11 +287,16 @@ template std::unique_ptr Isai::conj_transpose() const { + auto is_spd = IsaiType == isai_type::spd; + if (is_spd) { + return this->clone(); + } + std::unique_ptr transp{ new transposed_type{this->get_executor()}}; transp->set_size(gko::transpose(this->get_size())); transp->approximate_inverse_ = - share(as(this->get_approximate_inverse()->conj_transpose())); + share(as(this->get_approximate_inverse())->conj_transpose()); return std::move(transp); } @@ -241,6 +310,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LOWER_ISAI); class Isai GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_UPPER_ISAI); +#define GKO_DECLARE_GENERAL_ISAI(ValueType, IndexType) \ + class Isai +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_GENERAL_ISAI); + +#define GKO_DECLARE_SPD_ISAI(ValueType, IndexType) \ + class Isai +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SPD_ISAI); + } // namespace preconditioner } // namespace gko diff --git a/core/preconditioner/isai_kernels.hpp b/core/preconditioner/isai_kernels.hpp index ce53d51cd3c..bdf6360f1f4 100644 --- a/core/preconditioner/isai_kernels.hpp +++ b/core/preconditioner/isai_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -34,10 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_PRECONDITIONER_ISAI_KERNELS_HPP_ -#include #include +#include + + namespace gko { namespace kernels { @@ -49,6 +51,13 @@ namespace kernels { IndexType *excess_rhs_ptrs, \ IndexType *excess_nz_ptrs, bool lower) +#define GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL(ValueType, IndexType) \ + void generate_general_inverse( \ + std::shared_ptr exec, \ + const matrix::Csr *input, \ + matrix::Csr *inverse, \ + IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, bool spd) + #define GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL(ValueType, IndexType) \ void generate_excess_system( \ std::shared_ptr exec, \ @@ -56,22 +65,34 @@ namespace kernels { const matrix::Csr *inverse, \ const IndexType *excess_rhs_ptrs, const IndexType *excess_nz_ptrs, \ matrix::Csr *excess_system, \ - matrix::Dense *excess_rhs) + matrix::Dense *excess_rhs, size_type e_start, \ + size_type e_end) + +#define GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ + void scale_excess_solution(std::shared_ptr exec, \ + const IndexType *excess_block_ptrs, \ + matrix::Dense *excess_solution, \ + size_type e_start, size_type e_end) #define GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ void scatter_excess_solution( \ std::shared_ptr exec, \ const IndexType *excess_rhs_ptrs, \ const matrix::Dense *excess_solution, \ - matrix::Csr *inverse) - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - constexpr auto row_size_limit = 32; \ - template \ - GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL(ValueType, IndexType); \ - template \ + matrix::Csr *inverse, size_type e_start, \ + size_type e_end) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + constexpr int row_size_limit = 32; \ + template \ + 
GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) @@ -111,6 +132,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace isai { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace isai +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/preconditioner/jacobi.cpp b/core/preconditioner/jacobi.cpp index f7351cd779c..d62d97ec0b5 100644 --- a/core/preconditioner/jacobi.cpp +++ b/core/preconditioner/jacobi.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,15 +33,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include + + #include #include #include +#include +#include #include #include #include #include "core/base/extended_float.hpp" +#include "core/base/utils.hpp" #include "core/preconditioner/jacobi_kernels.hpp" #include "core/preconditioner/jacobi_utils.hpp" @@ -52,12 +58,18 @@ namespace jacobi { GKO_REGISTER_OPERATION(simple_apply, jacobi::simple_apply); +GKO_REGISTER_OPERATION(simple_scalar_apply, jacobi::simple_scalar_apply); GKO_REGISTER_OPERATION(apply, jacobi::apply); +GKO_REGISTER_OPERATION(scalar_apply, jacobi::scalar_apply); GKO_REGISTER_OPERATION(find_blocks, jacobi::find_blocks); GKO_REGISTER_OPERATION(generate, jacobi::generate); +GKO_REGISTER_OPERATION(scalar_conj, jacobi::scalar_conj); +GKO_REGISTER_OPERATION(invert_diagonal, jacobi::invert_diagonal); GKO_REGISTER_OPERATION(transpose_jacobi, jacobi::transpose_jacobi); GKO_REGISTER_OPERATION(conj_transpose_jacobi, jacobi::conj_transpose_jacobi); GKO_REGISTER_OPERATION(convert_to_dense, jacobi::convert_to_dense); +GKO_REGISTER_OPERATION(scalar_convert_to_dense, + jacobi::scalar_convert_to_dense); GKO_REGISTER_OPERATION(initialize_precisions, jacobi::initialize_precisions); @@ -67,11 +79,19 @@ GKO_REGISTER_OPERATION(initialize_precisions, jacobi::initialize_precisions); template void Jacobi::apply_impl(const LinOp *b, LinOp *x) const { - using dense = matrix::Dense; - this->get_executor()->run(jacobi::make_simple_apply( - num_blocks_, parameters_.max_block_size, storage_scheme_, - parameters_.storage_optimization.block_wise, parameters_.block_pointers, - blocks_, as(b), as(x))); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + if (parameters_.max_block_size == 1) { + this->get_executor()->run(jacobi::make_simple_scalar_apply( + this->blocks_, dense_b, dense_x)); + } else { + this->get_executor()->run(jacobi::make_simple_apply( + num_blocks_, parameters_.max_block_size, storage_scheme_, + parameters_.storage_optimization.block_wise, + parameters_.block_pointers, blocks_, 
dense_b, dense_x)); + } + }, + b, x); } @@ -80,12 +100,20 @@ void Jacobi::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - using dense = matrix::Dense; - this->get_executor()->run(jacobi::make_apply( - num_blocks_, parameters_.max_block_size, storage_scheme_, - parameters_.storage_optimization.block_wise, parameters_.block_pointers, - blocks_, as(alpha), as(b), as(beta), - as(x))); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + if (parameters_.max_block_size == 1) { + this->get_executor()->run(jacobi::make_scalar_apply( + this->blocks_, dense_alpha, dense_b, dense_beta, dense_x)); + } else { + this->get_executor()->run(jacobi::make_apply( + num_blocks_, parameters_.max_block_size, storage_scheme_, + parameters_.storage_optimization.block_wise, + parameters_.block_pointers, blocks_, dense_alpha, dense_b, + dense_beta, dense_x)); + } + }, + alpha, b, beta, x); } @@ -95,10 +123,14 @@ void Jacobi::convert_to( { auto exec = this->get_executor(); auto tmp = matrix::Dense::create(exec, this->get_size()); - exec->run(jacobi::make_convert_to_dense( - num_blocks_, parameters_.storage_optimization.block_wise, - parameters_.block_pointers, blocks_, storage_scheme_, tmp->get_values(), - tmp->get_stride())); + if (parameters_.max_block_size == 1) { + exec->run(jacobi::make_scalar_convert_to_dense(blocks_, tmp.get())); + } else { + exec->run(jacobi::make_convert_to_dense( + num_blocks_, parameters_.storage_optimization.block_wise, + parameters_.block_pointers, blocks_, storage_scheme_, + tmp->get_values(), tmp->get_stride())); + } tmp->move_to(result); } @@ -117,29 +149,40 @@ void Jacobi::write(mat_data &data) const make_temporary_clone(this->get_executor()->get_master(), this); data = {local_clone->get_size(), {}}; - const auto ptrs = local_clone->parameters_.block_pointers.get_const_data(); - for (size_type block = 0; block < local_clone->get_num_blocks(); ++block) { - const 
auto scheme = local_clone->get_storage_scheme(); - const auto group_data = local_clone->blocks_.get_const_data() + - scheme.get_group_offset(block); - const auto block_size = ptrs[block + 1] - ptrs[block]; - const auto precisions = local_clone->parameters_.storage_optimization - .block_wise.get_const_data(); - const auto prec = - precisions ? precisions[block] : precision_reduction(); - GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(ValueType, prec, { - const auto block_data = - reinterpret_cast(group_data) + - scheme.get_block_offset(block); - for (IndexType row = 0; row < block_size; ++row) { - for (IndexType col = 0; col < block_size; ++col) { - data.nonzeros.emplace_back( - ptrs[block] + row, ptrs[block] + col, - static_cast( - block_data[row + col * scheme.get_stride()])); + if (parameters_.max_block_size == 1) { + for (IndexType row = 0; row < data.size[0]; ++row) { + data.nonzeros.emplace_back( + row, row, + static_cast(local_clone->get_blocks()[row])); + } + } else { + const auto ptrs = + local_clone->parameters_.block_pointers.get_const_data(); + for (size_type block = 0; block < local_clone->get_num_blocks(); + ++block) { + const auto scheme = local_clone->get_storage_scheme(); + const auto group_data = local_clone->blocks_.get_const_data() + + scheme.get_group_offset(block); + const auto block_size = ptrs[block + 1] - ptrs[block]; + const auto precisions = + local_clone->parameters_.storage_optimization.block_wise + .get_const_data(); + const auto prec = + precisions ? 
precisions[block] : precision_reduction(); + GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(ValueType, prec, { + const auto block_data = + reinterpret_cast(group_data) + + scheme.get_block_offset(block); + for (IndexType row = 0; row < block_size; ++row) { + for (IndexType col = 0; col < block_size; ++col) { + data.nonzeros.emplace_back( + ptrs[block] + row, ptrs[block] + col, + static_cast( + block_data[row + col * scheme.get_stride()])); + } } - } - }); + }); + } } } @@ -156,10 +199,15 @@ std::unique_ptr Jacobi::transpose() const res->blocks_.resize_and_reset(blocks_.get_num_elems()); res->conditioning_ = conditioning_; res->parameters_ = parameters_; - this->get_executor()->run(jacobi::make_transpose_jacobi( - num_blocks_, parameters_.max_block_size, - parameters_.storage_optimization.block_wise, parameters_.block_pointers, - blocks_, storage_scheme_, res->blocks_)); + if (parameters_.max_block_size == 1) { + res->blocks_ = blocks_; + } else { + this->get_executor()->run(jacobi::make_transpose_jacobi( + num_blocks_, parameters_.max_block_size, + parameters_.storage_optimization.block_wise, + parameters_.block_pointers, blocks_, storage_scheme_, + res->blocks_)); + } return std::move(res); } @@ -177,10 +225,16 @@ std::unique_ptr Jacobi::conj_transpose() const res->blocks_.resize_and_reset(blocks_.get_num_elems()); res->conditioning_ = conditioning_; res->parameters_ = parameters_; - this->get_executor()->run(jacobi::make_conj_transpose_jacobi( - num_blocks_, parameters_.max_block_size, - parameters_.storage_optimization.block_wise, parameters_.block_pointers, - blocks_, storage_scheme_, res->blocks_)); + if (parameters_.max_block_size == 1) { + this->get_executor()->run( + jacobi::make_scalar_conj(this->blocks_, res->blocks_)); + } else { + this->get_executor()->run(jacobi::make_conj_transpose_jacobi( + num_blocks_, parameters_.max_block_size, + parameters_.storage_optimization.block_wise, + parameters_.block_pointers, blocks_, storage_scheme_, + res->blocks_)); + } 
return std::move(res); } @@ -201,37 +255,59 @@ void Jacobi::detect_blocks( template -void Jacobi::generate(const LinOp *system_matrix) +void Jacobi::generate(const LinOp *system_matrix, + bool skip_sorting) { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); + using csr_type = matrix::Csr; const auto exec = this->get_executor(); - const auto csr_mtx = copy_and_convert_to>( - exec, system_matrix); - - if (parameters_.block_pointers.get_data() == nullptr) { - this->detect_blocks(csr_mtx.get()); - } + if (parameters_.max_block_size == 1) { + auto diag = share(as(system_matrix) + ->extract_diagonal_linop()); + auto diag_vt = + ::gko::detail::temporary_conversion>:: + template create>>( + diag.get()); + if (!diag_vt) { + GKO_NOT_SUPPORTED(system_matrix); + } + auto temp = Array::view(diag_vt->get_executor(), + diag_vt->get_size()[0], + diag_vt->get_values()); + this->blocks_ = Array(exec, temp.get_num_elems()); + exec->run(jacobi::make_invert_diagonal(temp, this->blocks_)); + this->num_blocks_ = diag_vt->get_size()[0]; + } else { + auto csr_mtx = convert_to_with_sorting(exec, system_matrix, + skip_sorting); + + if (parameters_.block_pointers.get_data() == nullptr) { + this->detect_blocks(csr_mtx.get()); + } - const auto all_block_opt = parameters_.storage_optimization.of_all_blocks; - auto &precisions = parameters_.storage_optimization.block_wise; - // if adaptive version is used, make sure that the precision array is of the - // correct size by replicating it multiple times if needed - if (parameters_.storage_optimization.is_block_wise || - all_block_opt != precision_reduction(0, 0)) { - if (!parameters_.storage_optimization.is_block_wise) { - precisions = gko::Array(exec, {all_block_opt}); + const auto all_block_opt = + parameters_.storage_optimization.of_all_blocks; + auto &precisions = parameters_.storage_optimization.block_wise; + // if adaptive version is used, make sure that the precision array is of + // the correct size by replicating it multiple times if needed + if 
(parameters_.storage_optimization.is_block_wise || + all_block_opt != precision_reduction(0, 0)) { + if (!parameters_.storage_optimization.is_block_wise) { + precisions = + gko::Array(exec, {all_block_opt}); + } + Array tmp( + exec, parameters_.block_pointers.get_num_elems() - 1); + exec->run(jacobi::make_initialize_precisions(precisions, tmp)); + precisions = std::move(tmp); + conditioning_.resize_and_reset(num_blocks_); } - Array tmp( - exec, parameters_.block_pointers.get_num_elems() - 1); - exec->run(jacobi::make_initialize_precisions(precisions, tmp)); - precisions = std::move(tmp); - conditioning_.resize_and_reset(num_blocks_); - } - exec->run(jacobi::make_generate( - csr_mtx.get(), num_blocks_, parameters_.max_block_size, - parameters_.accuracy, storage_scheme_, conditioning_, precisions, - parameters_.block_pointers, blocks_)); + exec->run(jacobi::make_generate( + csr_mtx.get(), num_blocks_, parameters_.max_block_size, + parameters_.accuracy, storage_scheme_, conditioning_, precisions, + parameters_.block_pointers, blocks_)); + } } diff --git a/core/preconditioner/jacobi_kernels.hpp b/core/preconditioner/jacobi_kernels.hpp index 12d232c26f8..7745438abda 100644 --- a/core/preconditioner/jacobi_kernels.hpp +++ b/core/preconditioner/jacobi_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -62,6 +62,16 @@ namespace kernels { Array &block_precisions, \ const Array &block_pointers, Array &blocks) +#define GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL(ValueType) \ + void scalar_conj(std::shared_ptr exec, \ + const Array &diag, \ + Array &conj_diag) + +#define GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL(ValueType) \ + void invert_diagonal(std::shared_ptr exec, \ + const Array &diag, \ + Array &inv_diag) + #define GKO_DECLARE_JACOBI_APPLY_KERNEL(ValueType, IndexType) \ void apply( \ std::shared_ptr exec, size_type num_blocks, \ @@ -74,6 +84,12 @@ namespace kernels { const matrix::Dense *b, \ const matrix::Dense *beta, matrix::Dense *x) +#define GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL(ValueType) \ + void simple_scalar_apply(std::shared_ptr exec, \ + const Array &diag, \ + const matrix::Dense *b, \ + matrix::Dense *x) + #define GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL(ValueType, IndexType) \ void simple_apply( \ std::shared_ptr exec, size_type num_blocks, \ @@ -85,6 +101,13 @@ namespace kernels { const Array &blocks, const matrix::Dense *b, \ matrix::Dense *x) +#define GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType) \ + void scalar_apply( \ + std::shared_ptr exec, \ + const Array &diag, const matrix::Dense *alpha, \ + const matrix::Dense *b, \ + const matrix::Dense *beta, matrix::Dense *x) + #define GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose_jacobi( \ std::shared_ptr exec, size_type num_blocks, \ @@ -107,6 +130,11 @@ namespace kernels { &storage_scheme, \ Array &out_blocks) +#define GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType) \ + void scalar_convert_to_dense(std::shared_ptr exec, \ + const Array &blocks, \ + matrix::Dense *result) + #define GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense( \ std::shared_ptr exec, size_type num_blocks, \ @@ -127,14 +155,24 @@ namespace kernels { 
GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_JACOBI_GENERATE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL(ValueType); \ + template \ + GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType); \ template \ GKO_DECLARE_JACOBI_APPLY_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL(ValueType); \ template \ GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType); \ template \ GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \ GKO_DECLARE_JACOBI_INITIALIZE_PRECISIONS_KERNEL() @@ -176,6 +214,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace jacobi { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace jacobi +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/preconditioner/jacobi_utils.hpp b/core/preconditioner/jacobi_utils.hpp index 904820cbce2..baa7fda04b6 100644 --- a/core/preconditioner/jacobi_utils.hpp +++ b/core/preconditioner/jacobi_utils.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp new file mode 100644 index 00000000000..f2ec8f0fd63 --- /dev/null +++ b/core/reorder/rcm.cpp @@ -0,0 +1,91 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/reorder/rcm_kernels.hpp" + + +namespace gko { +namespace reorder { +namespace rcm { + + +GKO_REGISTER_OPERATION(get_permutation, rcm::get_permutation); +GKO_REGISTER_OPERATION(get_degree_of_nodes, rcm::get_degree_of_nodes); + + +} // namespace rcm + + +template +void Rcm::generate( + std::shared_ptr &exec, + std::unique_ptr adjacency_matrix) const +{ + const IndexType num_rows = adjacency_matrix->get_size()[0]; + const auto mtx = adjacency_matrix.get(); + auto degrees = Array(exec, num_rows); + // RCM is only valid for symmetric matrices. Should an expensive check + // for symmetry be added here? + exec->run(rcm::make_get_degree_of_nodes(num_rows, mtx->get_const_row_ptrs(), + degrees.get_data())); + exec->run(rcm::make_get_permutation( + num_rows, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + degrees.get_const_data(), permutation_->get_permutation(), + inv_permutation_.get() ? inv_permutation_->get_permutation() : nullptr, + parameters_.strategy)); +} + + +#define GKO_DECLARE_RCM(ValueType, IndexType) class Rcm +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_RCM); + + +} // namespace reorder +} // namespace gko diff --git a/core/reorder/rcm_kernels.hpp b/core/reorder/rcm_kernels.hpp new file mode 100644 index 00000000000..d72e8c0160a --- /dev/null +++ b/core/reorder/rcm_kernels.hpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_REORDER_RCM_KERNELS_HPP_ +#define GKO_CORE_REORDER_RCM_KERNELS_HPP_ + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) \ + void get_permutation(std::shared_ptr exec, \ + IndexType num_vertices, const IndexType *row_ptrs, \ + const IndexType *col_idxs, const IndexType *degrees, \ + IndexType *permutation, IndexType *inv_permutation, \ + gko::reorder::starting_strategy strategy) + +#define GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType) \ + void get_degree_of_nodes(std::shared_ptr exec, \ + IndexType num_vertices, \ + const IndexType *row_ptrs, IndexType *degrees) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType); \ + template \ + GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) + + +namespace omp { +namespace rcm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace rcm +} // namespace omp + + +namespace cuda { +namespace rcm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace rcm +} // namespace cuda + + +namespace hip { +namespace rcm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace rcm +} // namespace hip + + +namespace dpcpp { +namespace rcm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace rcm +} // namespace dpcpp + + +namespace reference { +namespace rcm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace rcm +} // namespace reference + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_REORDER_RCM_KERNELS_HPP_ diff --git a/core/solver/bicg.cpp b/core/solver/bicg.cpp index a7519f48a33..f79832dedc0 100644 --- a/core/solver/bicg.cpp +++ b/core/solver/bicg.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 
2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -87,15 +88,15 @@ std::unique_ptr Bicg::conj_transpose() const /** * @internal - * Transposes the matrix by converting it into a CSR matrix of type - * CsrType, followed by transposing. + * (Conjugate-)Transposes the matrix by converting it into a CSR matrix of type + * CsrType, followed by (conjugate-)transposing. * - * @param mtx Matrix to transpose + * @param mtx Matrix to (conjugate-)transpose * @tparam CsrType Matrix format in which the matrix mtx is converted into - * before transposing it + * before (conjugate-)transposing it */ template -std::unique_ptr transpose_with_csr(const LinOp *mtx) +std::unique_ptr conj_transpose_with_csr(const LinOp *mtx) { auto csr_matrix_unique_ptr = copy_and_convert_to( mtx->get_executor(), const_cast(mtx)); @@ -103,12 +104,24 @@ std::unique_ptr transpose_with_csr(const LinOp *mtx) csr_matrix_unique_ptr->set_strategy( std::make_shared()); - return csr_matrix_unique_ptr->transpose(); + return csr_matrix_unique_ptr->conj_transpose(); } template void Bicg::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Bicg::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -119,8 +132,6 @@ void Bicg::apply_impl(const LinOp *b, LinOp *x) const auto one_op = initialize({one()}, exec); auto neg_one_op = initialize({-one()}, exec); - auto dense_b = as(b); - auto dense_x = as(x); auto r = Vector::create_with_config_of(dense_b); auto r2 = Vector::create_with_config_of(dense_b); auto z = Vector::create_with_config_of(dense_b); @@ -149,12 +160,12 @@ void Bicg::apply_impl(const 
LinOp *b, LinOp *x) const // r = r2 = dense_b // z2 = p2 = q2 = 0 - std::unique_ptr trans_A; - auto transposable_system_matrix = + std::unique_ptr conj_trans_A; + auto conj_transposable_system_matrix = dynamic_cast(system_matrix_.get()); - if (transposable_system_matrix) { - trans_A = transposable_system_matrix->transpose(); + if (conj_transposable_system_matrix) { + conj_trans_A = conj_transposable_system_matrix->conj_transpose(); } else { // TODO Extend when adding more IndexTypes // Try to figure out the IndexType that can be used for the CSR matrix @@ -163,57 +174,69 @@ void Bicg::apply_impl(const LinOp *b, LinOp *x) const auto supports_int64 = dynamic_cast *>(system_matrix_.get()); if (supports_int64) { - trans_A = transpose_with_csr(system_matrix_.get()); + conj_trans_A = conj_transpose_with_csr(system_matrix_.get()); } else { - trans_A = transpose_with_csr(system_matrix_.get()); + conj_trans_A = conj_transpose_with_csr(system_matrix_.get()); } } - auto trans_preconditioner_tmp = + auto conj_trans_preconditioner_tmp = as(get_preconditioner().get()); - auto trans_preconditioner = trans_preconditioner_tmp->transpose(); + auto conj_trans_preconditioner = + conj_trans_preconditioner_tmp->conj_transpose(); system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); // r = r - Ax = -1.0 * A*dense_x + 1.0*r r2->copy_from(r.get()); // r2 = r auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, r.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + r.get()); int iter = -1; + /* Memory movement summary: + * 28n * values + matrix/preconditioner storage + conj storage + * 2x SpMV: 4n * values + storage + conj storage + * 2x Preconditioner: 4n * values + storage + conj storage + * 2x dot 4n + * 1x step 1 (axpys) 6n + * 1x step 2 (axpys) 9n + * 1x norm2 residual n + */ while (true) { get_preconditioner()->apply(r.get(), z.get()); - 
trans_preconditioner->apply(r2.get(), z2.get()); - z->compute_dot(r2.get(), rho.get()); + conj_trans_preconditioner->apply(r2.get(), z2.get()); + z->compute_conj_dot(r2.get(), rho.get()); ++iter; - this->template log(this, iter, r.get(), - dense_x); + this->template log( + this, iter, r.get(), dense_x, nullptr, rho.get()); if (stop_criterion->update() .num_iterations(iter) .residual(r.get()) + .implicit_sq_residual_norm(rho.get()) .solution(dense_x) .check(RelativeStoppingId, true, &stop_status, &one_changed)) { break; } - exec->run(bicg::make_step_1(p.get(), z.get(), p2.get(), z2.get(), - rho.get(), prev_rho.get(), &stop_status)); // tmp = rho / prev_rho // p = z + tmp * p // p2 = z2 + tmp * p2 + exec->run(bicg::make_step_1(p.get(), z.get(), p2.get(), z2.get(), + rho.get(), prev_rho.get(), &stop_status)); system_matrix_->apply(p.get(), q.get()); - trans_A->apply(p2.get(), q2.get()); - p2->compute_dot(q.get(), beta.get()); - exec->run(bicg::make_step_2(dense_x, r.get(), r2.get(), p.get(), - q.get(), q2.get(), beta.get(), rho.get(), - &stop_status)); + conj_trans_A->apply(p2.get(), q2.get()); + p2->compute_conj_dot(q.get(), beta.get()); // tmp = rho / beta // x = x + tmp * p // r = r - tmp * q // r2 = r2 - tmp * q2 + exec->run(bicg::make_step_2(dense_x, r.get(), r2.get(), p.get(), + q.get(), q2.get(), beta.get(), rho.get(), + &stop_status)); swap(prev_rho, rho); } } @@ -223,12 +246,14 @@ template void Bicg::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git 
a/core/solver/bicg_kernels.hpp b/core/solver/bicg_kernels.hpp index 9ef21b3a243..87bac70421c 100644 --- a/core/solver/bicg_kernels.hpp +++ b/core/solver/bicg_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_SOLVER_BICG_KERNELS_HPP_ +#include + + #include #include #include @@ -124,6 +127,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace bicg { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace bicg +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/bicgstab.cpp b/core/solver/bicgstab.cpp index 570c9daee6f..7c8c852fa11 100644 --- a/core/solver/bicgstab.cpp +++ b/core/solver/bicgstab.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -86,9 +87,23 @@ std::unique_ptr Bicgstab::conj_transpose() const template void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Bicgstab::apply_dense_impl( + const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using std::swap; using Vector = matrix::Dense; + using AbsVector = matrix::Dense>; constexpr uint8 RelativeStoppingId{1}; @@ -97,8 +112,6 @@ void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const auto one_op = initialize({one()}, exec); auto neg_one_op = initialize({-one()}, exec); - auto dense_b = as(b); - auto dense_x = as(x); auto r = Vector::create_with_config_of(dense_b); auto z = Vector::create_with_config_of(dense_b); auto y = Vector::create_with_config_of(dense_b); @@ -131,80 +144,95 @@ void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, r.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + r.get()); rr->copy_from(r.get()); int iter = -1; + + /* Memory movement summary: + * 31n * values + 2 * matrix/preconditioner storage + * 2x SpMV: 4n * values + 2 * storage + * 2x Preconditioner: 4n * values + 2 * storage + * 3x dot 6n + * 1x norm2 n + * 1x step 1 (fused axpys) 4n + * 1x step 2 (axpy) 3n + * 1x step 3 (fused axpys) 7n + * 2x norm2 residual 2n + */ while (true) { ++iter; - this->template log(this, iter, r.get(), - dense_x); + this->template log( + this, iter, r.get(), dense_x, nullptr, rho.get()); + rr->compute_conj_dot(r.get(), rho.get()); + if (stop_criterion->update() .num_iterations(iter) .residual(r.get()) + .implicit_sq_residual_norm(rho.get()) .solution(dense_x) .check(RelativeStoppingId, 
true, &stop_status, &one_changed)) { break; } - rr->compute_dot(r.get(), rho.get()); - + // tmp = rho / prev_rho * alpha / omega + // p = r + tmp * (p - omega * v) exec->run(bicgstab::make_step_1(r.get(), p.get(), v.get(), rho.get(), prev_rho.get(), alpha.get(), omega.get(), &stop_status)); - // tmp = rho / prev_rho * alpha / omega - // p = r + tmp * (p - omega * v) get_preconditioner()->apply(p.get(), y.get()); system_matrix_->apply(y.get(), v.get()); - rr->compute_dot(v.get(), beta.get()); - exec->run(bicgstab::make_step_2(r.get(), s.get(), v.get(), rho.get(), - alpha.get(), beta.get(), &stop_status)); + rr->compute_conj_dot(v.get(), beta.get()); // alpha = rho / beta // s = r - alpha * v + exec->run(bicgstab::make_step_2(r.get(), s.get(), v.get(), rho.get(), + alpha.get(), beta.get(), &stop_status)); - ++iter; auto all_converged = stop_criterion->update() .num_iterations(iter) .residual(s.get()) + .implicit_sq_residual_norm(rho.get()) // .solution(dense_x) // outdated at this point .check(RelativeStoppingId, false, &stop_status, &one_changed); if (one_changed) { exec->run(bicgstab::make_finalize(dense_x, y.get(), alpha.get(), &stop_status)); } - this->template log(this, iter, - r.get()); if (all_converged) { break; } get_preconditioner()->apply(s.get(), z.get()); system_matrix_->apply(z.get(), t.get()); - s->compute_dot(t.get(), gamma.get()); - t->compute_dot(t.get(), beta.get()); - exec->run(bicgstab::make_step_3( - dense_x, r.get(), s.get(), t.get(), y.get(), z.get(), alpha.get(), - beta.get(), gamma.get(), omega.get(), &stop_status)); + s->compute_conj_dot(t.get(), gamma.get()); + t->compute_conj_dot(t.get(), beta.get()); // omega = gamma / beta // x = x + alpha * y + omega * z // r = s - omega * t + exec->run(bicgstab::make_step_3( + dense_x, r.get(), s.get(), t.get(), y.get(), z.get(), alpha.get(), + beta.get(), gamma.get(), omega.get(), &stop_status)); swap(prev_rho, rho); } -} // namespace solver +} template void Bicgstab::apply_impl(const LinOp *alpha, 
const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/bicgstab_kernels.hpp b/core/solver/bicgstab_kernels.hpp index 8b48151a50f..74b1208d4d7 100644 --- a/core/solver/bicgstab_kernels.hpp +++ b/core/solver/bicgstab_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_SOLVER_BICGSTAB_KERNELS_HPP_ +#include + + #include #include #include @@ -148,6 +151,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace bicgstab { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace bicgstab +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/cb_gmres.cpp b/core/solver/cb_gmres.cpp new file mode 100644 index 00000000000..8478ea49b5e --- /dev/null +++ b/core/solver/cb_gmres.cpp @@ -0,0 +1,525 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/base/extended_float.hpp" +#include "core/solver/cb_gmres_accessor.hpp" +#include "core/solver/cb_gmres_kernels.hpp" + + +namespace gko { +namespace solver { + + +namespace cb_gmres { + + +GKO_REGISTER_OPERATION(initialize_1, cb_gmres::initialize_1); +GKO_REGISTER_OPERATION(initialize_2, cb_gmres::initialize_2); +GKO_REGISTER_OPERATION(step_1, cb_gmres::step_1); +GKO_REGISTER_OPERATION(step_2, cb_gmres::step_2); + + +} // namespace cb_gmres + + +template +struct to_integer_impl { + using type = T; +}; + +template <> +struct to_integer_impl { + using type = int64; +}; + +template <> +struct to_integer_impl { + using type = int32; +}; + +template <> +struct to_integer_impl { + using type = int16; +}; + +template +using to_integer = typename to_integer_impl::type; + + +template +using reduce_precision_skip = + typename std::conditional_t, Skip>::value, + T, reduce_precision>; + + +namespace detail { + + +template +struct reduce_precision_skip_count_impl { + static_assert(count > 0, + "The count variable must be larger or equal to zero."); + using type = typename reduce_precision_skip_count_impl< + reduce_precision_skip, Skip, count - 1>::type; +}; + +template +struct reduce_precision_skip_count_impl { + using type = T; +}; + + +} // namespace detail + + +template +using reduce_precision_skip_count = + typename detail::reduce_precision_skip_count_impl::type; + + +template +using reduce_precision_count = + typename detail::reduce_precision_skip_count_impl::type; + + +template +struct helper { + template + static void call(Callable callable, + gko::solver::cb_gmres::storage_precision st) + { + switch (st) { + case cb_gmres::storage_precision::reduce1: + callable(reduce_precision_count{}); + break; + case cb_gmres::storage_precision::reduce2: + 
callable(reduce_precision_count{}); + break; + case cb_gmres::storage_precision::integer: + callable(to_integer{}); + break; + case cb_gmres::storage_precision::ireduce1: + callable(to_integer>{}); + break; + case cb_gmres::storage_precision::ireduce2: + callable(to_integer>{}); + break; + default: + callable(ValueType{}); + } + } +}; + + +// helper for complex numbers +template +struct helper> { + using ValueType = std::complex; + using skip_type = std::complex; + + template + static void call(Callable callable, + gko::solver::cb_gmres::storage_precision st) + { + switch (st) { + case cb_gmres::storage_precision::reduce1: + callable(reduce_precision_skip_count{}); + break; + case cb_gmres::storage_precision::reduce2: + callable(reduce_precision_skip_count{}); + break; + case cb_gmres::storage_precision::integer: + case cb_gmres::storage_precision::ireduce1: + case cb_gmres::storage_precision::ireduce2: + GKO_NOT_SUPPORTED(st); + break; + default: + callable(ValueType{}); + } + } +}; + + +template +void CbGmres::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void CbGmres::apply_dense_impl( + const matrix::Dense *dense_b, + matrix::Dense *dense_x) const +{ + // Current workaround to get a lambda with a template argument (only + // the type of `value` matters, the content does not) + auto apply_templated = [&](auto value) { + using storage_type = decltype(value); + GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix_); + + using Vector = matrix::Dense; + using VectorNorms = matrix::Dense>; + using Range3dHelper = + gko::cb_gmres::Range3dHelper; + + + constexpr uint8 RelativeStoppingId{1}; + + auto exec = this->get_executor(); + + auto one_op = initialize({one()}, exec); + auto neg_one_op = initialize({-one()}, exec); + + auto residual = Vector::create_with_config_of(dense_b); + /* The dimensions {x, y, z} explained for the 
krylov_bases: + * - x: selects the krylov vector (which has krylov_dim + 1 vectors) + * - y: selects the (row-)element of said krylov vector + * - z: selects which column-element of said krylov vector should be + * used + */ + const dim<3> krylov_bases_dim{krylov_dim_ + 1, + system_matrix_->get_size()[1], + dense_b->get_size()[1]}; + Range3dHelper helper(exec, krylov_bases_dim); + auto krylov_bases_range = helper.get_range(); + + auto next_krylov_basis = Vector::create_with_config_of(dense_b); + std::shared_ptr> preconditioned_vector = + Vector::create_with_config_of(dense_b); + auto hessenberg = Vector::create( + exec, + dim<2>{krylov_dim_ + 1, krylov_dim_ * dense_b->get_size()[1]}); + auto buffer = Vector::create( + exec, dim<2>{krylov_dim_ + 1, dense_b->get_size()[1]}); + auto givens_sin = + Vector::create(exec, dim<2>{krylov_dim_, dense_b->get_size()[1]}); + auto givens_cos = + Vector::create(exec, dim<2>{krylov_dim_, dense_b->get_size()[1]}); + auto residual_norm_collection = Vector::create( + exec, dim<2>{krylov_dim_ + 1, dense_b->get_size()[1]}); + auto residual_norm = + VectorNorms::create(exec, dim<2>{1, dense_b->get_size()[1]}); + // 1st row of arnoldi_norm: == eta * norm2(old_next_krylov_basis) + // with eta == 1 / sqrt(2) + // (computed right before updating + // next_krylov_basis) + // 2nd row of arnoldi_norm: The actual arnoldi norm + // == norm2(next_krylov_basis) + // 3rd row of arnoldi_norm: the infinity norm of next_krylov_basis + // (ONLY when using a scalar accessor) + auto arnoldi_norm = + VectorNorms::create(exec, dim<2>{3, dense_b->get_size()[1]}); + Array final_iter_nums(this->get_executor(), + dense_b->get_size()[1]); + auto y = + Vector::create(exec, dim<2>{krylov_dim_, dense_b->get_size()[1]}); + + bool one_changed{}; + Array stop_status(this->get_executor(), + dense_b->get_size()[1]); + // reorth_status and num_reorth are both helper variables for GPU + // implementations at the moment. 
+ // num_reorth := Number of vectors which require a re-orthogonalization + // reorth_status := stopping status for the re-orthogonalization, + // marking which RHS requires one, and which does not + Array reorth_status(this->get_executor(), + dense_b->get_size()[1]); + Array num_reorth(this->get_executor(), 1); + + // Initialization + exec->run(cb_gmres::make_initialize_1( + dense_b, residual.get(), givens_sin.get(), givens_cos.get(), + &stop_status, krylov_dim_)); + // residual = dense_b + // givens_sin = givens_cos = 0 + system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), + residual.get()); + // residual = residual - Ax + + exec->run(cb_gmres::make_initialize_2( + residual.get(), residual_norm.get(), residual_norm_collection.get(), + arnoldi_norm.get(), krylov_bases_range, next_krylov_basis.get(), + &final_iter_nums, krylov_dim_)); + // residual_norm = norm(residual) + // residual_norm_collection = {residual_norm, 0, ..., 0} + // krylov_bases(:, 1) = residual / residual_norm + // next_krylov_basis = residual / residual_norm + // final_iter_nums = {0, ..., 0} + + auto stop_criterion = stop_criterion_factory_->generate( + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), + dense_x, residual.get()); + + int total_iter = -1; + size_type restart_iter = 0; + + auto before_preconditioner = + matrix::Dense::create_with_config_of(dense_x); + auto after_preconditioner = + matrix::Dense::create_with_config_of(dense_x); + + Array stop_encountered_rhs(exec->get_master(), + dense_b->get_size()[1]); + Array fully_converged_rhs(exec->get_master(), + dense_b->get_size()[1]); + Array host_stop_status( + this->get_executor()->get_master(), stop_status); + for (size_type i = 0; i < stop_encountered_rhs.get_num_elems(); ++i) { + stop_encountered_rhs.get_data()[i] = false; + fully_converged_rhs.get_data()[i] = false; + } + // Start only after this value with performing forced iterations after + // convergence detection + constexpr decltype(total_iter) 
start_force_reset{10}; + bool perform_reset{false}; + // Divisor applied to krylov_dim_ (or to total_iter, if that gives the + // smaller result) to obtain the number of forced iterations to perform + constexpr decltype(krylov_dim_) forced_iteration_fraction{10}; + const decltype(krylov_dim_) forced_limit{krylov_dim_ / + forced_iteration_fraction}; + // Counter for the forced iterations. Start at max in order to properly + // test convergence at the beginning + decltype(krylov_dim_) forced_iterations{forced_limit}; + + while (true) { + ++total_iter; + this->template log( + this, total_iter, residual.get(), dense_x, residual_norm.get()); + // In the beginning, only force a fraction of the total iterations + if (forced_iterations < forced_limit && + forced_iterations < total_iter / forced_iteration_fraction) { + ++forced_iterations; + } else { + bool all_changed = stop_criterion->update() + .num_iterations(total_iter) + .residual(residual.get()) + .residual_norm(residual_norm.get()) + .solution(dense_x) + .check(RelativeStoppingId, true, + &stop_status, &one_changed); + if (one_changed || all_changed) { + host_stop_status = stop_status; + bool host_array_changed{false}; + for (size_type i = 0; i < host_stop_status.get_num_elems(); + ++i) { + auto local_status = host_stop_status.get_data() + i; + // Ignore all actually converged ones! 
+ if (fully_converged_rhs.get_data()[i]) { + continue; + } + if (local_status->has_converged()) { + // If convergence was detected earlier, or + // at the very beginning: + if (stop_encountered_rhs.get_data()[i] || + total_iter < start_force_reset) { + fully_converged_rhs.get_data()[i] = true; + } else { + stop_encountered_rhs.get_data()[i] = true; + local_status->reset(); + host_array_changed = true; + } + } + } + if (host_array_changed) { + perform_reset = true; + stop_status = host_stop_status; + } else { + // Stop here can happen if all RHS are "fully_converged" + // or if it was stopped for non-convergence reason + // (like time or iteration) + break; + } + forced_iterations = 0; + + } else { + for (size_type i = 0; + i < stop_encountered_rhs.get_num_elems(); ++i) { + stop_encountered_rhs.get_data()[i] = false; + } + } + } + + if (perform_reset || restart_iter == krylov_dim_) { + perform_reset = false; + // Restart + // use a view in case this is called earlier + auto hessenberg_view = hessenberg->create_submatrix( + span{0, restart_iter}, + span{0, dense_b->get_size()[1] * (restart_iter)}); + + exec->run(cb_gmres::make_step_2( + residual_norm_collection.get(), + krylov_bases_range.get_accessor().to_const(), + hessenberg_view.get(), y.get(), before_preconditioner.get(), + &final_iter_nums)); + // Solve upper triangular. 
+ // y = hessenberg \ residual_norm_collection + + this->get_preconditioner()->apply(before_preconditioner.get(), + after_preconditioner.get()); + dense_x->add_scaled(one_op.get(), after_preconditioner.get()); + // Solve x + // x = x + get_preconditioner() * krylov_bases * y + residual->copy_from(dense_b); + // residual = dense_b + system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), + residual.get()); + // residual = residual - Ax + exec->run(cb_gmres::make_initialize_2( + residual.get(), residual_norm.get(), + residual_norm_collection.get(), arnoldi_norm.get(), + krylov_bases_range, next_krylov_basis.get(), + &final_iter_nums, krylov_dim_)); + // residual_norm = norm(residual) + // residual_norm_collection = {residual_norm, 0, ..., 0} + // krylov_bases(:, 1) = residual / residual_norm + // next_krylov_basis = residual / residual_norm + // final_iter_nums = {0, ..., 0} + restart_iter = 0; + } + + this->get_preconditioner()->apply(next_krylov_basis.get(), + preconditioned_vector.get()); + // preconditioned_vector = get_preconditioner() * + // next_krylov_basis + + // Do Arnoldi and givens rotation + auto hessenberg_iter = hessenberg->create_submatrix( + span{0, restart_iter + 2}, + span{dense_b->get_size()[1] * restart_iter, + dense_b->get_size()[1] * (restart_iter + 1)}); + auto buffer_iter = buffer->create_submatrix( + span{0, restart_iter + 2}, span{0, dense_b->get_size()[1]}); + + // Start of arnoldi + system_matrix_->apply(preconditioned_vector.get(), + next_krylov_basis.get()); + // next_krylov_basis = A * preconditioned_vector + exec->run(cb_gmres::make_step_1( + next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), + krylov_bases_range, hessenberg_iter.get(), buffer_iter.get(), + arnoldi_norm.get(), restart_iter, &final_iter_nums, + &stop_status, &reorth_status, &num_reorth)); + // for i in 0:restart_iter + // hessenberg(restart_iter, i) = next_krylov_basis' * krylov_bases(:, i) 
+ // next_krylov_basis -= hessenberg(restart_iter, i) * krylov_bases(:, i) + // end + // hessenberg(restart_iter, restart_iter + 1) = norm(next_krylov_basis) + // next_krylov_basis /= hessenberg(restart_iter, restart_iter + 1) + // End of arnoldi + // Start apply givens rotation + // for j in 0:restart_iter + // temp = cos(j)*hessenberg(j) + + // sin(j)*hessenberg(j+1) + // hessenberg(j+1) = -sin(j)*hessenberg(j) + + // cos(j)*hessenberg(j+1) + // hessenberg(j) = temp; + // end + // Calculate sin and cos + // hessenberg(restart_iter) = + // cos(restart_iter)*hessenberg(restart_iter) + + // sin(restart_iter)*hessenberg(restart_iter) + // hessenberg(restart_iter+1) = 0 + // End apply givens rotation + // Calculate residual norm + + restart_iter++; + } // closes while(true) + // Solve x + + auto hessenberg_small = hessenberg->create_submatrix( + span{0, restart_iter}, + span{0, dense_b->get_size()[1] * (restart_iter)}); + + exec->run(cb_gmres::make_step_2( + residual_norm_collection.get(), + krylov_bases_range.get_accessor().to_const(), + hessenberg_small.get(), y.get(), before_preconditioner.get(), + &final_iter_nums)); + // Solve upper triangular. 
+ // y = hessenberg \ residual_norm_collection + this->get_preconditioner()->apply(before_preconditioner.get(), + after_preconditioner.get()); + dense_x->add_scaled(one_op.get(), after_preconditioner.get()); + // Solve x + // x = x + get_preconditioner() * krylov_bases * y + }; // End of apply_lambda + + // Look which precision to use as the storage type + helper::call(apply_templated, storage_precision_); +} + + +template +void CbGmres::apply_impl(const LinOp *alpha, const LinOp *b, + const LinOp *beta, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); +} + +#define GKO_DECLARE_CB_GMRES(_type1) class CbGmres<_type1> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES); + + +} // namespace solver +} // namespace gko diff --git a/core/solver/cb_gmres_accessor.hpp b/core/solver/cb_gmres_accessor.hpp new file mode 100644 index 00000000000..5c2e9329b99 --- /dev/null +++ b/core/solver/cb_gmres_accessor.hpp @@ -0,0 +1,212 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_SOLVER_CB_GMRES_ACCESSOR_HPP_ +#define GKO_CORE_SOLVER_CB_GMRES_ACCESSOR_HPP_ + + +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include + + +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/scaled_reduced_row_major.hpp" + + +namespace gko { +namespace cb_gmres { + + +namespace detail { + + +template +struct has_3d_scaled_accessor : public std::false_type {}; + +template +struct has_3d_scaled_accessor< + acc::range>> + : public std::true_type {}; + +template ::value> +struct helper_require_scale {}; + +template +struct helper_require_scale : public std::false_type {}; + +template +struct helper_require_scale : public std::true_type {}; + + +} // namespace detail + + +template ::value> +class Range3dHelper {}; + + +template +class Range3dHelper { +public: + using Accessor = + acc::scaled_reduced_row_major<3, ValueType, StorageType, 0b101>; + 
using Range = acc::range; + + Range3dHelper() = default; + + Range3dHelper(std::shared_ptr exec, dim<3> krylov_dim) + : krylov_dim_{{krylov_dim[0], krylov_dim[1], krylov_dim[2]}}, + bases_{exec, krylov_dim_[0] * krylov_dim_[1] * krylov_dim_[2]}, + scale_{exec, krylov_dim_[0] * krylov_dim_[2]} + { + Array h_scale{exec->get_master(), + krylov_dim_[0] * krylov_dim_[2]}; + for (size_type i = 0; i < h_scale.get_num_elems(); ++i) { + h_scale.get_data()[i] = one(); + } + scale_ = h_scale; + } + + Range get_range() + { + return Range(krylov_dim_, bases_.get_data(), scale_.get_data()); + } + + gko::Array &get_bases() { return bases_; } + +private: + std::array krylov_dim_; + Array bases_; + Array scale_; +}; + + +template +class Range3dHelper { +public: + using Accessor = acc::reduced_row_major<3, ValueType, StorageType>; + using Range = acc::range; + + Range3dHelper() = default; + + Range3dHelper(std::shared_ptr exec, dim<3> krylov_dim) + : krylov_dim_{{krylov_dim[0], krylov_dim[1], krylov_dim[2]}}, + bases_{std::move(exec), + krylov_dim_[0] * krylov_dim_[1] * krylov_dim_[2]} + {} + + Range get_range() { return Range(krylov_dim_, bases_.get_data()); } + + gko::Array &get_bases() { return bases_; } + +private: + std::array krylov_dim_; + Array bases_; +}; + + +template ::value> +struct helper_functions_accessor {}; + +// Accessors having a scale +template +struct helper_functions_accessor { + using arithmetic_type = typename Accessor3d::accessor::arithmetic_type; + static constexpr size_type dimensionality = Accessor3d::dimensionality; + static_assert(detail::has_3d_scaled_accessor::value, + "Accessor must have a scalar here!"); + template + static inline GKO_ATTRIBUTES void write_scalar(Accessor3d krylov_bases, + IndexType vector_idx, + IndexType col_idx, + arithmetic_type value) + { + using storage_type = typename Accessor3d::accessor::storage_type; + constexpr arithmetic_type correction = + std::is_integral::value + // Use 2 instead of 1 here to allow for a bit more room 
+ ? 2 / static_cast( + std::numeric_limits::max()) + : 1; + krylov_bases.get_accessor().write_scalar_direct(value * correction, + vector_idx, col_idx); + } + + static constexpr GKO_ATTRIBUTES std::array + get_stride(Accessor3d krylov_bases) + { + return krylov_bases.get_accessor().get_storage_stride(); + } +}; + +// Accessors not having a scale +template +struct helper_functions_accessor { + using arithmetic_type = typename Accessor3d::accessor::arithmetic_type; + static constexpr size_type dimensionality = Accessor3d::dimensionality; + static_assert(!detail::has_3d_scaled_accessor::value, + "Accessor must not have a scale here!"); + + template + static inline GKO_ATTRIBUTES void write_scalar(Accessor3d, IndexType, + IndexType, arithmetic_type) + { + // Since there is no scalar, there is nothing to write. + } + + static constexpr GKO_ATTRIBUTES std::array + get_stride(Accessor3d krylov_bases) + { + return krylov_bases.get_accessor().get_stride(); + } +}; + + +} // namespace cb_gmres +} // namespace gko + + +#endif // GKO_CORE_SOLVER_CB_GMRES_ACCESSOR_HPP_ diff --git a/core/solver/cb_gmres_kernels.hpp b/core/solver/cb_gmres_kernels.hpp new file mode 100644 index 00000000000..1a7a8765d8d --- /dev/null +++ b/core/solver/cb_gmres_kernels.hpp @@ -0,0 +1,232 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_SOLVER_CB_GMRES_KERNELS_HPP_ +#define GKO_CORE_SOLVER_CB_GMRES_KERNELS_HPP_ + + +#include +#include +#include +#include +#include + + +#include "accessor/reduced_row_major.hpp" +#include "accessor/scaled_reduced_row_major.hpp" +#include "core/base/extended_float.hpp" + + +// TODO Find way around using it! +#define GKO_UNPACK(...) __VA_ARGS__ +/** + * Instantiates a template for each value type with each lower precision type + * supported by Ginkgo for CbGmres. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take two arguments: + * 1. the first will be used as the regular ValueType + * (precisions supported by Ginkgo), and + * 2. the second the value type of the reduced precision. + * @param _const qualifier used for the storage type, indicating if it is a + * const accessor, or not. 
+ */ +#define GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE_HELPER(_macro, _const) \ + template _macro( \ + double, \ + GKO_UNPACK( \ + acc::range>)); \ + template _macro( \ + double, \ + GKO_UNPACK( \ + acc::range>)); \ + template _macro( \ + double, \ + GKO_UNPACK( \ + acc::range>)); \ + template _macro(double, \ + GKO_UNPACK(acc::range>)); \ + template _macro(double, \ + GKO_UNPACK(acc::range>)); \ + template _macro(double, \ + GKO_UNPACK(acc::range>)); \ + template _macro( \ + float, \ + GKO_UNPACK( \ + acc::range>)); \ + template _macro( \ + float, \ + GKO_UNPACK( \ + acc::range>)); \ + template _macro(float, \ + GKO_UNPACK(acc::range>)); \ + template _macro(float, \ + GKO_UNPACK(acc::range>)); \ + template _macro( \ + std::complex, \ + GKO_UNPACK( \ + acc::range, \ + _const std::complex>>)); \ + template _macro( \ + std::complex, \ + GKO_UNPACK( \ + acc::range, \ + _const std::complex>>)); \ + template _macro( \ + std::complex, \ + GKO_UNPACK( \ + acc::range, \ + _const std::complex>>)) + +#define GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE_HELPER(_macro, ) + +#define GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE(_macro) \ + GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE_HELPER(_macro, const) + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL(_type) \ + void initialize_1( \ + std::shared_ptr exec, \ + const matrix::Dense<_type> *b, matrix::Dense<_type> *residual, \ + matrix::Dense<_type> *givens_sin, matrix::Dense<_type> *givens_cos, \ + Array *stop_status, size_type krylov_dim) + + +#define GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL(_type1, _range) \ + void initialize_2(std::shared_ptr exec, \ + const matrix::Dense<_type1> *residual, \ + matrix::Dense> *residual_norm, \ + matrix::Dense<_type1> *residual_norm_collection, \ + matrix::Dense> *arnoldi_norm, \ + _range krylov_bases, \ + matrix::Dense<_type1> *next_krylov_basis, \ + Array *final_iter_nums, size_type krylov_dim) + + +#define 
GKO_DECLARE_CB_GMRES_STEP_1_KERNEL(_type1, _range) \ + void step_1( \ + std::shared_ptr exec, \ + matrix::Dense<_type1> *next_krylov_basis, \ + matrix::Dense<_type1> *givens_sin, matrix::Dense<_type1> *givens_cos, \ + matrix::Dense> *residual_norm, \ + matrix::Dense<_type1> *residual_norm_collection, _range krylov_bases, \ + matrix::Dense<_type1> *hessenberg_iter, \ + matrix::Dense<_type1> *buffer_iter, \ + matrix::Dense> *arnoldi_norm, size_type iter, \ + Array *final_iter_nums, \ + const Array *stop_status, \ + Array *reorth_status, Array *num_reorth) + +#define GKO_DECLARE_CB_GMRES_STEP_2_KERNEL(_type1, _range) \ + void step_2(std::shared_ptr exec, \ + const matrix::Dense<_type1> *residual_norm_collection, \ + _range krylov_bases, const matrix::Dense<_type1> *hessenberg, \ + matrix::Dense<_type1> *y, \ + matrix::Dense<_type1> *before_preconditioner, \ + const Array *final_iter_nums) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL(ValueType, Accessor3d); \ + template \ + GKO_DECLARE_CB_GMRES_STEP_1_KERNEL(ValueType, Accessor3d); \ + template \ + GKO_DECLARE_CB_GMRES_STEP_2_KERNEL(ValueType, Accessor3d) + + +namespace omp { +namespace cb_gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cb_gmres +} // namespace omp + + +namespace cuda { +namespace cb_gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cb_gmres +} // namespace cuda + + +namespace reference { +namespace cb_gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cb_gmres +} // namespace reference + + +namespace hip { +namespace cb_gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cb_gmres +} // namespace hip + + +namespace dpcpp { +namespace cb_gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cb_gmres +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // 
GKO_CORE_SOLVER_CB_GMRES_KERNELS_HPP_ diff --git a/core/solver/cg.cpp b/core/solver/cg.cpp index 838ede4a882..4dc624584e8 100644 --- a/core/solver/cg.cpp +++ b/core/solver/cg.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -87,6 +88,18 @@ std::unique_ptr Cg::conj_transpose() const template void Cg::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Cg::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -98,8 +111,6 @@ void Cg::apply_impl(const LinOp *b, LinOp *x) const auto one_op = initialize({one()}, exec); auto neg_one_op = initialize({-one()}, exec); - auto dense_b = as(b); - auto dense_x = as(x); auto r = Vector::create_with_config_of(dense_b); auto z = Vector::create_with_config_of(dense_b); auto p = Vector::create_with_config_of(dense_b); @@ -124,36 +135,47 @@ void Cg::apply_impl(const LinOp *b, LinOp *x) const system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, r.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + r.get()); int iter = -1; + /* Memory movement summary: + * 18n * values + matrix/preconditioner storage + * 1x SpMV: 2n * values + storage + * 1x Preconditioner: 2n * values + storage + * 2x dot 4n + * 1x step 1 (axpy) 3n + * 1x step 2 (axpys) 6n + * 1x norm2 residual n + */ while (true) { 
get_preconditioner()->apply(r.get(), z.get()); - r->compute_dot(z.get(), rho.get()); + r->compute_conj_dot(z.get(), rho.get()); ++iter; - this->template log(this, iter, r.get(), - dense_x); + this->template log( + this, iter, r.get(), dense_x, nullptr, rho.get()); if (stop_criterion->update() .num_iterations(iter) .residual(r.get()) + .implicit_sq_residual_norm(rho.get()) .solution(dense_x) .check(RelativeStoppingId, true, &stop_status, &one_changed)) { break; } - exec->run(cg::make_step_1(p.get(), z.get(), rho.get(), prev_rho.get(), - &stop_status)); // tmp = rho / prev_rho // p = z + tmp * p + exec->run(cg::make_step_1(p.get(), z.get(), rho.get(), prev_rho.get(), + &stop_status)); system_matrix_->apply(p.get(), q.get()); - p->compute_dot(q.get(), beta.get()); - exec->run(cg::make_step_2(dense_x, r.get(), p.get(), q.get(), - beta.get(), rho.get(), &stop_status)); + p->compute_conj_dot(q.get(), beta.get()); // tmp = rho / beta // x = x + tmp * p // r = r - tmp * q + exec->run(cg::make_step_2(dense_x, r.get(), p.get(), q.get(), + beta.get(), rho.get(), &stop_status)); swap(prev_rho, rho); } } @@ -163,12 +185,14 @@ template void Cg::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/cg_kernels.hpp b/core/solver/cg_kernels.hpp index 3a52974033a..2cfa476c744 100644 --- a/core/solver/cg_kernels.hpp +++ b/core/solver/cg_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors 
+Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_SOLVER_CG_KERNELS_HPP_ +#include + + #include #include #include @@ -119,6 +122,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace cg { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cg +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/cgs.cpp b/core/solver/cgs.cpp index f92f9afc30f..c33a3b7b9c1 100644 --- a/core/solver/cgs.cpp +++ b/core/solver/cgs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -87,11 +88,21 @@ std::unique_ptr Cgs::conj_transpose() const template void Cgs::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Cgs::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using std::swap; using Vector = matrix::Dense; - auto dense_b = as(b); - auto dense_x = as(x); constexpr uint8 RelativeStoppingId{1}; @@ -128,55 +139,64 @@ void Cgs::apply_impl(const LinOp *b, LinOp *x) const // r = dense_b // r_tld = r // rho = 0.0 - // rho_prev = 1.0 + // rho_prev = alpha = beta = gamma = 1.0 // p = q = u = u_hat = v_hat = t = 0 system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, r.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + r.get()); r_tld->copy_from(r.get()); - int iter = 0; + int iter = -1; + /* Memory movement summary: + * 28n * values + 2 * matrix/preconditioner storage + * 2x SpMV: 4n * values + 2 * storage + * 2x Preconditioner: 4n * values + 2 * storage + * 2x dot 4n + * 1x step 1 (fused axpys) 5n + * 1x step 2 (fused axpys) 4n + * 1x step 3 (axpys) 6n + * 1x norm2 residual n + */ while (true) { - r->compute_dot(r_tld.get(), rho.get()); + r->compute_conj_dot(r_tld.get(), rho.get()); + + ++iter; + this->template log( + this, iter, r.get(), dense_x, nullptr, rho.get()); + if (stop_criterion->update() + .num_iterations(iter) + .residual(r.get()) + .implicit_sq_residual_norm(rho.get()) + .solution(dense_x) + .check(RelativeStoppingId, true, &stop_status, &one_changed)) { + break; + } + + // beta = rho / rho_prev + // u = r + beta * q + // p = u + beta * ( q + beta * p ) exec->run(cgs::make_step_1(r.get(), u.get(), p.get(), q.get(), beta.get(), 
rho.get(), rho_prev.get(), &stop_status)); - // beta = rho / rho_prev - // u = r + beta * q; - // p = u + beta * ( q + beta * p ); get_preconditioner()->apply(p.get(), t.get()); system_matrix_->apply(t.get(), v_hat.get()); - r_tld->compute_dot(v_hat.get(), gamma.get()); + r_tld->compute_conj_dot(v_hat.get(), gamma.get()); + // alpha = rho / gamma + // q = u - alpha * v_hat + // t = u + q exec->run(cgs::make_step_2(u.get(), v_hat.get(), q.get(), t.get(), alpha.get(), rho.get(), gamma.get(), &stop_status)); - ++iter; - this->template log(this, iter, r.get(), - dense_x); - - // alpha = rho / gamma - // q = u - alpha * v_hat - // t = u + q get_preconditioner()->apply(t.get(), u_hat.get()); system_matrix_->apply(u_hat.get(), t.get()); + // r = r - alpha * t + // x = x + alpha * u_hat exec->run(cgs::make_step_3(t.get(), u_hat.get(), r.get(), dense_x, alpha.get(), &stop_status)); - // r = r -alpha * t - // x = x + alpha * u_hat - - ++iter; - this->template log(this, iter, r.get(), - dense_x); - if (stop_criterion->update() - .num_iterations(iter) - .residual(r.get()) - .solution(dense_x) - .check(RelativeStoppingId, true, &stop_status, &one_changed)) { - break; - } swap(rho_prev, rho); } @@ -187,12 +207,14 @@ template void Cgs::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/cgs_kernels.hpp b/core/solver/cgs_kernels.hpp index 1404303b2ce..3c10d782062 100644 --- a/core/solver/cgs_kernels.hpp +++ b/core/solver/cgs_kernels.hpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_SOLVER_CGS_KERNELS_HPP_ +#include + + #include #include #include @@ -135,6 +138,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace cgs { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace cgs +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/fcg.cpp b/core/solver/fcg.cpp index 595476f1637..ee1eff61434 100644 --- a/core/solver/fcg.cpp +++ b/core/solver/fcg.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -84,11 +85,21 @@ std::unique_ptr Fcg::conj_transpose() const template void Fcg::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Fcg::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using std::swap; using Vector = matrix::Dense; - auto dense_b = as(b); - auto dense_x = as(x); constexpr uint8 RelativeStoppingId{1}; @@ -126,39 +137,50 @@ void Fcg::apply_impl(const LinOp *b, LinOp *x) const system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, r.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + r.get()); int iter = -1; + /* Memory movement summary: + * 21n * values + matrix/preconditioner storage + * 1x SpMV: 2n * values + storage + * 1x Preconditioner: 2n * values + storage + * 3x dot 6n + * 1x step 1 (axpy) 3n + * 1x step 2 (fused axpys) 7n + * 1x norm2 residual n + */ while (true) { get_preconditioner()->apply(r.get(), z.get()); - r->compute_dot(z.get(), rho.get()); - t->compute_dot(z.get(), rho_t.get()); + r->compute_conj_dot(z.get(), rho.get()); + t->compute_conj_dot(z.get(), rho_t.get()); ++iter; - this->template log(this, iter, r.get(), - dense_x); + this->template log( + this, iter, r.get(), dense_x, nullptr, rho.get()); if (stop_criterion->update() .num_iterations(iter) .residual(r.get()) + .implicit_sq_residual_norm(rho.get()) .solution(dense_x) .check(RelativeStoppingId, true, &stop_status, &one_changed)) { break; } - exec->run(fcg::make_step_1(p.get(), z.get(), rho_t.get(), - prev_rho.get(), &stop_status)); // tmp = rho_t / prev_rho // p = z + tmp * p + exec->run(fcg::make_step_1(p.get(), z.get(), rho_t.get(), + prev_rho.get(), &stop_status)); 
system_matrix_->apply(p.get(), q.get()); - p->compute_dot(q.get(), beta.get()); - exec->run(fcg::make_step_2(dense_x, r.get(), t.get(), p.get(), q.get(), - beta.get(), rho.get(), &stop_status)); + p->compute_conj_dot(q.get(), beta.get()); // tmp = rho / beta // [prev_r = r] in registers // x = x + tmp * p // r = r - tmp * q // t = r - [prev_r] + exec->run(fcg::make_step_2(dense_x, r.get(), t.get(), p.get(), q.get(), + beta.get(), rho.get(), &stop_status)); swap(prev_rho, rho); } } @@ -168,11 +190,14 @@ template void Fcg::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/fcg_kernels.hpp b/core/solver/fcg_kernels.hpp index dc269f2fa19..04c385f0e3e 100644 --- a/core/solver/fcg_kernels.hpp +++ b/core/solver/fcg_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_CORE_SOLVER_FCG_KERNELS_HPP_ +#include + + #include #include #include @@ -120,6 +123,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace fcg { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace fcg +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 9e9c39c3848..ac0c484a34b 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -94,8 +95,18 @@ std::unique_ptr Gmres::conj_transpose() const template void Gmres::apply_impl(const LinOp *b, LinOp *x) const { - GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix_); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + +template +void Gmres::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const +{ using Vector = matrix::Dense; using NormVector = matrix::Dense>; @@ -106,12 +117,11 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const auto one_op = initialize({one()}, exec); auto neg_one_op = initialize({-one()}, exec); - auto dense_b = as(b); - auto dense_x = as(x); auto residual = Vector::create_with_config_of(dense_b); - auto krylov_bases = Vector::create( - exec, dim<2>{system_matrix_->get_size()[1] * (krylov_dim_ + 1), - dense_b->get_size()[1]}); + auto krylov_bases = Vector::create_with_type_of( + dense_b, exec, + dim<2>{system_matrix_->get_size()[1] * (krylov_dim_ + 1), + dense_b->get_size()[1]}); std::shared_ptr> preconditioned_vector = Vector::create_with_config_of(dense_b); auto 
hessenberg = Vector::create( @@ -150,8 +160,9 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const // final_iter_nums = {0, ..., 0} auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, residual.get()); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + residual.get()); int total_iter = -1; size_type restart_iter = 0; @@ -161,6 +172,24 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const auto after_preconditioner = matrix::Dense::create_with_config_of(dense_x); + /* Memory movement summary for average iteration with krylov_dim d: + * (5/2d+21/2+14/d)n * values + (1+1/d) * matrix/preconditioner storage + * 1x SpMV: 2n * values + storage + * 1x Preconditioner: 2n * values + storage + * MGS: (5/2d+11/2)n = sum k=0 to d-1 of (5k+8)n/d + * 1x dots 2(k+1)n in iteration k (0-based) + * 1x axpys 3(k+1)n in iteration k (0-based) + * 1x norm2 n + * 1x scal 2n + * Restart: (1+14/d)n (every dth iteration) + * 1x gemv (d+1)n + * 1x Preconditioner 2n * values + storage + * 1x axpy 3n + * 1x copy 2n + * 1x Advanced SpMV 3n * values + storage + * 1x norm2 n + * 1x scal 2n + */ while (true) { ++total_iter; this->template log( @@ -177,32 +206,31 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const if (restart_iter == krylov_dim_) { // Restart + // Solve upper triangular. + // y = hessenberg \ residual_norm_collection + // before_preconditioner = krylov_bases * y exec->run(gmres::make_step_2(residual_norm_collection.get(), krylov_bases.get(), hessenberg.get(), y.get(), before_preconditioner.get(), &final_iter_nums)); - // Solve upper triangular. 
- // y = hessenberg \ residual_norm_collection - // before_preconditioner = krylov_bases * y + // x = x + get_preconditioner() * before_preconditioner get_preconditioner()->apply(before_preconditioner.get(), after_preconditioner.get()); dense_x->add_scaled(one_op.get(), after_preconditioner.get()); - // Solve x - // x = x + get_preconditioner() * before_preconditioner - residual->copy_from(dense_b); // residual = dense_b + residual->copy_from(dense_b); + // residual = residual - Ax system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), residual.get()); - // residual = residual - Ax - exec->run(gmres::make_initialize_2( - residual.get(), residual_norm.get(), - residual_norm_collection.get(), krylov_bases.get(), - &final_iter_nums, krylov_dim_)); // residual_norm = norm(residual) // residual_norm_collection = {residual_norm, unchanged} // krylov_bases(:, 1) = residual / residual_norm // final_iter_nums = {0, ..., 0} + exec->run(gmres::make_initialize_2( + residual.get(), residual_norm.get(), + residual_norm_collection.get(), krylov_bases.get(), + &final_iter_nums, krylov_dim_)); restart_iter = 0; } auto this_krylov = krylov_bases->create_submatrix( @@ -214,9 +242,9 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const span{system_matrix_->get_size()[0] * (restart_iter + 1), system_matrix_->get_size()[0] * (restart_iter + 2)}, span{0, dense_b->get_size()[1]}); + // preconditioned_vector = get_preconditioner() * this_krylov get_preconditioner()->apply(this_krylov.get(), preconditioned_vector.get()); - // preconditioned_vector = get_preconditioner() * this_krylov // Do Arnoldi and givens rotation auto hessenberg_iter = hessenberg->create_submatrix( @@ -225,14 +253,9 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const dense_b->get_size()[1] * (restart_iter + 1)}); // Start of arnoldi - system_matrix_->apply(preconditioned_vector.get(), next_krylov.get()); // next_krylov = A * preconditioned_vector + system_matrix_->apply(preconditioned_vector.get(), 
next_krylov.get()); - exec->run(gmres::make_step_1( - dense_b->get_size()[0], givens_sin.get(), givens_cos.get(), - residual_norm.get(), residual_norm_collection.get(), - krylov_bases.get(), hessenberg_iter.get(), restart_iter, - &final_iter_nums, &stop_status)); // final_iter_nums += 1 (unconverged) // next_krylov_basis is alias for (restart_iter + 1)-th krylov_bases // for i in 0:restart_iter(include) @@ -269,6 +292,11 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const // residual_norm_collection(restart_iter) = cos(restart_iter) * this_rnc // residual_norm = abs(next_rnc) // residual_norm_collection(restart_iter + 1) = next_rnc + exec->run(gmres::make_step_1( + dense_b->get_size()[0], givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), + krylov_bases.get(), hessenberg_iter.get(), restart_iter, + &final_iter_nums, &stop_status)); restart_iter++; } @@ -281,32 +309,32 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const span{0, restart_iter}, span{0, dense_b->get_size()[1] * (restart_iter)}); + // Solve upper triangular. + // y = hessenberg \ residual_norm_collection + // before_preconditioner = krylov_bases * y exec->run(gmres::make_step_2( residual_norm_collection.get(), krylov_bases_small.get(), hessenberg_small.get(), y.get(), before_preconditioner.get(), &final_iter_nums)); - // Solve upper triangular. 
- // y = hessenberg \ residual_norm_collection - // before_preconditioner = krylov_bases * y + // x = x + get_preconditioner() * before_preconditioner get_preconditioner()->apply(before_preconditioner.get(), after_preconditioner.get()); dense_x->add_scaled(one_op.get(), after_preconditioner.get()); - // Solve x - // x = x + get_preconditioner() * before_preconditioner } template void Gmres::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *residual_norm_collection, - LinOp *x) const + const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(residual_norm_collection); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index 644a8cf708e..81ca349a99e 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -135,6 +135,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace gmres { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace gmres +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/idr.cpp b/core/solver/idr.cpp new file mode 100644 index 00000000000..411ab892dd9 --- /dev/null +++ b/core/solver/idr.cpp @@ -0,0 +1,313 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/solver/idr_kernels.hpp" + + +namespace gko { +namespace solver { +namespace idr { + + +GKO_REGISTER_OPERATION(initialize, idr::initialize); +GKO_REGISTER_OPERATION(step_1, idr::step_1); +GKO_REGISTER_OPERATION(step_2, idr::step_2); +GKO_REGISTER_OPERATION(step_3, idr::step_3); +GKO_REGISTER_OPERATION(compute_omega, idr::compute_omega); +GKO_REGISTER_OPERATION(fill_array, components::fill_array); + + +} // namespace idr + + +template +std::unique_ptr Idr::transpose() const +{ + return build() + .with_generated_preconditioner( + share(as(this->get_preconditioner())->transpose())) + .with_criteria(this->stop_criterion_factory_) + .on(this->get_executor()) + ->generate( + share(as(this->get_system_matrix())->transpose())); +} + + +template +std::unique_ptr Idr::conj_transpose() const +{ + return build() + .with_generated_preconditioner(share( + as(this->get_preconditioner())->conj_transpose())) + .with_criteria(this->stop_criterion_factory_) + .on(this->get_executor()) + ->generate(share( + as(this->get_system_matrix())->conj_transpose())); +} + + +template +template +void Idr::iterate(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const +{ + using std::swap; + using Vector = matrix::Dense; + using NormVector = matrix::Dense>; + + auto exec = this->get_executor(); + + auto one_op = + initialize>({one()}, exec); + auto neg_one_op = + initialize>({-one()}, exec); + auto subspace_neg_one_op = initialize({-one()}, exec); + + constexpr uint8 RelativeStoppingId{1}; + + const auto problem_size = system_matrix_->get_size()[0]; + const auto nrhs = dense_b->get_size()[1]; + + auto residual = Vector::create_with_config_of(dense_b); + auto v = Vector::create_with_config_of(dense_b); + auto t = Vector::create_with_config_of(dense_b); + auto helper = 
Vector::create_with_config_of(dense_b); + + auto m = + Vector::create(exec, gko::dim<2>{subspace_dim_, subspace_dim_ * nrhs}); + + auto g = + Vector::create(exec, gko::dim<2>{problem_size, subspace_dim_ * nrhs}); + auto u = + Vector::create(exec, gko::dim<2>{problem_size, subspace_dim_ * nrhs}); + + auto f = Vector::create(exec, gko::dim<2>{subspace_dim_, nrhs}); + auto c = Vector::create(exec, gko::dim<2>{subspace_dim_, nrhs}); + + auto omega = Vector::create(exec, gko::dim<2>{1, nrhs}); + auto residual_norm = NormVector::create(exec, dim<2>{1, nrhs}); + auto tht = Vector::create(exec, dim<2>{1, nrhs}); + auto t_norm = NormVector::create(exec, dim<2>{1, nrhs}); + auto alpha = Vector::create(exec, gko::dim<2>{1, nrhs}); + + bool one_changed{}; + Array stop_status(exec, nrhs); + + // The dense matrix containing the randomly generated subspace vectors. + // Stored in column major order and complex conjugated. So, if the + // matrix containing the subspace vectors in row major order is called P, + // subspace_vectors actually contains P^H. 
+ auto subspace_vectors = + Vector::create(exec, gko::dim<2>(subspace_dim_, problem_size)); + + // Initialization + // m = identity + exec->run(idr::make_initialize(nrhs, m.get(), subspace_vectors.get(), + deterministic_, &stop_status)); + + // omega = 1 + exec->run( + idr::make_fill_array(omega->get_values(), nrhs, one())); + + // residual = b - Ax + residual->copy_from(dense_b); + system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), + residual.get()); + residual->compute_norm2(residual_norm.get()); + + // g = u = 0 + exec->run(idr::make_fill_array( + g->get_values(), problem_size * g->get_stride(), zero())); + exec->run(idr::make_fill_array( + u->get_values(), problem_size * u->get_stride(), zero())); + + + auto stop_criterion = stop_criterion_factory_->generate( + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + residual.get()); + + int total_iter = -1; + + /* Memory movement summary for iteration with subspace dimension s + * Per iteration: + * (11/2s^2+31/2s+18)n * values + (s+1) * matrix/preconditioner storage + * (s+1)x SpMV: 2(s+1)n * values + (s+1) * storage + * (s+1)x Preconditioner: 2(s+1)n * values + (s+1) * storage + * 1x multidot (gemv) (s+1)n + * sx step 1 (fused axpys) s(s/2+5/2)n = sum k=[0,s) of (s-k+2)n + * sx step 2 (fused axpys) s(s/2+5/2)n = sum k=[0,s) of (s-k+2)n + * sx step 3: s(9/2s+11/2)n = sum k=[0,s) of (8k+2+s-k+1+6)n + * 1x orthogonalize g+u (8k+2)n in iteration k (0-based) + * 1x multidot (gemv) (s-k+1)n in iteration k (0-based) + * 2x axpy 6n + * 1x dot 2n + * 2x norm2 2n + * 1x scale 2n + * 2x axpy 6n + * 1x norm2 residual n + */ + while (true) { + ++total_iter; + this->template log( + this, total_iter, residual.get(), dense_x); + + if (stop_criterion->update() + .num_iterations(total_iter) + .residual(residual.get()) + .residual_norm(residual_norm.get()) + .solution(dense_x) + .check(RelativeStoppingId, true, &stop_status, &one_changed)) { + break; + } + + // f = P^H * residual + 
subspace_vectors->apply(residual.get(), f.get()); + + for (size_type k = 0; k < subspace_dim_; k++) { + // c = M \ f = (c_1, ..., c_s)^T + // v = residual - sum i=[k,s) of (c_i * g_i) + exec->run(idr::make_step_1(nrhs, k, m.get(), f.get(), + residual.get(), g.get(), c.get(), + v.get(), &stop_status)); + + get_preconditioner()->apply(v.get(), helper.get()); + + // u_k = omega * precond_vector + sum i=[k,s) of (c_i * u_i) + exec->run(idr::make_step_2(nrhs, k, omega.get(), helper.get(), + c.get(), u.get(), &stop_status)); + + auto u_k = u->create_submatrix(span{0, problem_size}, + span{k * nrhs, (k + 1) * nrhs}); + + // g_k = Au_k + system_matrix_->apply(u_k.get(), helper.get()); + + // for i = [0,k) + // alpha = p^H_i * g_k / m_i,i + // g_k -= alpha * g_i + // u_k -= alpha * u_i + // end for + // store g_k to g + // for i = [k,s) + // m_i,k = p^H_i * g_k + // end for + // beta = f_k / m_k,k + // residual -= beta * g_k + // dense_x += beta * u_k + // f = (0,...,0,f_k+1 - beta * m_k+1,k,...,f_s-1 - beta * m_s-1,k) + exec->run(idr::make_step_3(nrhs, k, subspace_vectors.get(), g.get(), + helper.get(), u.get(), m.get(), f.get(), + alpha.get(), residual.get(), dense_x, + &stop_status)); + } + + get_preconditioner()->apply(residual.get(), helper.get()); + system_matrix_->apply(helper.get(), t.get()); + + t->compute_conj_dot(residual.get(), omega.get()); + t->compute_conj_dot(t.get(), tht.get()); + residual->compute_norm2(residual_norm.get()); + + // omega = (t^H * residual) / (t^H * t) + // rho = (t^H * residual) / (norm(t) * norm(residual)) + // if abs(rho) < kappa then + // omega *= kappa / abs(rho) + // end if + // residual -= omega * t + // dense_x += omega * v + exec->run(idr::make_compute_omega(nrhs, kappa_, tht.get(), + residual_norm.get(), omega.get(), + &stop_status)); + + t->scale(subspace_neg_one_op.get()); + residual->add_scaled(omega.get(), t.get()); + dense_x->add_scaled(omega.get(), helper.get()); + } +} + + +template +void Idr::apply_impl(const LinOp *b, 
LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + // If ValueType is complex, the subspace matrix P will be complex + // anyway. + if (!is_complex() && complex_subspace_) { + auto complex_b = dense_b->make_complex(); + auto complex_x = dense_x->make_complex(); + this->iterate(complex_b.get(), complex_x.get()); + complex_x->get_real( + dynamic_cast> *>( + dense_x)); + } else { + this->iterate(dense_b, dense_x); + } + }, + b, x); +} + + +template +void Idr::apply_impl(const LinOp *alpha, const LinOp *b, + const LinOp *beta, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); +} + + +#define GKO_DECLARE_IDR(_type) class Idr<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR); + + +} // namespace solver +} // namespace gko diff --git a/core/solver/idr_kernels.hpp b/core/solver/idr_kernels.hpp new file mode 100644 index 00000000000..6a6e43f45ef --- /dev/null +++ b/core/solver/idr_kernels.hpp @@ -0,0 +1,162 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_SOLVER_IDR_KERNELS_HPP_ +#define GKO_CORE_SOLVER_IDR_KERNELS_HPP_ + + +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace idr { + + +#define GKO_DECLARE_IDR_INITIALIZE_KERNEL(_type) \ + void initialize(std::shared_ptr exec, \ + const size_type nrhs, matrix::Dense<_type> *m, \ + matrix::Dense<_type> *subspace_vectors, \ + bool deterministic, Array *stop_status) + + +#define GKO_DECLARE_IDR_STEP_1_KERNEL(_type) \ + void step_1( \ + std::shared_ptr exec, const size_type nrhs, \ + const size_type k, const matrix::Dense<_type> *m, \ + const matrix::Dense<_type> *f, const matrix::Dense<_type> *residual, \ + const matrix::Dense<_type> *g, matrix::Dense<_type> *c, \ + matrix::Dense<_type> *v, const Array *stop_status) + + +#define GKO_DECLARE_IDR_STEP_2_KERNEL(_type) \ + void step_2(std::shared_ptr exec, \ + const size_type nrhs, const size_type k, \ + const matrix::Dense<_type> *omega, \ + const 
matrix::Dense<_type> *preconditioned_vector, \ + const matrix::Dense<_type> *c, matrix::Dense<_type> *u, \ + const Array *stop_status) + + +#define GKO_DECLARE_IDR_STEP_3_KERNEL(_type) \ + void step_3(std::shared_ptr exec, \ + const size_type nrhs, const size_type k, \ + const matrix::Dense<_type> *p, matrix::Dense<_type> *g, \ + matrix::Dense<_type> *g_k, matrix::Dense<_type> *u, \ + matrix::Dense<_type> *m, matrix::Dense<_type> *f, \ + matrix::Dense<_type> *alpha, matrix::Dense<_type> *residual, \ + matrix::Dense<_type> *x, \ + const Array *stop_status) + + +#define GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(_type) \ + void compute_omega( \ + std::shared_ptr exec, const size_type nrhs, \ + const remove_complex<_type> kappa, const matrix::Dense<_type> *tht, \ + const matrix::Dense> *residual_norm, \ + matrix::Dense<_type> *omega, \ + const Array *stop_status) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_IDR_INITIALIZE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_IDR_STEP_1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_IDR_STEP_2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_IDR_STEP_3_KERNEL(ValueType); \ + template \ + GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(ValueType) + + +} // namespace idr + + +namespace omp { +namespace idr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace idr +} // namespace omp + + +namespace cuda { +namespace idr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace idr +} // namespace cuda + + +namespace reference { +namespace idr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace idr +} // namespace reference + + +namespace hip { +namespace idr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace idr +} // namespace hip + + +namespace dpcpp { +namespace idr { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace idr +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_SOLVER_IDR_KERNELS_HPP_ diff --git a/core/solver/ir.cpp 
b/core/solver/ir.cpp index 63e80f86c04..5d7aa749b87 100644 --- a/core/solver/ir.cpp +++ b/core/solver/ir.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -80,6 +81,18 @@ std::unique_ptr Ir::conj_transpose() const template void Ir::apply_impl(const LinOp *b, LinOp *x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->apply_dense_impl(dense_b, dense_x); + }, + b, x); +} + + +template +void Ir::apply_dense_impl(const matrix::Dense *dense_b, + matrix::Dense *dense_x) const { using Vector = matrix::Dense; constexpr uint8 relative_stopping_id{1}; @@ -88,8 +101,6 @@ void Ir::apply_impl(const LinOp *b, LinOp *x) const auto one_op = initialize({one()}, exec); auto neg_one_op = initialize({-one()}, exec); - auto dense_b = as(b); - auto dense_x = as(x); auto residual = Vector::create_with_config_of(dense_b); auto inner_solution = Vector::create_with_config_of(dense_b); @@ -102,8 +113,9 @@ void Ir::apply_impl(const LinOp *b, LinOp *x) const lend(residual)); auto stop_criterion = stop_criterion_factory_->generate( - system_matrix_, std::shared_ptr(b, [](const LinOp *) {}), - x, lend(residual)); + system_matrix_, + std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + lend(residual)); int iter = -1; while (true) { @@ -152,12 +164,14 @@ template void Ir::apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, x_clone.get()); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto 
x_clone = dense_x->clone(); + this->apply_dense_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/ir_kernels.hpp b/core/solver/ir_kernels.hpp index 9fe59ba4a6c..2f96a445e3d 100644 --- a/core/solver/ir_kernels.hpp +++ b/core/solver/ir_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_SOLVER_IR_KERNELS_HPP_ +#include + + #include #include #include @@ -92,6 +95,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace ir { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace ir +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/lower_trs.cpp b/core/solver/lower_trs.cpp index bb4bb19c25b..9c9f8306a92 100644 --- a/core/solver/lower_trs.cpp +++ b/core/solver/lower_trs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -101,32 +102,36 @@ void LowerTrs::generate() template void LowerTrs::apply_impl(const LinOp *b, LinOp *x) const { - using Vector = matrix::Dense; - const auto exec = this->get_executor(); - - auto dense_b = as(b); - auto dense_x = as(x); - - // This kernel checks if a transpose is needed for the multiple rhs case. 
- // Currently only the algorithm for CUDA version <=9.1 needs this - // transposition due to the limitation in the cusparse algorithm. The other - // executors (omp and reference) do not use the transpose (trans_x and - // trans_b) and hence are passed in empty pointers. - bool do_transpose = false; - std::shared_ptr trans_b; - std::shared_ptr trans_x; - this->get_executor()->run( - lower_trs::make_should_perform_transpose(do_transpose)); - if (do_transpose) { - trans_b = Vector::create(exec, gko::transpose(dense_b->get_size())); - trans_x = Vector::create(exec, gko::transpose(dense_x->get_size())); - } else { - trans_b = Vector::create(exec); - trans_x = Vector::create(exec); - } - exec->run(lower_trs::make_solve( - gko::lend(system_matrix_), gko::lend(this->solve_struct_), - gko::lend(trans_b), gko::lend(trans_x), dense_b, dense_x)); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + using Vector = matrix::Dense; + const auto exec = this->get_executor(); + + // This kernel checks if a transpose is needed for the multiple rhs + // case. Currently only the algorithm for CUDA version <=9.1 needs + // this transposition due to the limitation in the cusparse + // algorithm. The other executors (omp and reference) do not use the + // transpose (trans_x and trans_b) and hence are passed in empty + // pointers. 
+ bool do_transpose = false; + std::shared_ptr trans_b; + std::shared_ptr trans_x; + this->get_executor()->run( + lower_trs::make_should_perform_transpose(do_transpose)); + if (do_transpose) { + trans_b = + Vector::create(exec, gko::transpose(dense_b->get_size())); + trans_x = + Vector::create(exec, gko::transpose(dense_x->get_size())); + } else { + trans_b = Vector::create(exec); + trans_x = Vector::create(exec); + } + exec->run(lower_trs::make_solve( + gko::lend(system_matrix_), gko::lend(this->solve_struct_), + gko::lend(trans_b), gko::lend(trans_x), dense_b, dense_x)); + }, + b, x); } @@ -136,12 +141,14 @@ void LowerTrs::apply_impl(const LinOp *alpha, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, gko::lend(x_clone)); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/lower_trs_kernels.hpp b/core/solver/lower_trs_kernels.hpp index 799c50129e0..5f9f272417a 100644 --- a/core/solver/lower_trs_kernels.hpp +++ b/core/solver/lower_trs_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -123,6 +123,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace lower_trs { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace lower_trs +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/solver/upper_trs.cpp b/core/solver/upper_trs.cpp index 236de82a27b..d4529fefd69 100644 --- a/core/solver/upper_trs.cpp +++ b/core/solver/upper_trs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -101,32 +102,36 @@ void UpperTrs::generate() template void UpperTrs::apply_impl(const LinOp *b, LinOp *x) const { - using Vector = matrix::Dense; - const auto exec = this->get_executor(); - - auto dense_b = as(b); - auto dense_x = as(x); - - // This kernel checks if a transpose is needed for the multiple rhs case. - // Currently only the algorithm for CUDA version <=9.1 needs this - // transposition due to the limitation in the cusparse algorithm. The other - // executors (omp and reference) do not use the transpose (trans_x and - // trans_b) and hence are passed in empty pointers. 
- bool do_transpose = false; - std::shared_ptr trans_b; - std::shared_ptr trans_x; - this->get_executor()->run( - upper_trs::make_should_perform_transpose(do_transpose)); - if (do_transpose) { - trans_b = Vector::create(exec, gko::transpose(dense_b->get_size())); - trans_x = Vector::create(exec, gko::transpose(dense_x->get_size())); - } else { - trans_b = Vector::create(exec); - trans_x = Vector::create(exec); - } - exec->run(upper_trs::make_solve( - gko::lend(system_matrix_), gko::lend(this->solve_struct_), - gko::lend(trans_b), gko::lend(trans_x), dense_b, dense_x)); + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + using Vector = matrix::Dense; + const auto exec = this->get_executor(); + + // This kernel checks if a transpose is needed for the multiple rhs + // case. Currently only the algorithm for CUDA version <=9.1 needs + // this transposition due to the limitation in the cusparse + // algorithm. The other executors (omp and reference) do not use the + // transpose (trans_x and trans_b) and hence are passed in empty + // pointers. 
+ bool do_transpose = false; + std::shared_ptr trans_b; + std::shared_ptr trans_x; + this->get_executor()->run( + upper_trs::make_should_perform_transpose(do_transpose)); + if (do_transpose) { + trans_b = + Vector::create(exec, gko::transpose(dense_b->get_size())); + trans_x = + Vector::create(exec, gko::transpose(dense_x->get_size())); + } else { + trans_b = Vector::create(exec); + trans_x = Vector::create(exec); + } + exec->run(upper_trs::make_solve( + gko::lend(system_matrix_), gko::lend(this->solve_struct_), + gko::lend(trans_b), gko::lend(trans_x), dense_b, dense_x)); + }, + b, x); } @@ -136,12 +141,14 @@ void UpperTrs::apply_impl(const LinOp *alpha, const LinOp *beta, LinOp *x) const { - auto dense_x = as>(x); - - auto x_clone = dense_x->clone(); - this->apply(b, x_clone.get()); - dense_x->scale(beta); - dense_x->add_scaled(alpha, gko::lend(x_clone)); + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + this->apply_impl(dense_b, x_clone.get()); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone.get()); + }, + alpha, b, beta, x); } diff --git a/core/solver/upper_trs_kernels.hpp b/core/solver/upper_trs_kernels.hpp index cce48ea2812..bdbc4a9b1d7 100644 --- a/core/solver/upper_trs_kernels.hpp +++ b/core/solver/upper_trs_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -123,6 +123,15 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace hip +namespace dpcpp { +namespace upper_trs { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace upper_trs +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES diff --git a/core/stop/combined.cpp b/core/stop/combined.cpp index f80df54b90b..898c6ea56d9 100644 --- a/core/stop/combined.cpp +++ b/core/stop/combined.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/stop/criterion.cpp b/core/stop/criterion.cpp index 25019d7d0d7..59d1db2839e 100644 --- a/core/stop/criterion.cpp +++ b/core/stop/criterion.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/stop/criterion_kernels.hpp b/core/stop/criterion_kernels.hpp index 07eb8f2798c..0844caba099 100644 --- a/core/stop/criterion_kernels.hpp +++ b/core/stop/criterion_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -87,6 +87,15 @@ GKO_DECLARE_SET_ALL_STATUSES_KERNEL(); } // namespace set_all_statuses } // namespace hip + + +namespace dpcpp { +namespace set_all_statuses { + +GKO_DECLARE_SET_ALL_STATUSES_KERNEL(); + +} // namespace set_all_statuses +} // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/core/stop/iteration.cpp b/core/stop/iteration.cpp index 8c1a6bc5a7d..dabe099d112 100644 --- a/core/stop/iteration.cpp +++ b/core/stop/iteration.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/stop/residual_norm.cpp b/core/stop/residual_norm.cpp index 5c928bbf48d..73630204a22 100644 --- a/core/stop/residual_norm.cpp +++ b/core/stop/residual_norm.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,24 +43,57 @@ namespace residual_norm { GKO_REGISTER_OPERATION(residual_norm, residual_norm::residual_norm); -GKO_REGISTER_OPERATION(fill_array, components::fill_array); } // namespace residual_norm +namespace implicit_residual_norm { + + +GKO_REGISTER_OPERATION(implicit_residual_norm, + implicit_residual_norm::implicit_residual_norm); + + +} // namespace implicit_residual_norm + + template -bool ResidualNorm::check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, - bool *one_changed, - const Criterion::Updater &updater) +bool ResidualNormBase::check_impl( + uint8 stopping_id, bool set_finalized, Array *stop_status, + bool *one_changed, const Criterion::Updater &updater) { const NormVector *dense_tau; if (updater.residual_norm_ != nullptr) { dense_tau = as(updater.residual_norm_); } else if (updater.residual_ != nullptr) { - auto *dense_r = as(updater.residual_); - dense_r->compute_norm2(u_dense_tau_.get()); + if (dynamic_cast(updater.residual_)) { + auto *dense_r = as(updater.residual_); + dense_r->compute_norm2(u_dense_tau_.get()); + } else { + auto *dense_r = as(updater.residual_); + dense_r->compute_norm2(u_dense_tau_.get()); + } + dense_tau = u_dense_tau_.get(); + } else if (updater.solution_ != nullptr && system_matrix_ != nullptr && + b_ != nullptr) { + auto exec = this->get_executor(); + // when LinOp is real but rhs is complex, we use real view on complex, + // so it still uses the same type of scalar in apply. 
+ if (auto vec_b = std::dynamic_pointer_cast(b_)) { + auto dense_r = vec_b->clone(); + system_matrix_->apply(neg_one_.get(), updater.solution_, one_.get(), + dense_r.get()); + dense_r->compute_norm2(u_dense_tau_.get()); + } else if (auto vec_b = + std::dynamic_pointer_cast(b_)) { + auto dense_r = vec_b->clone(); + system_matrix_->apply(neg_one_.get(), updater.solution_, one_.get(), + dense_r.get()); + dense_r->compute_norm2(u_dense_tau_.get()); + } else { + GKO_NOT_SUPPORTED(nullptr); + } dense_tau = u_dense_tau_.get(); } else { GKO_NOT_SUPPORTED(nullptr); @@ -68,28 +101,44 @@ bool ResidualNorm::check_impl(uint8 stoppingId, bool setFinalized, bool all_converged = true; this->get_executor()->run(residual_norm::make_residual_norm( - dense_tau, starting_tau_.get(), tolerance_, stoppingId, setFinalized, - stop_status, &device_storage_, &all_converged, one_changed)); + dense_tau, starting_tau_.get(), reduction_factor_, stopping_id, + set_finalized, stop_status, &device_storage_, &all_converged, + one_changed)); return all_converged; } + template -void AbsoluteResidualNorm::initialize_starting_tau() +bool ImplicitResidualNorm::check_impl( + uint8 stopping_id, bool set_finalized, Array *stop_status, + bool *one_changed, const Criterion::Updater &updater) { - this->get_executor()->run(residual_norm::make_fill_array( - this->starting_tau_->get_values(), this->starting_tau_->get_size()[1], - gko::one>())); + const Vector *dense_tau; + if (updater.implicit_sq_residual_norm_ != nullptr) { + dense_tau = as(updater.implicit_sq_residual_norm_); + } else { + GKO_NOT_SUPPORTED(nullptr); + } + bool all_converged = true; + + this->get_executor()->run( + implicit_residual_norm::make_implicit_residual_norm( + dense_tau, this->starting_tau_.get(), this->reduction_factor_, + stopping_id, set_finalized, stop_status, &this->device_storage_, + &all_converged, one_changed)); + + return all_converged; } -#define GKO_DECLARE_RESIDUAL_NORM(_type) class ResidualNorm<_type> +#define 
GKO_DECLARE_RESIDUAL_NORM(_type) class ResidualNormBase<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_RESIDUAL_NORM); -#define GKO_DECLARE_ABSOLUTE_RESIDUAL_NORM(_type) \ - class AbsoluteResidualNorm<_type> -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_ABSOLUTE_RESIDUAL_NORM); +#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM(_type) \ + class ImplicitResidualNorm<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM); } // namespace stop diff --git a/core/stop/residual_norm_kernels.hpp b/core/stop/residual_norm_kernels.hpp index 30407cf9b9f..cb0496b5aef 100644 --- a/core/stop/residual_norm_kernels.hpp +++ b/core/stop/residual_norm_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -63,12 +63,40 @@ namespace residual_norm { } // namespace residual_norm +namespace implicit_residual_norm { + + +#define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(_type) \ + void implicit_residual_norm( \ + std::shared_ptr exec, \ + const matrix::Dense<_type> *tau, \ + const matrix::Dense> *orig_tau, \ + remove_complex<_type> rel_residual_goal, uint8 stoppingId, \ + bool setFinalized, Array *stop_status, \ + Array *device_storage, bool *all_converged, bool *one_changed) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES2 \ + template \ + GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(ValueType) + + +} // namespace implicit_residual_norm + + namespace omp { namespace residual_norm { GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace residual_norm + + +namespace implicit_residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES2; + +} // namespace implicit_residual_norm } // namespace omp @@ -78,6 +106,13 @@ namespace residual_norm { GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace residual_norm + + +namespace implicit_residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES2; + +} // 
namespace implicit_residual_norm } // namespace cuda @@ -87,6 +122,13 @@ namespace residual_norm { GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace residual_norm + + +namespace implicit_residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES2; + +} // namespace implicit_residual_norm } // namespace reference @@ -96,10 +138,34 @@ namespace residual_norm { GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace residual_norm + + +namespace implicit_residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES2; + +} // namespace implicit_residual_norm } // namespace hip +namespace dpcpp { +namespace residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace residual_norm + + +namespace implicit_residual_norm { + +GKO_DECLARE_ALL_AS_TEMPLATES2; + +} // namespace implicit_residual_norm +} // namespace dpcpp + + #undef GKO_DECLARE_ALL_AS_TEMPLATES +#undef GKO_DECLARE_ALL_AS_TEMPLATES2 } // namespace kernels } // namespace gko diff --git a/core/stop/time.cpp b/core/stop/time.cpp index 8ec4ad4948a..f9f28b5365d 100644 --- a/core/stop/time.cpp +++ b/core/stop/time.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/synthesizer/implementation_selection.hpp b/core/synthesizer/implementation_selection.hpp index a1c1a493029..2965ceb34a2 100644 --- a/core/synthesizer/implementation_selection.hpp +++ b/core/synthesizer/implementation_selection.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -70,6 +70,38 @@ namespace syn { } \ } +#define GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(_name, _callable) \ + template \ + inline void _name(::gko::syn::value_list, Predicate, \ + ::gko::syn::value_list, \ + ::gko::syn::value_list, \ + ::gko::syn::value_list, \ + ::gko::syn::type_list, InferredArgs...) \ + GKO_KERNEL_NOT_FOUND; \ + \ + template \ + inline void _name( \ + ::gko::syn::value_list, \ + Predicate is_eligible, \ + ::gko::syn::value_list bool_args, \ + ::gko::syn::value_list int_args, \ + ::gko::syn::value_list size_args, \ + ::gko::syn::type_list type_args, InferredArgs... args) \ + { \ + if (is_eligible(K)) { \ + _callable( \ + std::forward(args)...); \ + } else { \ + _name(::gko::syn::value_list(), \ + is_eligible, bool_args, int_args, size_args, type_args, \ + std::forward(args)...); \ + } \ + } + } // namespace syn } // namespace gko diff --git a/core/test/CMakeLists.txt b/core/test/CMakeLists.txt index f0e39d5c568..fcf1cf64777 100644 --- a/core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -1,10 +1,13 @@ -include(${CMAKE_SOURCE_DIR}/cmake/create_test.cmake) +include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) +add_subdirectory(accessor) add_subdirectory(base) add_subdirectory(factorization) add_subdirectory(log) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(preconditioner) +add_subdirectory(reorder) add_subdirectory(solver) add_subdirectory(stop) add_subdirectory(utils) diff --git a/core/test/accessor/CMakeLists.txt b/core/test/accessor/CMakeLists.txt new file mode 100644 index 00000000000..34745096ac2 --- /dev/null +++ b/core/test/accessor/CMakeLists.txt @@ -0,0 +1,29 @@ +# Creates a test that only adds `Ginkgo_SOURCE_DIR` to the include directories +# to make sure the accessors are truly independent of Ginkgo and can be used +# as a header-only library. 
+function(create_accessor_test test_name) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") + add_executable("${TEST_TARGET_NAME}" "${test_name}.cpp") + target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_14) + target_include_directories("${TEST_TARGET_NAME}" + PRIVATE + "${Ginkgo_SOURCE_DIR}" + ) + set_target_properties("${TEST_TARGET_NAME}" PROPERTIES + OUTPUT_NAME "${test_name}") + target_link_libraries("${TEST_TARGET_NAME}" PRIVATE GTest::Main GTest::GTest ${ARGN}) + add_test(NAME "${REL_BINARY_DIR}/${test_name}" + COMMAND "${TEST_TARGET_NAME}" + WORKING_DIRECTORY "$") +endfunction(create_accessor_test) + +create_accessor_test(index_span) +create_accessor_test(range) +# Accessor tests: +create_accessor_test(block_col_major) +create_accessor_test(reduced_row_major) +ginkgo_create_test(reduced_row_major_ginkgo) # needs Ginkgo for gko::half +create_accessor_test(row_major) +create_accessor_test(scaled_reduced_row_major) diff --git a/core/test/accessor/block_col_major.cpp b/core/test/accessor/block_col_major.cpp new file mode 100644 index 00000000000..7a0572c04c5 --- /dev/null +++ b/core/test/accessor/block_col_major.cpp @@ -0,0 +1,154 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include + + +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/index_span.hpp" +#include "accessor/range.hpp" + + +namespace { + + +class BlockColMajorAccessor3d : public ::testing::Test { +protected: + using span = gko::acc::index_span; + static constexpr gko::acc::size_type dimensionality{3}; + + using blk_col_major_range = + gko::acc::range>; + + // clang-format off + int data[2 * 3 * 4]{ + 1, 3, 5, + 2, 4, 6, + -1,-2,-3, + 11,12,13, + + 21,25,29, + 22,26,30, + 23,27,31, + 24,28,32 + + /* This matrix actually looks like + 1, 2, -1, 11, + 3, 4, -2, 12, + 5, 6, -3, 13, + + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 32 + */ + }; + // clang-format on + const std::array dim1{{2, 3, 4}}; + const std::array dim2{{2, 2, 3}}; + blk_col_major_range default_r{dim1, data}; + blk_col_major_range custom_r{ + dim2, data, + std::array{{12, 3}}}; +}; + + +TEST_F(BlockColMajorAccessor3d, ComputesCorrectStride) +{ + auto 
range_stride = default_r.get_accessor().stride; + auto check_stride = std::array{{12, 3}}; + + ASSERT_EQ(range_stride, check_stride); +} + + +TEST_F(BlockColMajorAccessor3d, CanAccessData) +{ + EXPECT_EQ(default_r(0, 0, 0), 1); + EXPECT_EQ(custom_r(0, 0, 0), 1); + EXPECT_EQ(default_r(0, 1, 0), 3); + EXPECT_EQ(custom_r(0, 1, 0), 3); + EXPECT_EQ(default_r(0, 1, 1), 4); + EXPECT_EQ(default_r(0, 1, 3), 12); + EXPECT_EQ(default_r(0, 2, 2), -3); + EXPECT_EQ(default_r(1, 2, 1), 30); + EXPECT_EQ(default_r(1, 2, 2), 31); + EXPECT_EQ(default_r(1, 2, 3), 32); +} + + +TEST_F(BlockColMajorAccessor3d, CanWriteData) +{ + default_r(0, 0, 0) = 4; + custom_r(1, 1, 1) = 100; + + EXPECT_EQ(default_r(0, 0, 0), 4); + EXPECT_EQ(custom_r(0, 0, 0), 4); + EXPECT_EQ(default_r(1, 1, 1), 100); + EXPECT_EQ(custom_r(1, 1, 1), 100); +} + + +TEST_F(BlockColMajorAccessor3d, CanCreateSubrange) +{ + auto subr = custom_r(span{0u, 2u}, span{1u, 2u}, span{1u, 3u}); + + EXPECT_EQ(subr(0, 0, 0), 4); + EXPECT_EQ(subr(0, 0, 1), -2); + EXPECT_EQ(subr(1, 0, 0), 26); + EXPECT_EQ(subr(1, 0, 1), 27); +} + + +TEST_F(BlockColMajorAccessor3d, CanCreateRowVector) +{ + auto subr = default_r(1u, 2u, span{0u, 2u}); + + EXPECT_EQ(subr(0, 0, 0), 29); + EXPECT_EQ(subr(0, 0, 1), 30); +} + + +TEST_F(BlockColMajorAccessor3d, CanCreateColumnVector) +{ + auto subr = default_r(span{0u, 2u}, 1u, 3u); + + EXPECT_EQ(subr(0, 0, 0), 12); + EXPECT_EQ(subr(1, 0, 0), 28); +} + + +} // namespace diff --git a/core/test/accessor/index_span.cpp b/core/test/accessor/index_span.cpp new file mode 100644 index 00000000000..3c69141d764 --- /dev/null +++ b/core/test/accessor/index_span.cpp @@ -0,0 +1,132 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include "accessor/index_span.hpp" + + +namespace { + + +TEST(IndexSpan, CreatesSpan) +{ + gko::acc::index_span s{3, 5}; + + ASSERT_EQ(s.begin, 3); + ASSERT_EQ(s.end, 5); +} + + +TEST(IndexSpan, CreatesPoint) +{ + gko::acc::index_span s{3}; + + ASSERT_EQ(s.begin, 3); + ASSERT_EQ(s.end, 4); +} + + +TEST(IndexSpan, LessThanEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(2, 3) < gko::acc::index_span(4, 7)); +} + + +TEST(IndexSpan, LessThanEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(2, 4) < gko::acc::index_span(4, 7)); +} + + +TEST(IndexSpan, LessOrEqualEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(2, 4) <= gko::acc::index_span(4, 7)); +} + + +TEST(IndexSpan, LessOrEqualEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(2, 5) <= gko::acc::index_span(4, 7)); +} + + +TEST(IndexSpan, GreaterThanEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(4, 7) > gko::acc::index_span(2, 3)); +} + + +TEST(IndexSpan, GreaterThanEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(4, 7) > gko::acc::index_span(2, 4)); +} + + +TEST(IndexSpan, GreaterOrEqualEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(4, 7) >= gko::acc::index_span(2, 4)); +} + + +TEST(IndexSpan, GreaterOrEqualEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(4, 7) >= gko::acc::index_span(2, 5)); +} + + +TEST(IndexSpan, EqualityEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(2, 4) == gko::acc::index_span(2, 4)); +} + + +TEST(IndexSpan, EqualityEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(3, 4) == gko::acc::index_span(2, 5)); +} + + +TEST(IndexSpan, NotEqualEvaluatesToTrue) +{ + ASSERT_TRUE(gko::acc::index_span(3, 4) != gko::acc::index_span(2, 5)); +} + + +TEST(IndexSpan, NotEqualEvaluatesToFalse) +{ + ASSERT_FALSE(gko::acc::index_span(2, 4) != gko::acc::index_span(2, 4)); +} + + +} // namespace diff --git a/core/test/accessor/range.cpp b/core/test/accessor/range.cpp 
new file mode 100644 index 00000000000..3708486b0f2 --- /dev/null +++ b/core/test/accessor/range.cpp @@ -0,0 +1,111 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include + + +#include + + +#include "accessor/range.hpp" + + +namespace { + + +// 0-memory constant accessor, which "stores" x*i + y*j + k at location +// (i, j, k) +struct dummy_accessor { + static constexpr std::size_t dimensionality = 3; + + dummy_accessor(std::size_t size, int x, int y) + : sizes{size, size, size}, x{x}, y{y} + {} + + dummy_accessor(std::size_t size_x, std::size_t size_y, std::size_t size_z, + int x, int y) + : sizes{size_x, size_y, size_z}, x{x}, y{y} + {} + + int operator()(int a, int b, int c) const { return x * a + y * b + c; } + + gko::acc::size_type length(std::size_t dim) const { return sizes[dim]; } + + std::array sizes; + mutable int x; + mutable int y; +}; + + +using dummy_range = gko::acc::range; + + +TEST(Range, CreatesRange) +{ + dummy_range r{5u, 2, 3}; + + EXPECT_EQ(r->x, 2); + ASSERT_EQ(r->y, 3); +} + + +TEST(Range, ForwardsCallsToAccessor) +{ + dummy_range r{5u, 2, 3}; + + EXPECT_EQ(r(1, 2, 3), 2 * 1 + 3 * 2 + 3); + ASSERT_EQ(r(4, 2, 5), 2 * 4 + 3 * 2 + 5); +} + + +TEST(Range, ForwardsCopyToAccessor) +{ + dummy_range r{5u, 2, 3}; + r = dummy_range{5u, 2, 5}; + + EXPECT_EQ(r->x, 2); + ASSERT_EQ(r->y, 5); +} + + +TEST(Range, ForwardsLength) +{ + dummy_range r{5u, 2, 3}; + + EXPECT_EQ(r->length(0), 5); + EXPECT_EQ(r->length(1), 5); + ASSERT_EQ(r->length(2), 5); +} + + +} // namespace diff --git a/core/test/accessor/reduced_row_major.cpp b/core/test/accessor/reduced_row_major.cpp new file mode 100644 index 00000000000..c60b085908e --- /dev/null +++ b/core/test/accessor/reduced_row_major.cpp @@ -0,0 +1,143 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include +#include +#include + + +#include + + +#include "accessor/index_span.hpp" +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/utils.hpp" + + +namespace { + + +/** + * This test makes sure reduced_row_major works independent of Ginkgo and with + * dimensionalities 1 and 2. 
+ */ +class ReducedStorageXd : public ::testing::Test { +protected: + using ar_type = double; + using st_type = float; + using size_type = gko::acc::size_type; + static constexpr ar_type delta{std::numeric_limits::epsilon() * + 1e1}; + + using accessor1d = gko::acc::reduced_row_major<1, ar_type, st_type>; + using accessor2d = gko::acc::reduced_row_major<2, ar_type, st_type>; + using const_accessor1d = + gko::acc::reduced_row_major<1, ar_type, const st_type>; + using const_accessor2d = + gko::acc::reduced_row_major<2, ar_type, const st_type>; + static_assert(std::is_same::value, + "Const accessors must be the same!"); + static_assert(std::is_same::value, + "Const accessors must be the same!"); + + using reduced_storage1d = gko::acc::range; + using reduced_storage2d = gko::acc::range; + using const_reduced_storage2d = gko::acc::range; + using const_reduced_storage1d = gko::acc::range; + + const std::array stride0{{}}; + const std::array stride1{{4}}; + const std::array size_1d{{8u}}; + const std::array size_2d{{2u, 4u}}; + static constexpr gko::acc::size_type data_elements{8}; + st_type data[data_elements]{1.1f, 2.2f, 3.3f, 4.4f, + 5.5f, 6.6f, 7.7f, -8.8f}; + reduced_storage1d r1{size_1d, data}; + reduced_storage2d r2{size_2d, data, stride1[0]}; + const_reduced_storage1d cr1{size_1d, data, stride0}; + const_reduced_storage2d cr2{size_2d, data, stride1}; + + template + static ar_type c_st_ar(T val) + { + return static_cast(static_cast(val)); + } + + void data_equal_except_for(int idx) + { + // clang-format off + if (idx != 0) { EXPECT_EQ(data[0], c_st_ar(1.1)); } + if (idx != 1) { EXPECT_EQ(data[1], c_st_ar(2.2)); } + if (idx != 2) { EXPECT_EQ(data[2], c_st_ar(3.3)); } + if (idx != 3) { EXPECT_EQ(data[3], c_st_ar(4.4)); } + if (idx != 4) { EXPECT_EQ(data[4], c_st_ar(5.5)); } + if (idx != 5) { EXPECT_EQ(data[5], c_st_ar(6.6)); } + if (idx != 6) { EXPECT_EQ(data[6], c_st_ar(7.7)); } + if (idx != 7) { EXPECT_EQ(data[7], c_st_ar(-8.8)); } + // clang-format on + } +}; + + 
+TEST_F(ReducedStorageXd, CanRead) +{ + EXPECT_EQ(cr1(1), this->c_st_ar(2.2)); + EXPECT_EQ(cr2(0, 1), this->c_st_ar(2.2)); + EXPECT_EQ(r1(1), this->c_st_ar(2.2)); + EXPECT_EQ(r2(0, 1), this->c_st_ar(2.2)); +} + + +TEST_F(ReducedStorageXd, CanWrite1) +{ + r1(2) = 0.25; + + data_equal_except_for(2); + EXPECT_EQ(r1(2), 0.25); // expect exact since easy to store +} + + +TEST_F(ReducedStorageXd, CanWrite2) +{ + r2(1, 1) = 0.75; + + data_equal_except_for(5); + EXPECT_EQ(r2(1, 1), 0.75); // expect exact since easy to store +} + + +} // namespace diff --git a/core/test/accessor/reduced_row_major_ginkgo.cpp b/core/test/accessor/reduced_row_major_ginkgo.cpp new file mode 100644 index 00000000000..c64c2f4cefe --- /dev/null +++ b/core/test/accessor/reduced_row_major_ginkgo.cpp @@ -0,0 +1,445 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include +#include +#include + + +#include + + +#include "accessor/index_span.hpp" +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/utils.hpp" +#include "core/base/extended_float.hpp" // necessary for gko::half + + +namespace { + + +/** + * This test makes sure reduced_row_major works properly with various types. + * Note that this tests has a dependency on Ginkgo because of gko::half. + */ +template +class ReducedStorage3d : public ::testing::Test { +protected: + using ar_type = + typename std::tuple_element<0, decltype(ArithmeticStorageType{})>::type; + using st_type = + typename std::tuple_element<1, decltype(ArithmeticStorageType{})>::type; + using rcar_type = gko::acc::remove_complex_t; + static constexpr rcar_type delta{ + std::is_same::value + ? 
0 + : std::numeric_limits< + gko::acc::remove_complex_t>::epsilon() * + 1e1}; + + // Type for `check_accessor_correctness` to forward the indices + using t = std::tuple; + using i_span = gko::acc::index_span; + + using accessor = gko::acc::reduced_row_major<3, ar_type, st_type>; + using const_accessor = + gko::acc::reduced_row_major<3, ar_type, const st_type>; + + using reduced_storage = gko::acc::range; + using const_reduced_storage = gko::acc::range; + + const std::array size{{4u, 3u, 2u}}; + static constexpr gko::acc::size_type data_elements{4 * 3 * 2}; + // clang-format off + st_type data[data_elements] { + // 0, y, z + 1.0, 2.01, + -1.02, 3.03, + 4.04, -2.05, + // 1, y, z + 5.06, 6.07, + 2.08, 3.09, + -1.1, -9.11, + // 2, y, z + -2.12, 2.13, + 0.14, 15.15, + -9.16, 8.17, + // 3, y, z + 7.18, -6.19, + 5.2, -4.21, + 3.22, -2.23 + }; + // clang-format on + reduced_storage r{size, data}; + const_reduced_storage cr{size, data}; + + // Casts val first to `st_type`, then to `ar_type` in order to be allowed + // to test for equality + template + static ar_type c_st_ar(T val) + { + return static_cast(static_cast(val)); + } + + template + void check_accessor_correctness(const Accessor &a, + std::tuple ignore = t(99, 99, + 99)) + { + // Test for equality is fine here since they should not be modified + // clang-format off + if (ignore != t(0, 0, 0)) { EXPECT_EQ(a(0, 0, 0), c_st_ar(1.0)); } + if (ignore != t(0, 0, 1)) { EXPECT_EQ(a(0, 0, 1), c_st_ar(2.01)); } + if (ignore != t(0, 1, 0)) { EXPECT_EQ(a(0, 1, 0), c_st_ar(-1.02)); } + if (ignore != t(0, 1, 1)) { EXPECT_EQ(a(0, 1, 1), c_st_ar(3.03)); } + if (ignore != t(0, 2, 0)) { EXPECT_EQ(a(0, 2, 0), c_st_ar(4.04)); } + if (ignore != t(0, 2, 1)) { EXPECT_EQ(a(0, 2, 1), c_st_ar(-2.05)); } + if (ignore != t(1, 0, 0)) { EXPECT_EQ(a(1, 0, 0), c_st_ar(5.06)); } + if (ignore != t(1, 0, 1)) { EXPECT_EQ(a(1, 0, 1), c_st_ar(6.07)); } + if (ignore != t(1, 1, 0)) { EXPECT_EQ(a(1, 1, 0), c_st_ar(2.08)); } + if (ignore != t(1, 1, 1)) { 
EXPECT_EQ(a(1, 1, 1), c_st_ar(3.09)); } + if (ignore != t(1, 2, 0)) { EXPECT_EQ(a(1, 2, 0), c_st_ar(-1.1)); } + if (ignore != t(1, 2, 1)) { EXPECT_EQ(a(1, 2, 1), c_st_ar(-9.11)); } + if (ignore != t(2, 0, 0)) { EXPECT_EQ(a(2, 0, 0), c_st_ar(-2.12)); } + if (ignore != t(2, 0, 1)) { EXPECT_EQ(a(2, 0, 1), c_st_ar(2.13)); } + if (ignore != t(2, 1, 0)) { EXPECT_EQ(a(2, 1, 0), c_st_ar(0.14)); } + if (ignore != t(2, 1, 1)) { EXPECT_EQ(a(2, 1, 1), c_st_ar(15.15)); } + if (ignore != t(2, 2, 0)) { EXPECT_EQ(a(2, 2, 0), c_st_ar(-9.16)); } + if (ignore != t(2, 2, 1)) { EXPECT_EQ(a(2, 2, 1), c_st_ar(8.17)); } + if (ignore != t(3, 0, 0)) { EXPECT_EQ(a(3, 0, 0), c_st_ar(7.18)); } + if (ignore != t(3, 0, 1)) { EXPECT_EQ(a(3, 0, 1), c_st_ar(-6.19)); } + if (ignore != t(3, 1, 0)) { EXPECT_EQ(a(3, 1, 0), c_st_ar(5.2)); } + if (ignore != t(3, 1, 1)) { EXPECT_EQ(a(3, 1, 1), c_st_ar(-4.21)); } + if (ignore != t(3, 2, 0)) { EXPECT_EQ(a(3, 2, 0), c_st_ar(3.22)); } + if (ignore != t(3, 2, 1)) { EXPECT_EQ(a(3, 2, 1), c_st_ar(-2.23)); } + // clang-format on + } +}; + +using ReducedStorage3dTypes = + ::testing::Types, std::tuple, + std::tuple, std::tuple, + std::tuple, + std::tuple, std::complex>, + std::tuple, std::complex>, + std::tuple, std::complex>>; + +TYPED_TEST_SUITE(ReducedStorage3d, ReducedStorage3dTypes); + + +TYPED_TEST(ReducedStorage3d, CorrectLengths) +{ + EXPECT_EQ(this->r.length(0), this->size[0]); + EXPECT_EQ(this->r.length(1), this->size[1]); + EXPECT_EQ(this->r.length(2), this->size[2]); + EXPECT_EQ(this->r.length(3), 1); + EXPECT_EQ(this->r->get_size(), this->size); +} + + +TYPED_TEST(ReducedStorage3d, CorrectStride) +{ + EXPECT_EQ(this->r->get_stride()[0], this->size[1] * this->size[2]); + EXPECT_EQ(this->r->get_stride().at(0), this->size[1] * this->size[2]); + EXPECT_EQ(this->r->get_stride()[1], this->size[2]); + EXPECT_EQ(this->r->get_stride().at(1), this->size[2]); +} + + +TYPED_TEST(ReducedStorage3d, CorrectStorage) +{ + EXPECT_EQ(this->r->get_stored_data(), 
this->data); + EXPECT_EQ(this->r->get_const_storage(), this->data); +} + + +TYPED_TEST(ReducedStorage3d, CanReadData) +{ + this->check_accessor_correctness(this->r); + this->check_accessor_correctness(this->cr); +} + + +TYPED_TEST(ReducedStorage3d, CanImplicitlyConvertToConst) +{ + using const_reduced_storage = typename TestFixture::const_reduced_storage; + + const_reduced_storage const_rs = this->r->to_const(); + const_reduced_storage const_rs2 = this->cr; + + this->check_accessor_correctness(const_rs); + this->check_accessor_correctness(const_rs2); +} + + +TYPED_TEST(ReducedStorage3d, ToConstWorks) +{ + using const_reduced_storage = typename TestFixture::const_reduced_storage; + + auto cr2 = this->r->to_const(); + + static_assert(std::is_same::value, + "Types must be equal!"); + this->check_accessor_correctness(cr2); +} + + +TYPED_TEST(ReducedStorage3d, CanCreateWithStride) +{ + using reduced_storage = typename TestFixture::reduced_storage; + using ar_type = typename TestFixture::ar_type; + auto size = std::array{{2, 2, 2}}; + auto stride = std::array{{12, 2}}; + + auto range = reduced_storage{size, this->data, stride}; + range(1, 1, 0) = ar_type{2.}; + + EXPECT_EQ(range(0, 0, 0), this->c_st_ar(1.0)); + EXPECT_EQ(range(0, 0, 1), this->c_st_ar(2.01)); + EXPECT_EQ(range(0, 1, 0), this->c_st_ar(-1.02)); + EXPECT_EQ(range(0, 1, 1), this->c_st_ar(3.03)); + EXPECT_EQ(range(1, 0, 0), this->c_st_ar(-2.12)); + EXPECT_EQ(range(1, 0, 1), this->c_st_ar(2.13)); + EXPECT_EQ(range(1, 1, 0), this->c_st_ar(2.)); + EXPECT_EQ(range(1, 1, 1), this->c_st_ar(15.15)); +} + + +TYPED_TEST(ReducedStorage3d, CanWriteData) +{ + using t = typename TestFixture::t; + + this->r(0, 1, 0) = 100.25; + + this->check_accessor_correctness(this->r, t(0, 1, 0)); + EXPECT_EQ(this->r(0, 1, 0), this->c_st_ar(100.25)); +} + + +TYPED_TEST(ReducedStorage3d, Assignment) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + const ar_type expected = 1.2; + + this->r(0, 0, 1) 
= expected; + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_EQ(this->r(0, 0, 1), this->c_st_ar(expected)); +} + + +TYPED_TEST(ReducedStorage3d, Assignment2) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + const ar_type expected = -1.02; + + this->r(0, 0, 1) = this->r(0, 1, 0); + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_EQ(this->r(0, 0, 1), this->c_st_ar(expected)); +} + + +TYPED_TEST(ReducedStorage3d, Addition) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = 1.2 + 2.01; + + ar_type result = this->r(0, 0, 1) + ar_type{1.2}; + this->r(0, 0, 1) += 1.2; + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_NEAR(abs(this->r(0, 0, 1)), abs(expected), TestFixture::delta); + EXPECT_NEAR(abs(result), abs(expected), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Addition2) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = 2.01 + -1.02; + + auto result = this->r(0, 0, 1) + this->r(0, 1, 0); + this->r(0, 0, 1) += this->r(0, 1, 0); + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_NEAR(abs(this->r(0, 0, 1)), abs(this->c_st_ar(expected)), + TestFixture::delta); + EXPECT_NEAR(abs(result), abs(this->c_st_ar(expected)), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Subtraction) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = -2.23 - 1; + + auto result = this->r(3, 2, 1) - ar_type{1.}; + this->r(3, 2, 1) -= 1; + + this->check_accessor_correctness(this->r, t(3, 2, 1)); + EXPECT_NEAR(abs(this->r(3, 2, 1)), abs(this->c_st_ar(expected)), + TestFixture::delta); + EXPECT_NEAR(abs(result), abs(this->c_st_ar(expected)), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Subtraction2) +{ + 
using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = 3.22 - -2.23; + + auto result = this->cr(3, 2, 0) - this->r(3, 2, 1); + this->r(3, 2, 0) -= this->r(3, 2, 1); + + this->check_accessor_correctness(this->r, t(3, 2, 0)); + EXPECT_NEAR(abs(this->r(3, 2, 0)), abs(this->c_st_ar(expected)), + TestFixture::delta); + EXPECT_NEAR(abs(result), abs(this->c_st_ar(expected)), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Multiplication) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + const ar_type expected = 1 * 2; + + auto result = this->r(0, 0, 0) * ar_type{2.}; + this->r(0, 0, 0) *= 2; + + this->check_accessor_correctness(this->r, t(0, 0, 0)); + EXPECT_EQ(this->r(0, 0, 0), this->c_st_ar(expected)); + EXPECT_EQ(result, this->c_st_ar(expected)); +} + + +TYPED_TEST(ReducedStorage3d, Multiplication2) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = 2.01 * 3.03; + + auto result = this->r(0, 0, 1) * this->cr(0, 1, 1); + this->r(0, 0, 1) *= this->r(0, 1, 1); + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_NEAR(abs(this->r(0, 0, 1)), abs(expected), TestFixture::delta); + EXPECT_NEAR(abs(result), abs(expected), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Division) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const ar_type expected = 2.01 / 2.0; + + auto result = this->cr(0, 0, 1) / ar_type{2.}; + this->r(0, 0, 1) /= 2.; + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_NEAR(abs(this->r(0, 0, 1)), abs(expected), TestFixture::delta); + EXPECT_NEAR(abs(result), abs(expected), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, Division2) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + using std::abs; + const 
ar_type expected = 5.06 / 4.04; + + auto result = this->r(1, 0, 0) / this->cr(0, 2, 0); + this->r(1, 0, 0) /= this->r(0, 2, 0); + + this->check_accessor_correctness(this->r, t(1, 0, 0)); + EXPECT_NEAR(abs(this->r(1, 0, 0)), abs(expected), TestFixture::delta); + EXPECT_NEAR(abs(result), abs(expected), TestFixture::delta); +} + + +TYPED_TEST(ReducedStorage3d, UnaryMinus) +{ + using t = typename TestFixture::t; + using ar_type = typename TestFixture::ar_type; + const ar_type neg_expected = this->r(2, 0, 0); + const ar_type expected = -neg_expected; + + auto result = -this->r(2, 0, 0); + + this->check_accessor_correctness(this->r); + EXPECT_EQ(result, expected); +} + + +TYPED_TEST(ReducedStorage3d, CanCreateSubrange) +{ + using i_span = typename TestFixture::i_span; + auto subr = this->r(i_span{1u, 3u}, i_span{0u, 2u}, 0u); + + EXPECT_EQ(subr(0, 0, 0), this->c_st_ar(5.06)); + EXPECT_EQ(subr(0, 1, 0), this->c_st_ar(2.08)); + EXPECT_EQ(subr(1, 0, 0), this->c_st_ar(-2.12)); + EXPECT_EQ(subr(1, 1, 0), this->c_st_ar(0.14)); +} + + +TYPED_TEST(ReducedStorage3d, CanCreateSubrange2) +{ + using i_span = typename TestFixture::i_span; + auto subr = this->cr(i_span{1u, 3u}, i_span{0u, 2u}, i_span{0u, 1u}); + + EXPECT_EQ(subr(0, 0, 0), this->c_st_ar(5.06)); + EXPECT_EQ(subr(0, 1, 0), this->c_st_ar(2.08)); + EXPECT_EQ(subr(1, 0, 0), this->c_st_ar(-2.12)); + EXPECT_EQ(subr(1, 1, 0), this->c_st_ar(0.14)); +} + + +} // namespace diff --git a/core/test/accessor/row_major.cpp b/core/test/accessor/row_major.cpp new file mode 100644 index 00000000000..dd85904b634 --- /dev/null +++ b/core/test/accessor/row_major.cpp @@ -0,0 +1,232 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include +#include + + +#include + + +#include "accessor/index_span.hpp" +#include "accessor/range.hpp" +#include "accessor/row_major.hpp" + + +namespace { + + +class RowMajorAccessor : public ::testing::Test { +protected: + using span = gko::acc::index_span; + using dim_type = std::array; + using stride_type = std::array; + + using row_major_int_range = gko::acc::range>; + + // clang-format off + int data[9]{ + 1, 2, -1, + 3, 4, -2, + 5, 6, -3 + }; + // clang-format on + row_major_int_range r{dim_type{{3u, 2u}}, data, stride_type{{3u}}}; +}; + + +TEST_F(RowMajorAccessor, CanCreateDefaultStride) +{ + row_major_int_range r2{dim_type{{3, 3}}, data}; + + EXPECT_EQ(r2(0, 0), 1); + EXPECT_EQ(r2(0, 1), 2); + EXPECT_EQ(r2(0, 2), -1); + EXPECT_EQ(r2(1, 0), 3); + EXPECT_EQ(r2(1, 1), 4); + EXPECT_EQ(r2(1, 2), -2); + EXPECT_EQ(r2(2, 0), 5); + EXPECT_EQ(r2(2, 1), 6); + EXPECT_EQ(r2(2, 2), -3); +} + + +TEST_F(RowMajorAccessor, CanAccessData) +{ + EXPECT_EQ(r(0, 0), 1); + EXPECT_EQ(r(0, 1), 2); + EXPECT_EQ(r(1, 0), 3); + EXPECT_EQ(r(1, 1), 4); + EXPECT_EQ(r(2, 0), 5); + EXPECT_EQ(r(2, 1), 6); +} + + +TEST_F(RowMajorAccessor, CanWriteData) +{ + r(0, 0) = 4; + + EXPECT_EQ(r(0, 0), 4); +} + + +TEST_F(RowMajorAccessor, CanCreateSubrange) +{ + auto subr = r(span{1u, 3u}, span{0u, 2u}); + + EXPECT_EQ(subr(0, 0), 3); + EXPECT_EQ(subr(0, 1), 4); + EXPECT_EQ(subr(1, 0), 5); + EXPECT_EQ(subr(1, 1), 6); +} + + +TEST_F(RowMajorAccessor, CanCreateRowVector) +{ + auto subr = r(2u, span{0u, 2u}); + + EXPECT_EQ(subr(0, 0), 5); + EXPECT_EQ(subr(0, 1), 6); +} + + +TEST_F(RowMajorAccessor, CanCreateColumnVector) +{ + auto subr = r(span{0u, 3u}, 0u); + + EXPECT_EQ(subr(0, 0), 1); + EXPECT_EQ(subr(1, 0), 3); + EXPECT_EQ(subr(2, 0), 5); +} + + +TEST_F(RowMajorAccessor, CanAssignValues) +{ + r(1, 1) = r(0, 0); + + EXPECT_EQ(data[4], 1); +} + + +class RowMajorAccessor3d : public ::testing::Test { +protected: + using span = 
gko::acc::index_span; + static constexpr gko::acc::size_type dimensionality{3}; + + using row_major_int_range = + gko::acc::range>; + + // clang-format off + int data[2 * 3 * 4]{ + 1, 2, -1, 11, + 3, 4, -2, 12, + 5, 6, -3, 13, + + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 32 + }; + // clang-format on + const std::array dim1{{2, 3, 4}}; + const std::array dim2{{2, 2, 3}}; + row_major_int_range default_r{dim1, data}; + row_major_int_range custom_r{ + dim2, data, + std::array{{12, 4}}}; +}; + + +TEST_F(RowMajorAccessor3d, CanAccessData) +{ + EXPECT_EQ(default_r(0, 0, 0), 1); + EXPECT_EQ(custom_r(0, 0, 0), 1); + EXPECT_EQ(default_r(0, 1, 0), 3); + EXPECT_EQ(custom_r(0, 1, 0), 3); + EXPECT_EQ(default_r(0, 1, 3), 12); + EXPECT_EQ(default_r(0, 2, 2), -3); + EXPECT_EQ(default_r(1, 2, 1), 30); + EXPECT_EQ(default_r(1, 2, 2), 31); + EXPECT_EQ(default_r(1, 2, 3), 32); +} + + +TEST_F(RowMajorAccessor3d, CanWriteData) +{ + default_r(0, 0, 0) = 4; + custom_r(1, 1, 1) = 100; + + EXPECT_EQ(default_r(0, 0, 0), 4); + EXPECT_EQ(custom_r(0, 0, 0), 4); + EXPECT_EQ(default_r(1, 1, 1), 100); + EXPECT_EQ(custom_r(1, 1, 1), 100); +} + + +TEST_F(RowMajorAccessor3d, CanCreateSubrange) +{ + auto subr = custom_r(span{0u, 2u}, span{1u, 2u}, span{1u, 3u}); + + EXPECT_EQ(subr(0, 0, 0), 4); + EXPECT_EQ(subr(0, 0, 1), -2); + EXPECT_EQ(subr(1, 0, 0), 26); + EXPECT_EQ(subr(1, 0, 1), 27); +} + + +TEST_F(RowMajorAccessor3d, CanCreateRowVector) +{ + auto subr = default_r(1u, 2u, span{0u, 2u}); + + EXPECT_EQ(subr(0, 0, 0), 29); + EXPECT_EQ(subr(0, 0, 1), 30); +} + + +TEST_F(RowMajorAccessor3d, CanCreateColumnVector) +{ + auto subr = default_r(span{0u, 2u}, 1u, 3u); + + EXPECT_EQ(subr(0, 0, 0), 12); + EXPECT_EQ(subr(1, 0, 0), 28); +} + + +TEST_F(RowMajorAccessor3d, CanAssignValues) +{ + default_r(1, 1, 1) = default_r(0, 0, 0); + + EXPECT_EQ(data[17], 1); +} + + +} // namespace diff --git a/core/test/accessor/scaled_reduced_row_major.cpp b/core/test/accessor/scaled_reduced_row_major.cpp new file mode 
100644 index 00000000000..2cf14d5732b --- /dev/null +++ b/core/test/accessor/scaled_reduced_row_major.cpp @@ -0,0 +1,490 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include +#include +#include +#include + + +#include + + +#include "accessor/index_span.hpp" +#include "accessor/range.hpp" +#include "accessor/scaled_reduced_row_major.hpp" + + +namespace { + + +template +class ScaledReducedStorage3d : public ::testing::Test { +protected: + using ar_type = + typename std::tuple_element<0, decltype(ArithmeticStorageType{})>::type; + using st_type = + typename std::tuple_element<1, decltype(ArithmeticStorageType{})>::type; + // Type for `check_accessor_correctness` to forward the indices + using t = std::tuple; + using i_span = gko::acc::index_span; + + static constexpr ar_type delta{std::numeric_limits::epsilon() * + 1e1}; + + using accessor = + gko::acc::scaled_reduced_row_major<3, ar_type, st_type, 0b0101>; + using const_accessor = + gko::acc::scaled_reduced_row_major<3, ar_type, const st_type, 0b0101>; + + using reduced_storage = gko::acc::range; + using const_reduced_storage = gko::acc::range; + + const std::array size{{1u, 4u, 2u}}; + static constexpr gko::acc::size_type data_elements{8}; + static constexpr gko::acc::size_type scalar_elements{8}; + // clang-format off + st_type data[8]{ + 10, 11, + -12, 13, + 14, -115, + 6, 77 + }; + ar_type scalar[scalar_elements]{ + 1., 2., 3., 4., 5., 6., 7., 8. 
+ }; + // clang-format on + const std::array storage_stride{{8, 2}}; + const std::array scalar_stride{{2}}; + reduced_storage r{size, data, storage_stride, scalar, scalar_stride}; + const_reduced_storage cr{size, data, scalar}; + + template + static void check_accessor_correctness( + const Accessor &a, + std::tuple ignore = std::tuple(99, 99, + 99)) + { + // Test for equality is fine here since they should not be modified + // clang-format off + if (ignore != t(0, 0, 0)) { EXPECT_EQ(a(0, 0, 0), ar_type{10.}); } + if (ignore != t(0, 0, 1)) { EXPECT_EQ(a(0, 0, 1), ar_type{22.}); } + if (ignore != t(0, 1, 0)) { EXPECT_EQ(a(0, 1, 0), ar_type{-12.}); } + if (ignore != t(0, 1, 1)) { EXPECT_EQ(a(0, 1, 1), ar_type{26.}); } + if (ignore != t(0, 2, 0)) { EXPECT_EQ(a(0, 2, 0), ar_type{14.}); } + if (ignore != t(0, 2, 1)) { EXPECT_EQ(a(0, 2, 1), ar_type{-230.}); } + if (ignore != t(0, 3, 0)) { EXPECT_EQ(a(0, 3, 0), ar_type{6.}); } + if (ignore != t(0, 3, 1)) { EXPECT_EQ(a(0, 3, 1), ar_type{154.}); } + // clang-format on + } +}; + +using ScaledReducedStorage3dTypes = ::testing::Types< + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple>; + +TYPED_TEST_SUITE(ScaledReducedStorage3d, ScaledReducedStorage3dTypes); + + +TYPED_TEST(ScaledReducedStorage3d, CorrectLengths) +{ + EXPECT_EQ(this->r.length(0), this->size[0]); + EXPECT_EQ(this->r.length(1), this->size[1]); + EXPECT_EQ(this->r.length(2), this->size[2]); + EXPECT_EQ(this->r.length(3), 1); + EXPECT_EQ(this->r->get_size(), this->size); +} + + +TYPED_TEST(ScaledReducedStorage3d, CorrectStride) +{ + EXPECT_EQ(this->r->get_scalar_stride(), this->scalar_stride); + EXPECT_EQ(this->r->get_storage_stride(), this->storage_stride); +} + + +TYPED_TEST(ScaledReducedStorage3d, CorrectStorage) +{ + EXPECT_EQ(this->r->get_stored_data(), this->data); + EXPECT_EQ(this->r->get_const_storage(), this->data); +} + + +TYPED_TEST(ScaledReducedStorage3d, CorrectScale) +{ + EXPECT_EQ(this->r->get_scalar(), this->scalar); + 
EXPECT_EQ(this->r->get_const_scalar(), this->scalar); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanReadData) +{ + this->check_accessor_correctness(this->r); + this->check_accessor_correctness(this->cr); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanImplicitlyConvertToConst) +{ + using const_reduced_storage = typename TestFixture::const_reduced_storage; + + const_reduced_storage const_rs = this->r->to_const(); + const_reduced_storage const_rs2 = this->cr; + + this->check_accessor_correctness(const_rs); + this->check_accessor_correctness(const_rs2); +} + + +TYPED_TEST(ScaledReducedStorage3d, ToConstWorks) +{ + using const_reduced_storage = typename TestFixture::const_reduced_storage; + auto cr2 = this->r->to_const(); + + static_assert(std::is_same::value, + "Types must be equal!"); + this->check_accessor_correctness(cr2); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanCreateWithStride) +{ + using reduced_storage = typename TestFixture::reduced_storage; + using ar_type = typename TestFixture::ar_type; + std::array size{{2, 1, 2}}; + std::array stride_storage{{5, 2}}; + std::array stride_scalar{{4}}; + + reduced_storage range{size, this->data, stride_storage, this->scalar, + stride_scalar}; + range(1, 0, 0) = ar_type{15}; + + EXPECT_EQ(range(0, 0, 0), ar_type{10}); + EXPECT_EQ(range(0, 0, 1), ar_type{22}); + EXPECT_EQ(range(1, 0, 0), ar_type{15}); + EXPECT_EQ(range(1, 0, 1), ar_type{36}); +} + + +TYPED_TEST(ScaledReducedStorage3d, Subrange) +{ + using i_span = typename TestFixture::i_span; + auto subr = this->cr(0u, i_span{0u, 2u}, 1u); + + EXPECT_EQ(subr(0, 0, 0), 22.); + EXPECT_EQ(subr(0, 1, 0), 26.); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanWriteScale) +{ + using ar_type = typename TestFixture::ar_type; + + this->r->write_scalar_masked(10., 0, 0, 0); + + EXPECT_EQ(this->r(0, 0, 0), ar_type{100.}); + EXPECT_EQ(this->r(0, 0, 1), ar_type{22.}); + EXPECT_EQ(this->r(0, 1, 0), ar_type{-120.}); + EXPECT_EQ(this->r(0, 1, 1), ar_type{26.}); + EXPECT_EQ(this->r(0, 2, 0), 
ar_type{140.}); + EXPECT_EQ(this->r(0, 2, 1), ar_type{-230.}); + EXPECT_EQ(this->r(0, 3, 0), ar_type{60.}); + EXPECT_EQ(this->r(0, 3, 1), ar_type{154.}); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanWriteMaskedScale) +{ + using ar_type = typename TestFixture::ar_type; + + this->r->write_scalar_direct(10., 0, 0); + + EXPECT_EQ(this->r(0, 0, 0), ar_type{100.}); + EXPECT_EQ(this->r(0, 0, 1), ar_type{22.}); + EXPECT_EQ(this->r(0, 1, 0), ar_type{-120.}); + EXPECT_EQ(this->r(0, 1, 1), ar_type{26.}); + EXPECT_EQ(this->r(0, 2, 0), ar_type{140.}); + EXPECT_EQ(this->r(0, 2, 1), ar_type{-230.}); + EXPECT_EQ(this->r(0, 3, 0), ar_type{60.}); + EXPECT_EQ(this->r(0, 3, 1), ar_type{154.}); +} + + +TYPED_TEST(ScaledReducedStorage3d, CanReadScale) +{ + EXPECT_EQ(this->r->read_scalar_masked(0, 0, 0), 1.); + EXPECT_EQ(this->r->read_scalar_masked(0, 0, 1), 2.); + EXPECT_EQ(this->r->read_scalar_direct(0, 0), 1.); + EXPECT_EQ(this->r->read_scalar_direct(0, 1), 2.); +} + + +TYPED_TEST(ScaledReducedStorage3d, Addition) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 10. + 3.; + + const auto result = this->cr(0, 0, 0) + 3.; + this->r(0, 0, 0) += 3.; + + this->check_accessor_correctness(this->r, t(0, 0, 0)); + EXPECT_NEAR(this->r(0, 0, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Addition2) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 10. 
+ 22.; + + const auto result = this->cr(0, 0, 0) + this->r(0, 0, 1); + this->r(0, 0, 0) += this->cr(0, 0, 1); + + this->check_accessor_correctness(this->r, t(0, 0, 0)); + EXPECT_NEAR(this->r(0, 0, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Subtraction) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 22. - 2.; + + const auto result = this->cr(0, 0, 1) - 2.; + this->r(0, 0, 1) -= 2.; + + this->check_accessor_correctness(this->r, t(0, 0, 1)); + EXPECT_NEAR(this->r(0, 0, 1), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Subtraction2) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = -12. - 26.; + + const auto result = this->cr(0, 1, 0) - this->r(0, 1, 1); + this->r(0, 1, 0) -= this->r(0, 1, 1); + + this->check_accessor_correctness(this->r, t(0, 1, 0)); + EXPECT_NEAR(this->r(0, 1, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Multiplication) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 26. * 3.; + + const auto result = this->cr(0, 1, 1) * 3.; + this->r(0, 1, 1) *= 3.; + + this->check_accessor_correctness(this->r, t(0, 1, 1)); + EXPECT_NEAR(this->r(0, 1, 1), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Multiplication2) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 14. 
* 10.; + + const auto result = this->r(0, 2, 0) * this->r(0, 0, 0); + this->r(0, 2, 0) *= this->r(0, 0, 0); + + this->check_accessor_correctness(this->r, t(0, 2, 0)); + EXPECT_NEAR(this->r(0, 2, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Division) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = 10. / 2.; + + const auto result = this->cr(0, 0, 0) / 2.; + this->r(0, 0, 0) /= 2.; + + this->check_accessor_correctness(this->r, t(0, 0, 0)); + EXPECT_NEAR(this->r(0, 0, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, Division2) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type expected = -12. / 6.; + + const auto result = this->r(0, 1, 0) / this->r(0, 3, 0); + this->r(0, 1, 0) /= this->r(0, 3, 0); + + this->check_accessor_correctness(this->r, t(0, 1, 0)); + EXPECT_NEAR(this->r(0, 1, 0), expected, TestFixture::delta); + EXPECT_NEAR(result, expected, TestFixture::delta); +} + + +TYPED_TEST(ScaledReducedStorage3d, UnaryMinus) +{ + using ar_type = typename TestFixture::ar_type; + using t = typename TestFixture::t; + const ar_type neg_expected = this->r(0, 1, 1); + const ar_type expected = -neg_expected; + + auto result = -this->r(0, 1, 1); + + this->check_accessor_correctness(this->r); + EXPECT_EQ(result, expected); +} + + +class ScaledReducedStorageXd : public ::testing::Test { +protected: + using ar_type = double; + using st_type = int; + using size_type = gko::acc::size_type; + static constexpr ar_type delta{0.1}; + + using accessor1d = + gko::acc::scaled_reduced_row_major<1, ar_type, st_type, 1>; + using accessor2d = + gko::acc::scaled_reduced_row_major<2, ar_type, st_type, 3>; + using const_accessor1d = + gko::acc::scaled_reduced_row_major<1, ar_type, const st_type, 1>; + using 
const_accessor2d = + gko::acc::scaled_reduced_row_major<2, ar_type, const st_type, 3>; + static_assert(std::is_same::value, + "Const accessors must be the same!"); + static_assert(std::is_same::value, + "Const accessors must be the same!"); + + using reduced_storage1d = gko::acc::range; + using reduced_storage2d = gko::acc::range; + using const_reduced_storage2d = gko::acc::range; + using const_reduced_storage1d = gko::acc::range; + + const std::array stride0{{}}; + const std::array stride1{{4}}; + const std::array stride_sc{{5}}; + const std::array size_1d{{8u}}; + const std::array size_2d{{2u, 2u}}; + + static constexpr gko::acc::size_type data_elements{8}; + st_type data[data_elements]{10, 22, 32, 44, 54, 66, 76, -88}; + ar_type scalar[data_elements]{1e0, 5e-1, 1e-1, 5e-2, + 1e-2, 5e-3, 1e-3, 5e-4}; + + reduced_storage1d r1{size_1d, data, scalar}; + reduced_storage2d r2{size_2d, data, stride1, scalar, stride_sc}; + const_reduced_storage1d cr1{size_1d, data, stride0, scalar}; + const_reduced_storage2d cr2{size_2d, data, stride1, scalar, stride_sc}; + + void data_equal_except_for(int idx) + { + // clang-format off + if (idx != 0) { EXPECT_EQ(data[0], 10); } + if (idx != 1) { EXPECT_EQ(data[1], 22); } + if (idx != 2) { EXPECT_EQ(data[2], 32); } + if (idx != 3) { EXPECT_EQ(data[3], 44); } + if (idx != 4) { EXPECT_EQ(data[4], 54); } + if (idx != 5) { EXPECT_EQ(data[5], 66); } + if (idx != 6) { EXPECT_EQ(data[6], 76); } + if (idx != 7) { EXPECT_EQ(data[7], -88); } + // clang-format on + } + void scalar_equal_except_for(int idx) + { + // clang-format off + if (idx != 0) { EXPECT_EQ(scalar[0], ar_type{1e0}); } + if (idx != 1) { EXPECT_EQ(scalar[1], ar_type{5e-1}); } + if (idx != 2) { EXPECT_EQ(scalar[2], ar_type{1e-1}); } + if (idx != 3) { EXPECT_EQ(scalar[3], ar_type{5e-2}); } + if (idx != 4) { EXPECT_EQ(scalar[4], ar_type{1e-2}); } + if (idx != 5) { EXPECT_EQ(scalar[5], ar_type{5e-3}); } + if (idx != 6) { EXPECT_EQ(scalar[6], ar_type{1e-3}); } + if (idx != 7) { 
EXPECT_EQ(scalar[7], ar_type{5e-4}); } + // clang-format on + } +}; + + +TEST_F(ScaledReducedStorageXd, CanRead) +{ + EXPECT_NEAR(cr1(1), 11., delta); + EXPECT_NEAR(cr2(0, 1), 11., delta); + EXPECT_NEAR(cr2(1, 1), 66e-3, delta); + EXPECT_NEAR(r1(1), 11., delta); + EXPECT_NEAR(r2(0, 1), 11., delta); + EXPECT_NEAR(r2(1, 1), 66e-3, delta); +} + + +TEST_F(ScaledReducedStorageXd, CanWrite1) +{ + r1(2) = 0.2; + + data_equal_except_for(2); + scalar_equal_except_for(99); + EXPECT_NEAR(r1(2), 0.2, delta); +} + + +TEST_F(ScaledReducedStorageXd, CanWrite2) +{ + r2(1, 1) = 0.5; + + data_equal_except_for(5); + scalar_equal_except_for(99); + EXPECT_NEAR(r2(1, 1), 0.5, delta); +} + + +} // namespace diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 1183b339e07..6f8c7291165 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -11,6 +11,7 @@ ginkgo_create_test(executor) ginkgo_create_test(iterator_factory) ginkgo_create_test(lin_op) ginkgo_create_test(math) +ginkgo_create_test(matrix_assembly_data) ginkgo_create_test(matrix_data) ginkgo_create_test(mtx_io) ginkgo_create_test(perturbation) diff --git a/core/test/base/abstract_factory.cpp b/core/test/base/abstract_factory.cpp index cf57531e7fe..15ee287d742 100644 --- a/core/test/base/abstract_factory.cpp +++ b/core/test/base/abstract_factory.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/allocator.cpp b/core/test/base/allocator.cpp index 32e4c9db85d..3d09052487f 100644 --- a/core/test/base/allocator.cpp +++ b/core/test/base/allocator.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index b1e04dd2f39..708bfcf5dad 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,12 +57,13 @@ class Array : public ::testing::Test { x.get_data()[1] = 2; } - static void assert_equal_to_original_x(gko::Array &a) + static void assert_equal_to_original_x(gko::Array &a, + bool check_zero = true) { ASSERT_EQ(a.get_num_elems(), 2); - EXPECT_EQ(a.get_data()[0], T{5}); + if (check_zero) EXPECT_EQ(a.get_data()[0], T{5}); EXPECT_EQ(a.get_data()[1], T{2}); - EXPECT_EQ(a.get_const_data()[0], T{5}); + if (check_zero) EXPECT_EQ(a.get_const_data()[0], T{5}); EXPECT_EQ(a.get_const_data()[1], T{2}); } @@ -70,7 +71,7 @@ class Array : public ::testing::Test { gko::Array x; }; -TYPED_TEST_CASE(Array, gko::test::ValueAndIndexTypes); +TYPED_TEST_SUITE(Array, gko::test::ValueAndIndexTypes); TYPED_TEST(Array, CanBeCreatedWithoutAnExecutor) @@ -263,6 +264,70 @@ TYPED_TEST(Array, CanBeMovedFromExecutorlessArray) } +TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor) +{ + auto tmp_clone = make_temporary_clone(this->exec, &this->x); + + ASSERT_EQ(tmp_clone.get(), &this->x); +} + + +TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnSameExecutor) +{ + auto tmp_clone = make_temporary_output_clone(this->exec, &this->x); + + ASSERT_EQ(tmp_clone.get(), &this->x); +} + + +// For tests between different memory, check cuda/test/base/array.cu +TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) +{ + auto other = gko::ReferenceExecutor::create(); + + auto tmp_clone = make_temporary_clone(other, &this->x); + + this->assert_equal_to_original_x(*tmp_clone.get()); + 
ASSERT_EQ(tmp_clone.get(), &this->x); +} + + +TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory) +{ + auto other = gko::ReferenceExecutor::create(); + + { + auto tmp_clone = make_temporary_clone(other, &this->x); + // change x, and check that there is no copy-back to overwrite it again + this->x.get_data()[0] = 0; + } + + this->assert_equal_to_original_x(this->x, false); + EXPECT_EQ(this->x.get_data()[0], TypeParam{0}); +} + + +TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnDifferentExecutors) +{ + auto other = gko::OmpExecutor::create(); + + { + auto tmp_clone = make_temporary_output_clone(other, &this->x); + tmp_clone->get_data()[0] = 4; + tmp_clone->get_data()[1] = 5; + + // there is no reliable way to check the memory is uninitialized + ASSERT_EQ(tmp_clone->get_num_elems(), this->x.get_num_elems()); + ASSERT_EQ(tmp_clone->get_executor(), other); + ASSERT_EQ(this->x.get_executor(), this->exec); + ASSERT_EQ(this->x.get_data()[0], TypeParam{5}); + ASSERT_EQ(this->x.get_data()[1], TypeParam{2}); + } + ASSERT_EQ(this->x.get_data()[0], TypeParam{4}); + ASSERT_EQ(this->x.get_data()[1], TypeParam{5}); +} + + TYPED_TEST(Array, CanBeCleared) { this->x.clear(); diff --git a/core/test/base/combination.cpp b/core/test/base/combination.cpp index 8c2eaf35b6f..de24b3eba99 100644 --- a/core/test/base/combination.cpp +++ b/core/test/base/combination.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -74,7 +74,7 @@ class Combination : public ::testing::Test { std::vector> coefficients; }; -TYPED_TEST_CASE(Combination, gko::test::ValueTypes); +TYPED_TEST_SUITE(Combination, gko::test::ValueTypes); TYPED_TEST(Combination, CanBeEmpty) diff --git a/core/test/base/composition.cpp b/core/test/base/composition.cpp index aa9df458456..628be53b08c 100644 --- a/core/test/base/composition.cpp +++ b/core/test/base/composition.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -72,7 +72,7 @@ class Composition : public ::testing::Test { std::vector> operators; }; -TYPED_TEST_CASE(Composition, gko::test::ValueTypes); +TYPED_TEST_SUITE(Composition, gko::test::ValueTypes); TYPED_TEST(Composition, CanBeEmpty) diff --git a/core/test/base/dim.cpp b/core/test/base/dim.cpp index b94ef6672c1..f78191967d9 100644 --- a/core/test/base/dim.cpp +++ b/core/test/base/dim.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -117,6 +117,26 @@ TEST(Dim, ConvertsToBool) } +TEST(Dim, CanAppendToStream1) +{ + gko::dim<2> d2{2, 3}; + + std::ostringstream os; + os << d2; + ASSERT_EQ(os.str(), "(2, 3)"); +} + + +TEST(Dim, CanAppendToStream2) +{ + gko::dim<3> d2{2, 3, 4}; + + std::ostringstream os; + os << d2; + ASSERT_EQ(os.str(), "(2, 3, 4)"); +} + + TEST(Dim, EqualityReturnsTrueWhenEqual) { ASSERT_TRUE(gko::dim<2>(2, 3) == gko::dim<2>(2, 3)); diff --git a/core/test/base/exception.cpp b/core/test/base/exception.cpp index b04d7553103..bbbe623dc30 100644 --- a/core/test/base/exception.cpp +++ b/core/test/base/exception.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -90,6 +90,14 @@ TEST(ExceptionClasses, CublasErrorReturnsCorrectWhatMessage) } +TEST(ExceptionClasses, CurandErrorReturnsCorrectWhatMessage) +{ + gko::CurandError error("test_file.cpp", 123, "test_func", 1); + std::string expected = "test_file.cpp:123: test_func: "; + ASSERT_EQ(expected, std::string(error.what()).substr(0, expected.size())); +} + + TEST(ExceptionClasses, CusparseErrorReturnsCorrectWhatMessage) { gko::CusparseError error("test_file.cpp", 123, "test_func", 1); @@ -114,6 +122,14 @@ TEST(ExceptionClasses, HipblasErrorReturnsCorrectWhatMessage) } +TEST(ExceptionClasses, HiprandErrorReturnsCorrectWhatMessage) +{ + gko::HiprandError error("test_file.cpp", 123, "test_func", 1); + std::string expected = "test_file.cpp:123: test_func: "; + ASSERT_EQ(expected, std::string(error.what()).substr(0, expected.size())); +} + + TEST(ExceptionClasses, HipsparseErrorReturnsCorrectWhatMessage) { gko::HipsparseError error("test_file.cpp", 123, "test_func", 1); @@ -132,6 +148,14 @@ TEST(ExceptionClasses, DimensionMismatchReturnsCorrectWhatMessage) } 
+TEST(ExceptionClasses, BlockSizeErrorCorrectWhatMessage) +{ + gko::BlockSizeError error("test_file.cpp", 243, 3, 20); + ASSERT_EQ(std::string("test_file.cpp:243: block size = 3, size = 20"), + error.what()); +} + + TEST(ExceptionClasses, AllocationErrorReturnsCorrectWhatMessage) { gko::AllocationError error("test_file.cpp", 42, "OMP", 135); diff --git a/core/test/base/exception_helpers.cpp b/core/test/base/exception_helpers.cpp index cad8a3d5684..c9f961f807b 100644 --- a/core/test/base/exception_helpers.cpp +++ b/core/test/base/exception_helpers.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -116,6 +116,14 @@ TEST(CudaError, ReturnsCublasError) } +void throws_curand_error() { throw GKO_CURAND_ERROR(0); } + +TEST(CudaError, ReturnsCurandError) +{ + ASSERT_THROW(throws_curand_error(), gko::CurandError); +} + + void throws_cusparse_error() { throw GKO_CUSPARSE_ERROR(0); } TEST(CudaError, ReturnsCusparseError) @@ -140,6 +148,14 @@ TEST(HipError, ReturnsHipblasError) } +void throws_hiprand_error() { throw GKO_HIPRAND_ERROR(0); } + +TEST(HipError, ReturnsHiprandError) +{ + ASSERT_THROW(throws_hiprand_error(), gko::HiprandError); +} + + void throws_hipsparse_error() { throw GKO_HIPSPARSE_ERROR(0); } TEST(HipError, ReturnsHipsparseError) diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 1b2e1b0698e..5f1ec600cd2 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,9 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include +#if defined(__unix__) || defined(__APPLE__) +#include +#endif + + #include @@ -63,10 +69,14 @@ class ExampleOperation : public gko::Operation { { value = 3; } - void run(std::shared_ptr) const override + void run(std::shared_ptr) const override { value = 4; } + void run(std::shared_ptr) const override + { + value = 5; + } int &value; }; @@ -78,6 +88,7 @@ TEST(OmpExecutor, RunsCorrectOperation) exec_ptr omp = gko::OmpExecutor::create(); omp->run(ExampleOperation(value)); + ASSERT_EQ(1, value); } @@ -88,9 +99,11 @@ TEST(OmpExecutor, RunsCorrectLambdaOperation) auto omp_lambda = [&value]() { value = 1; }; auto cuda_lambda = [&value]() { value = 2; }; auto hip_lambda = [&value]() { value = 3; }; + auto dpcpp_lambda = [&value]() { value = 4; }; exec_ptr omp = gko::OmpExecutor::create(); - omp->run(omp_lambda, cuda_lambda, hip_lambda); + omp->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + ASSERT_EQ(1, value); } @@ -149,13 +162,61 @@ TEST(OmpExecutor, IsItsOwnMaster) } +#if GKO_HAVE_HWLOC + + +TEST(OmpExecutor, CanGetNumCpusFromExecInfo) +{ + auto omp = gko::OmpExecutor::create(); + + auto num_cpus = omp->get_num_cores() * omp->get_num_threads_per_core(); + + ASSERT_EQ(std::thread::hardware_concurrency(), num_cpus); +} + + +inline int get_os_id(int log_id) +{ + return gko::MachineTopology::get_instance()->get_core(log_id)->os_id; +} + + +TEST(MachineTopology, CanBindToASpecificCore) +{ + auto cpu_sys = sched_getcpu(); + + const int bind_core = 3; + gko::MachineTopology::get_instance()->bind_to_cores( + std::vector{bind_core}); + + cpu_sys = sched_getcpu(); + ASSERT_EQ(cpu_sys, get_os_id(bind_core)); +} + + +TEST(MachineTopology, CanBindToARangeofCores) +{ + auto cpu_sys = sched_getcpu(); + + const std::vector bind_core = {1, 3}; + gko::MachineTopology::get_instance()->bind_to_cores(bind_core); + + cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_os_id(3) || cpu_sys == get_os_id(1)); +} + + +#endif + + TEST(ReferenceExecutor, 
RunsCorrectOperation) { int value = 0; exec_ptr ref = gko::ReferenceExecutor::create(); ref->run(ExampleOperation(value)); - ASSERT_EQ(4, value); + + ASSERT_EQ(5, value); } @@ -165,9 +226,11 @@ TEST(ReferenceExecutor, RunsCorrectLambdaOperation) auto omp_lambda = [&value]() { value = 1; }; auto cuda_lambda = [&value]() { value = 2; }; auto hip_lambda = [&value]() { value = 3; }; + auto dpcpp_lambda = [&value]() { value = 4; }; exec_ptr ref = gko::ReferenceExecutor::create(); - ref->run(omp_lambda, cuda_lambda, hip_lambda); + ref->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + ASSERT_EQ(1, value); } @@ -279,6 +342,7 @@ TEST(CudaExecutor, RunsCorrectOperation) gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); cuda->run(ExampleOperation(value)); + ASSERT_EQ(2, value); } @@ -289,10 +353,12 @@ TEST(CudaExecutor, RunsCorrectLambdaOperation) auto omp_lambda = [&value]() { value = 1; }; auto cuda_lambda = [&value]() { value = 2; }; auto hip_lambda = [&value]() { value = 3; }; + auto dpcpp_lambda = [&value]() { value = 4; }; exec_ptr cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - cuda->run(omp_lambda, cuda_lambda, hip_lambda); + cuda->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + ASSERT_EQ(2, value); } @@ -350,6 +416,7 @@ TEST(HipExecutor, RunsCorrectOperation) exec_ptr hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); hip->run(ExampleOperation(value)); + ASSERT_EQ(3, value); } @@ -360,9 +427,11 @@ TEST(HipExecutor, RunsCorrectLambdaOperation) auto omp_lambda = [&value]() { value = 1; }; auto cuda_lambda = [&value]() { value = 2; }; auto hip_lambda = [&value]() { value = 3; }; + auto dpcpp_lambda = [&value]() { value = 4; }; exec_ptr hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); - hip->run(omp_lambda, cuda_lambda, hip_lambda); + hip->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + ASSERT_EQ(3, value); } @@ -414,6 +483,135 @@ TEST(HipExecutor, CanSetDeviceResetBoolean) } 
+TEST(DpcppExecutor, RunsCorrectOperation) +{ + int value = 0; + exec_ptr dpcpp = gko::DpcppExecutor::create(0, gko::OmpExecutor::create()); + + dpcpp->run(ExampleOperation(value)); + + ASSERT_EQ(4, value); +} + + +TEST(DpcppExecutor, RunsCorrectLambdaOperation) +{ + int value = 0; + auto omp_lambda = [&value]() { value = 1; }; + auto cuda_lambda = [&value]() { value = 2; }; + auto hip_lambda = [&value]() { value = 3; }; + auto dpcpp_lambda = [&value]() { value = 4; }; + exec_ptr dpcpp = gko::DpcppExecutor::create(0, gko::OmpExecutor::create()); + + dpcpp->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + + ASSERT_EQ(4, value); +} + + +TEST(DpcppExecutor, KnowsItsMaster) +{ + auto omp = gko::OmpExecutor::create(); + exec_ptr dpcpp = gko::DpcppExecutor::create(0, omp); + + ASSERT_EQ(omp, dpcpp->get_master()); +} + + +TEST(DpcppExecutor, KnowsItsDeviceId) +{ + auto omp = gko::OmpExecutor::create(); + auto dpcpp = gko::DpcppExecutor::create(0, omp); + + ASSERT_EQ(0, dpcpp->get_device_id()); +} + + +TEST(Executor, CanVerifyMemory) +{ + auto ref = gko::ReferenceExecutor::create(); + auto omp = gko::OmpExecutor::create(); + auto hip = gko::HipExecutor::create(0, omp); + auto cuda = gko::CudaExecutor::create(0, omp); + auto omp2 = gko::OmpExecutor::create(); + auto hip2 = gko::HipExecutor::create(0, omp); + auto cuda2 = gko::CudaExecutor::create(0, omp); + auto hip_1 = gko::HipExecutor::create(1, omp); + auto cuda_1 = gko::CudaExecutor::create(1, omp); + std::shared_ptr host_dpcpp; + std::shared_ptr cpu_dpcpp; + std::shared_ptr gpu_dpcpp; + std::shared_ptr host_dpcpp_dup; + std::shared_ptr cpu_dpcpp_dup; + std::shared_ptr gpu_dpcpp_dup; + if (gko::DpcppExecutor::get_num_devices("host")) { + host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); + host_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "host"); + } + if (gko::DpcppExecutor::get_num_devices("cpu")) { + cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); + cpu_dpcpp_dup = 
gko::DpcppExecutor::create(0, omp, "cpu"); + } + if (gko::DpcppExecutor::get_num_devices("gpu")) { + gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); + gpu_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "gpu"); + } + + ASSERT_EQ(false, ref->memory_accessible(omp)); + ASSERT_EQ(false, omp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(hip)); + ASSERT_EQ(false, hip->memory_accessible(ref)); + ASSERT_EQ(false, omp->memory_accessible(hip)); + ASSERT_EQ(false, hip->memory_accessible(omp)); + ASSERT_EQ(false, ref->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(ref)); + ASSERT_EQ(false, omp->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(omp)); + if (gko::DpcppExecutor::get_num_devices("host")) { + ASSERT_EQ(false, host_dpcpp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(host_dpcpp)); + ASSERT_EQ(true, host_dpcpp->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(host_dpcpp)); + ASSERT_EQ(true, host_dpcpp->memory_accessible(host_dpcpp_dup)); + ASSERT_EQ(true, host_dpcpp_dup->memory_accessible(host_dpcpp)); + } + if (gko::DpcppExecutor::get_num_devices("cpu")) { + ASSERT_EQ(false, ref->memory_accessible(cpu_dpcpp)); + ASSERT_EQ(false, cpu_dpcpp->memory_accessible(ref)); + ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp)); + ASSERT_EQ(true, cpu_dpcpp->memory_accessible(cpu_dpcpp_dup)); + ASSERT_EQ(true, cpu_dpcpp_dup->memory_accessible(cpu_dpcpp)); + } + if (gko::DpcppExecutor::get_num_devices("gpu")) { + ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp)); + ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp)); + ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp)); + ASSERT_EQ(false, gpu_dpcpp->memory_accessible(gpu_dpcpp_dup)); + ASSERT_EQ(false, gpu_dpcpp_dup->memory_accessible(gpu_dpcpp)); + } +#if GINKGO_HIP_PLATFORM_NVCC + ASSERT_EQ(true, 
hip->memory_accessible(cuda)); + ASSERT_EQ(true, cuda->memory_accessible(hip)); + ASSERT_EQ(true, hip_1->memory_accessible(cuda_1)); + ASSERT_EQ(true, cuda_1->memory_accessible(hip_1)); +#else + ASSERT_EQ(false, hip->memory_accessible(cuda)); + ASSERT_EQ(false, cuda->memory_accessible(hip)); + ASSERT_EQ(false, hip_1->memory_accessible(cuda_1)); + ASSERT_EQ(false, cuda_1->memory_accessible(hip_1)); +#endif + ASSERT_EQ(true, omp->memory_accessible(omp2)); + ASSERT_EQ(true, hip->memory_accessible(hip2)); + ASSERT_EQ(true, cuda->memory_accessible(cuda2)); + ASSERT_EQ(false, hip->memory_accessible(hip_1)); + ASSERT_EQ(false, cuda->memory_accessible(hip_1)); + ASSERT_EQ(false, cuda->memory_accessible(cuda_1)); + ASSERT_EQ(false, hip->memory_accessible(cuda_1)); +} + + template struct mock_free : T { /** @@ -448,6 +646,7 @@ TEST(ExecutorDeleter, DeletesObject) TEST(ExecutorDeleter, AvoidsDeletionForNullExecutor) { int x[5]; + ASSERT_NO_THROW(gko::executor_deleter{nullptr}(x)); } diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp index f2f7da597bf..f1c38f8b983 100644 --- a/core/test/base/extended_float.cpp +++ b/core/test/base/extended_float.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/iterator_factory.cpp b/core/test/base/iterator_factory.cpp index b2b873a627a..340398d64c9 100644 --- a/core/test/base/iterator_factory.cpp +++ b/core/test/base/iterator_factory.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -96,7 +96,7 @@ class IteratorFactory : public ::testing::Test { const std::vector ordered_value; }; -TYPED_TEST_CASE(IteratorFactory, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(IteratorFactory, gko::test::ValueIndexTypes); TYPED_TEST(IteratorFactory, EmptyIterator) diff --git a/core/test/base/lin_op.cpp b/core/test/base/lin_op.cpp index 622158d9b42..1c72b536778 100644 --- a/core/test/base/lin_op.cpp +++ b/core/test/base/lin_op.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,9 +33,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include +#include + + #include +#include + + namespace { @@ -85,8 +93,8 @@ class EnableLinOp : public ::testing::Test { protected: EnableLinOp() : ref{gko::ReferenceExecutor::create()}, - omp{gko::OmpExecutor::create()}, - op{DummyLinOp::create(omp, gko::dim<2>{3, 5})}, + ref2{gko::ReferenceExecutor::create()}, + op{DummyLinOp::create(ref2, gko::dim<2>{3, 5})}, alpha{DummyLinOp::create(ref, gko::dim<2>{1})}, beta{DummyLinOp::create(ref, gko::dim<2>{1})}, b{DummyLinOp::create(ref, gko::dim<2>{5, 4})}, @@ -94,7 +102,7 @@ class EnableLinOp : public ::testing::Test { {} std::shared_ptr ref; - std::shared_ptr omp; + std::shared_ptr ref2; std::unique_ptr op; std::unique_ptr alpha; std::unique_ptr beta; @@ -107,7 +115,7 @@ TEST_F(EnableLinOp, CallsApplyImpl) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(op->last_access, omp); + ASSERT_EQ(op->last_access, ref2); } @@ -115,7 +123,7 @@ TEST_F(EnableLinOp, CallsExtendedApplyImpl) { op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(op->last_access, omp); + ASSERT_EQ(op->last_access, ref2); } @@ -196,43 +204,44 @@ 
TEST_F(EnableLinOp, ExtendedApplyFailsOnWrongBetaDimension) } -TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor) +// For tests between different memory, check cuda/test/base/lin_op.cu +TEST_F(EnableLinOp, ApplyDoesNotCopyBetweenSameMemory) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(op->last_b_access, omp); - ASSERT_EQ(op->last_x_access, omp); + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_x_access, ref); } -TEST_F(EnableLinOp, ApplyCopiesBackOnlyX) +TEST_F(EnableLinOp, ApplyNoCopyBackBetweenSameMemory) { op->apply(gko::lend(b), gko::lend(x)); - ASSERT_EQ(b->last_access, nullptr); - ASSERT_EQ(x->last_access, omp); + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(x->last_access, ref); } -TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor) +TEST_F(EnableLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory) { op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(op->last_alpha_access, omp); - ASSERT_EQ(op->last_b_access, omp); - ASSERT_EQ(op->last_beta_access, omp); - ASSERT_EQ(op->last_x_access, omp); + ASSERT_EQ(op->last_alpha_access, ref); + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_beta_access, ref); + ASSERT_EQ(op->last_x_access, ref); } -TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX) +TEST_F(EnableLinOp, ExtendedApplyNoCopyBackBetweenSameMemory) { - op->apply(gko::lend(b), gko::lend(x)); + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); - ASSERT_EQ(alpha->last_access, nullptr); - ASSERT_EQ(b->last_access, nullptr); - ASSERT_EQ(beta->last_access, nullptr); - ASSERT_EQ(x->last_access, omp); + ASSERT_EQ(alpha->last_access, ref); + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(beta->last_access, ref); + ASSERT_EQ(x->last_access, ref); } @@ -314,4 +323,108 @@ TEST_F(EnableLinOpFactory, PassesParametersToLinOp) } +template +class DummyLinOpWithType + : public gko::EnableLinOp>, + public gko::EnableCreateMethod>, + public gko::EnableAbsoluteComputation< + 
gko::remove_complex>> { +public: + using absolute_type = gko::remove_complex; + DummyLinOpWithType(std::shared_ptr exec) + : gko::EnableLinOp(exec) + {} + + DummyLinOpWithType(std::shared_ptr exec, + gko::dim<2> size, Type value) + : gko::EnableLinOp(exec, size), value_(value) + {} + + void compute_absolute_inplace() override { value_ = gko::abs(value_); } + + std::unique_ptr compute_absolute() const override + { + return std::make_unique( + this->get_executor(), this->get_size(), gko::abs(value_)); + } + + Type get_value() const { return value_; } + +protected: + void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + + void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, + const gko::LinOp *beta, gko::LinOp *x) const override + {} + +private: + Type value_; +}; + + +class EnableAbsoluteComputation : public ::testing::Test { +protected: + using dummy_type = DummyLinOpWithType>; + EnableAbsoluteComputation() + : ref{gko::ReferenceExecutor::create()}, + op{dummy_type::create(ref, gko::dim<2>{1, 1}, + std::complex{-3.0, 4.0})} + {} + + std::shared_ptr ref; + std::shared_ptr op; +}; + + +TEST_F(EnableAbsoluteComputation, InplaceAbsoluteOnConcreteType) +{ + op->compute_absolute_inplace(); + + ASSERT_EQ(op->get_value(), std::complex{5.0}); +} + + +TEST_F(EnableAbsoluteComputation, OutplaceAbsoluteOnConcreteType) +{ + auto abs_op = op->compute_absolute(); + + static_assert( + std::is_same>>::value, + "Types must match."); + ASSERT_EQ(abs_op->get_value(), 5.0); +} + + +TEST_F(EnableAbsoluteComputation, InplaceAbsoluteOnAbsoluteComputable) +{ + auto linop = gko::as(op); + + gko::as(linop)->compute_absolute_inplace(); + + ASSERT_EQ(gko::as(linop)->get_value(), + std::complex{5.0}); +} + + +TEST_F(EnableAbsoluteComputation, OutplaceAbsoluteOnAbsoluteComputable) +{ + auto abs_op = op->compute_absolute(); + + static_assert( + std::is_same>>::value, + "Types must match."); + ASSERT_EQ(abs_op->get_value(), 5.0); +} + + +TEST_F(EnableAbsoluteComputation, 
ThrowWithoutAbsoluteComputableInterface) +{ + std::shared_ptr linop = DummyLinOp::create(ref); + + ASSERT_THROW(gko::as(linop), gko::NotSupported); +} + + } // namespace diff --git a/core/test/base/math.cpp b/core/test/base/math.cpp index c63cd4ae8e9..c8f6e8725f5 100644 --- a/core/test/base/math.cpp +++ b/core/test/base/math.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -53,6 +53,10 @@ static_assert( "imag must return a real type"); +template +class DummyClass {}; + + template void test_real_is_finite() { @@ -135,4 +139,48 @@ TEST(Conjugate, DoubleComplex) } +TEST(RemoveComplexClass, Float) +{ + using origin = DummyClass; + using expect = DummyClass; + + bool check = std::is_same>::value; + + ASSERT_TRUE(check); +} + + +TEST(RemoveComplexClass, Double) +{ + using origin = DummyClass; + using expect = DummyClass; + + bool check = std::is_same>::value; + + ASSERT_TRUE(check); +} + + +TEST(RemoveComplexClass, FloatComplex) +{ + using origin = DummyClass, int>; + using expect = DummyClass; + + bool check = std::is_same>::value; + + ASSERT_TRUE(check); +} + + +TEST(RemoveComplexClass, DoubleComplex) +{ + using origin = DummyClass, int>; + using expect = DummyClass; + + bool check = std::is_same>::value; + + ASSERT_TRUE(check); +} + + } // namespace diff --git a/core/test/base/matrix_assembly_data.cpp b/core/test/base/matrix_assembly_data.cpp new file mode 100644 index 00000000000..d696a008410 --- /dev/null +++ b/core/test/base/matrix_assembly_data.cpp @@ -0,0 +1,129 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +namespace { + + +TEST(MatrixAssemblyData, InitializesWithZeros) +{ + gko::matrix_assembly_data m(gko::dim<2>{3, 5}); + + ASSERT_EQ(m.get_size(), gko::dim<2>(3, 5)); + ASSERT_EQ(m.get_num_stored_elements(), 0); + ASSERT_EQ(m.get_value(0, 0), 0.0); + ASSERT_FALSE(m.contains(0, 0)); +} + + +TEST(MatrixAssemblyData, InsertsValuesWithoutAdding) +{ + gko::matrix_assembly_data m(gko::dim<2>{3, 5}); + + m.add_value(0, 0, 1.3); + m.add_value(2, 3, 2.2); + m.add_value(1, 4, 1.1); + m.add_value(1, 2, 3.6); + + ASSERT_EQ(m.get_size(), gko::dim<2>(3, 5)); + ASSERT_EQ(m.get_num_stored_elements(), 4); + ASSERT_EQ(m.get_value(0, 0), 1.3); + ASSERT_EQ(m.get_value(2, 3), 2.2); + ASSERT_EQ(m.get_value(1, 4), 1.1); + ASSERT_EQ(m.get_value(1, 2), 3.6); + ASSERT_TRUE(m.contains(0, 0)); +} + + +TEST(MatrixAssemblyData, InsertsValuesWithAdding) +{ + gko::matrix_assembly_data m(gko::dim<2>{3, 5}); + + m.add_value(0, 0, 1.3); + m.add_value(2, 3, 2.2); + m.add_value(1, 4, 1.1); + m.add_value(1, 2, 3.6); + m.add_value(1, 4, 9.1); + m.add_value(2, 3, 1.3); + + ASSERT_EQ(m.get_size(), gko::dim<2>(3, 5)); + ASSERT_EQ(m.get_num_stored_elements(), 4); + ASSERT_EQ(m.get_value(0, 0), 1.3); + ASSERT_EQ(m.get_value(2, 3), 3.5); + ASSERT_EQ(m.get_value(1, 4), 10.2); + ASSERT_EQ(m.get_value(1, 2), 3.6); +} + + +TEST(MatrixAssemblyData, OverwritesValuesWhenNotAdding) +{ + gko::matrix_assembly_data m(gko::dim<2>{3, 5}); + + m.set_value(0, 0, 1.3); + m.set_value(2, 3, 2.2); + m.set_value(1, 4, 1.1); + m.set_value(1, 2, 3.6); + m.set_value(1, 4, 9.1); + m.set_value(2, 3, 1.4); + + ASSERT_EQ(m.get_size(), gko::dim<2>(3, 5)); + ASSERT_EQ(m.get_num_stored_elements(), 4); + ASSERT_EQ(m.get_value(0, 0), 1.3); + ASSERT_EQ(m.get_value(2, 3), 1.4); + ASSERT_EQ(m.get_value(1, 4), 9.1); + ASSERT_EQ(m.get_value(1, 2), 3.6); +} + + +TEST(MatrixAssemblyData, GetsSortedData) +{ + gko::matrix_assembly_data m(gko::dim<2>{3, 5}); 
+ std::vector::nonzero_type> reference{ + {0, 0, 1.3}, {1, 2, 3.6}, {1, 4, 1.1}, {2, 3, 2.2}}; + m.set_value(0, 0, 1.3); + m.set_value(2, 3, 2.2); + m.set_value(1, 4, 1.1); + m.set_value(1, 2, 3.6); + + auto sorted = m.get_ordered_data(); + + ASSERT_EQ(sorted.size, m.get_size()); + ASSERT_EQ(sorted.nonzeros, reference); +} + + +} // namespace diff --git a/core/test/base/matrix_data.cpp b/core/test/base/matrix_data.cpp index fcb2f48f29b..23181ce77bd 100644 --- a/core/test/base/matrix_data.cpp +++ b/core/test/base/matrix_data.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/mtx_io.cpp b/core/test/base/mtx_io.cpp index a5a17beeefc..83da9f28b21 100644 --- a/core/test/base/mtx_io.cpp +++ b/core/test/base/mtx_io.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -640,7 +640,7 @@ class RealDummyLinOpTest : public ::testing::Test { typename std::tuple_element<1, decltype(ValueIndexType())>::type; }; -TYPED_TEST_CASE(RealDummyLinOpTest, gko::test::RealValueIndexTypes); +TYPED_TEST_SUITE(RealDummyLinOpTest, gko::test::RealValueIndexTypes); TYPED_TEST(RealDummyLinOpTest, ReadsLinOpFromStream) @@ -713,7 +713,7 @@ class ComplexDummyLinOpTest : public ::testing::Test { typename std::tuple_element<1, decltype(ValueIndexType())>::type; }; -TYPED_TEST_CASE(ComplexDummyLinOpTest, gko::test::ComplexValueIndexTypes); +TYPED_TEST_SUITE(ComplexDummyLinOpTest, gko::test::ComplexValueIndexTypes); TYPED_TEST(ComplexDummyLinOpTest, ReadsLinOpFromStream) diff --git a/core/test/base/perturbation.cpp b/core/test/base/perturbation.cpp index c0ddbd73cdc..c9a6f1a1440 100644 --- a/core/test/base/perturbation.cpp +++ b/core/test/base/perturbation.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/polymorphic_object.cpp b/core/test/base/polymorphic_object.cpp index 4fcb52c7039..62c4445d7fb 100644 --- a/core/test/base/polymorphic_object.cpp +++ b/core/test/base/polymorphic_object.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/range.cpp b/core/test/base/range.cpp index 6be342ca380..c15c9c412da 100644 --- a/core/test/base/range.cpp +++ b/core/test/base/range.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -60,6 +60,13 @@ TEST(Span, CreatesPoint) } +TEST(Span, KnowsItsLength) +{ + gko::span s{3, 5}; + ASSERT_EQ(2, s.length()); +} + + TEST(Span, LessThanEvaluatesToTrue) { ASSERT_TRUE(gko::span(2, 3) < gko::span(4, 7)); @@ -645,7 +652,7 @@ TEST(Range, DividesScalarAndRange) } -TEST(Range, AddsRangeAndSclar) +TEST(Range, AddsRangeAndScalar) { dummy_range r{5u, 1, 2}; @@ -656,7 +663,7 @@ TEST(Range, AddsRangeAndSclar) } -TEST(Range, SubtractsRangeAndSclar) +TEST(Range, SubtractsRangeAndScalar) { dummy_range r{5u, 1, 2}; @@ -667,7 +674,7 @@ TEST(Range, SubtractsRangeAndSclar) } -TEST(Range, MultipliesRangeAndSclar) +TEST(Range, MultipliesRangeAndScalar) { dummy_range r{5u, 1, 2}; @@ -678,7 +685,7 @@ TEST(Range, MultipliesRangeAndSclar) } -TEST(Range, DividesRangeAndSclar) +TEST(Range, DividesRangeAndScalar) { dummy_range r{5u, 1, 2}; diff --git a/core/test/base/range_accessors.cpp b/core/test/base/range_accessors.cpp index 9066566a33a..eecac4ae60f 100644 --- a/core/test/base/range_accessors.cpp +++ b/core/test/base/range_accessors.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include + + +#include "core/test/utils.hpp" + + namespace { @@ -51,7 +57,7 @@ class RowMajorAccessor : public ::testing::Test { 3, 4, -2, 5, 6, -3 }; - //clang-format on + // clang-format on row_major_int_range r{data, 3u, 2u, 3u}; }; @@ -67,6 +73,14 @@ TEST_F(RowMajorAccessor, CanAccessData) } +TEST_F(RowMajorAccessor, CanWriteData) +{ + r(0, 0) = 4; + + EXPECT_EQ(r(0, 0), 4); +} + + TEST_F(RowMajorAccessor, CanCreateSubrange) { auto subr = r(span{1, 3}, span{0, 2}); diff --git a/core/test/base/sanitizers.cpp b/core/test/base/sanitizers.cpp index 724b7f38871..e3b7a23b628 100644 --- a/core/test/base/sanitizers.cpp +++ b/core/test/base/sanitizers.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/base/types.cpp b/core/test/base/types.cpp index 50979cd8a67..7f8a0924025 100644 --- a/core/test/base/types.cpp +++ b/core/test/base/types.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,9 +33,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include +#include +#include +#include + + #include +#include "core/base/types.hpp" + + namespace { @@ -100,4 +109,108 @@ TEST(PrecisionReduction, ComputesCommonEncoding) } +TEST(ConfigSet, MaskCorrectly) +{ + constexpr auto mask3_u = gko::detail::mask<3>(); + constexpr auto fullmask_u = gko::detail::mask<32>(); + constexpr auto mask3_u64 = gko::detail::mask<3, std::uint64_t>(); + constexpr auto fullmask_u64 = gko::detail::mask<64, std::uint64_t>(); + + ASSERT_EQ(mask3_u, 7u); + ASSERT_EQ(fullmask_u, 0xffffffffu); + ASSERT_TRUE((std::is_same::value)); + ASSERT_TRUE( + (std::is_same::value)); + ASSERT_EQ(mask3_u64, 7ull); + ASSERT_EQ(fullmask_u64, 0xffffffffffffffffull); + ASSERT_TRUE( + (std::is_same::value)); + ASSERT_TRUE( + (std::is_same::value)); +} + + +TEST(ConfigSet, ShiftCorrectly) +{ + constexpr std::array bits{3, 5, 7}; + + + constexpr auto shift0 = gko::detail::shift<0, 3>(bits); + constexpr auto shift1 = gko::detail::shift<1, 3>(bits); + constexpr auto shift2 = gko::detail::shift<2, 3>(bits); + + ASSERT_EQ(shift0, 12); + ASSERT_EQ(shift1, 7); + ASSERT_EQ(shift2, 0); +} + + +TEST(ConfigSet, ConfigSet1Correctly) +{ + using Cfg = gko::ConfigSet<3>; + + constexpr auto encoded = Cfg::encode(2); + constexpr auto decoded = Cfg::decode<0>(encoded); + + ASSERT_EQ(encoded, 2); + ASSERT_EQ(decoded, 2); +} + + +TEST(ConfigSet, ConfigSet1FullCorrectly) +{ + using Cfg = gko::ConfigSet<32>; + + constexpr auto encoded = Cfg::encode(0xffffffff); + constexpr auto decoded = Cfg::decode<0>(encoded); + + ASSERT_EQ(encoded, 0xffffffff); + ASSERT_EQ(decoded, 0xffffffff); +} + + +TEST(ConfigSet, ConfigSet2FullCorrectly) +{ + using Cfg = gko::ConfigSet<1, 31>; + + constexpr auto encoded = Cfg::encode(1, 33); + + ASSERT_EQ(encoded, (1u << 31) + 33); +} + + +TEST(ConfigSet, ConfigSetSomeCorrectly) +{ + using Cfg = gko::ConfigSet<3, 5, 7>; + + constexpr auto encoded = Cfg::encode(2, 11, 13); + constexpr auto decoded_0 = Cfg::decode<0>(encoded); + constexpr auto decoded_1 
= Cfg::decode<1>(encoded); + constexpr auto decoded_2 = Cfg::decode<2>(encoded); + + ASSERT_EQ(encoded, (2 << 12) + (11 << 7) + 13); + ASSERT_EQ(decoded_0, 2); + ASSERT_EQ(decoded_1, 11); + ASSERT_EQ(decoded_2, 13); +} + + +TEST(ConfigSet, ConfigSetSomeFullCorrectly) +{ + using Cfg = gko::ConfigSet<2, 6, 7, 17>; + + constexpr auto encoded = Cfg::encode(2, 11, 13, 19); + constexpr auto decoded_0 = Cfg::decode<0>(encoded); + constexpr auto decoded_1 = Cfg::decode<1>(encoded); + constexpr auto decoded_2 = Cfg::decode<2>(encoded); + constexpr auto decoded_3 = Cfg::decode<3>(encoded); + + ASSERT_EQ(encoded, (2 << 30) + (11 << 24) + (13 << 17) + 19); + ASSERT_EQ(decoded_0, 2); + ASSERT_EQ(decoded_1, 11); + ASSERT_EQ(decoded_2, 13); + ASSERT_EQ(decoded_3, 19); +} + + } // namespace diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp index df187cfb81f..6d8985e1dff 100644 --- a/core/test/base/utils.cpp +++ b/core/test/base/utils.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -400,20 +400,60 @@ class TemporaryClone : public ::testing::Test { }; -TEST_F(TemporaryClone, CopiesToAnotherExecutor) +TEST_F(TemporaryClone, DoesNotCopyToSameMemory) { - auto clone = make_temporary_clone(omp, gko::lend(obj)); + auto other = gko::ReferenceExecutor::create(); + auto clone = make_temporary_clone(other, gko::lend(obj)); - ASSERT_EQ(clone.get()->get_executor(), omp); + ASSERT_NE(clone.get()->get_executor(), other); + ASSERT_EQ(obj->get_executor(), ref); +} + + +TEST_F(TemporaryClone, OutputDoesNotCopyToSameMemory) +{ + auto other = gko::ReferenceExecutor::create(); + auto clone = make_temporary_output_clone(other, gko::lend(obj)); + + ASSERT_NE(clone.get()->get_executor(), other); ASSERT_EQ(obj->get_executor(), ref); } TEST_F(TemporaryClone, CopiesBackAfterLeavingScope) { + obj->data = 4; { auto clone = make_temporary_clone(omp, gko::lend(obj)); clone.get()->data = 7; + + ASSERT_EQ(obj->data, 4); + } + ASSERT_EQ(obj->get_executor(), ref); + ASSERT_EQ(obj->data, 7); +} + + +TEST_F(TemporaryClone, OutputCopiesBackAfterLeavingScope) +{ + obj->data = 4; + { + auto clone = make_temporary_output_clone(omp, gko::lend(obj)); + clone.get()->data = 7; + + ASSERT_EQ(obj->data, 4); + } + ASSERT_EQ(obj->get_executor(), ref); + ASSERT_EQ(obj->data, 7); +} + + +TEST_F(TemporaryClone, DoesntCopyBackConstAfterLeavingScope) +{ + { + auto clone = make_temporary_clone( + omp, static_cast(gko::lend(obj))); + obj->data = 7; } ASSERT_EQ(obj->get_executor(), ref); diff --git a/core/test/base/version.cpp b/core/test/base/version.cpp index 0fc86a03b7f..13418277cf9 100644 --- a/core/test/base/version.cpp +++ b/core/test/base/version.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/factorization/CMakeLists.txt b/core/test/factorization/CMakeLists.txt index 9b2e3082e51..8d9529eec9d 100644 --- a/core/test/factorization/CMakeLists.txt +++ b/core/test/factorization/CMakeLists.txt @@ -1,2 +1,4 @@ +ginkgo_create_test(par_ic) +ginkgo_create_test(par_ict) ginkgo_create_test(par_ilu) ginkgo_create_test(par_ilut) diff --git a/core/test/factorization/par_ic.cpp b/core/test/factorization/par_ic.cpp new file mode 100644 index 00000000000..2bd4bc4fab4 --- /dev/null +++ b/core/test/factorization/par_ic.cpp @@ -0,0 +1,137 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class ParIc : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using ic_factory_type = gko::factorization::ParIc; + using strategy_type = typename ic_factory_type::matrix_type::classical; + +protected: + ParIc() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(ParIc, gko::test::ValueIndexTypes); + + +TYPED_TEST(ParIc, SetIterations) +{ + auto factory = + TestFixture::ic_factory_type::build().with_iterations(5u).on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 5u); +} + + +TYPED_TEST(ParIc, SetSkip) +{ + auto factory = + TestFixture::ic_factory_type::build().with_skip_sorting(true).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().skip_sorting, true); +} + + +TYPED_TEST(ParIc, SetLStrategy) +{ + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ic_factory_type::build().with_l_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); +} + + +TYPED_TEST(ParIc, SetBothFactors) +{ + auto factory = + TestFixture::ic_factory_type::build().with_both_factors(false).on( + this->ref); + + 
ASSERT_FALSE(factory->get_parameters().both_factors); +} + + +TYPED_TEST(ParIc, SetDefaults) +{ + auto factory = TestFixture::ic_factory_type::build().on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 0u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); + ASSERT_EQ(factory->get_parameters().l_strategy, nullptr); + ASSERT_TRUE(factory->get_parameters().both_factors); +} + + +TYPED_TEST(ParIc, SetEverything) +{ + auto strategy = std::make_shared(); + + auto factory = TestFixture::ic_factory_type::build() + .with_iterations(7u) + .with_skip_sorting(false) + .with_l_strategy(strategy) + .with_both_factors(false) + .on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 7u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); + ASSERT_FALSE(factory->get_parameters().both_factors); +} + + +} // namespace diff --git a/core/test/factorization/par_ict.cpp b/core/test/factorization/par_ict.cpp new file mode 100644 index 00000000000..b06281a8a30 --- /dev/null +++ b/core/test/factorization/par_ict.cpp @@ -0,0 +1,164 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class ParIct : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using ict_factory_type = gko::factorization::ParIct; + using strategy_type = typename ict_factory_type::matrix_type::classical; + +protected: + ParIct() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + +TYPED_TEST_SUITE(ParIct, gko::test::ValueIndexTypes); + + +TYPED_TEST(ParIct, SetIterations) +{ + auto factory = + TestFixture::ict_factory_type::build().with_iterations(6u).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 6u); +} + + +TYPED_TEST(ParIct, SetSkip) +{ + auto factory = + TestFixture::ict_factory_type::build().with_skip_sorting(true).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().skip_sorting, true); +} + + +TYPED_TEST(ParIct, SetApprox) +{ + auto factory = TestFixture::ict_factory_type::build() + .with_approximate_select(false) + .on(this->ref); + + 
ASSERT_EQ(factory->get_parameters().approximate_select, false); +} + + +TYPED_TEST(ParIct, SetDeterministic) +{ + auto factory = TestFixture::ict_factory_type::build() + .with_deterministic_sample(true) + .on(this->ref); + + ASSERT_EQ(factory->get_parameters().deterministic_sample, true); +} + + +TYPED_TEST(ParIct, SetFillIn) +{ + auto factory = + TestFixture::ict_factory_type::build().with_fill_in_limit(1.2).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().fill_in_limit, 1.2); +} + + +TYPED_TEST(ParIct, SetLStrategy) +{ + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ict_factory_type::build().with_l_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); +} + + +TYPED_TEST(ParIct, SetDefaults) +{ + auto factory = TestFixture::ict_factory_type::build().on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 5u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); + ASSERT_EQ(factory->get_parameters().approximate_select, true); + ASSERT_EQ(factory->get_parameters().deterministic_sample, false); + ASSERT_EQ(factory->get_parameters().fill_in_limit, 2.0); + ASSERT_EQ(factory->get_parameters().l_strategy, nullptr); +} + + +TYPED_TEST(ParIct, SetEverything) +{ + auto strategy = std::make_shared(); + + auto factory = TestFixture::ict_factory_type::build() + .with_iterations(7u) + .with_skip_sorting(true) + .with_approximate_select(false) + .with_deterministic_sample(true) + .with_fill_in_limit(1.2) + .with_l_strategy(strategy) + .on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 7u); + ASSERT_EQ(factory->get_parameters().skip_sorting, true); + ASSERT_EQ(factory->get_parameters().approximate_select, false); + ASSERT_EQ(factory->get_parameters().deterministic_sample, true); + ASSERT_EQ(factory->get_parameters().fill_in_limit, 1.2); + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); +} + + +} // namespace diff --git a/core/test/factorization/par_ilu.cpp 
b/core/test/factorization/par_ilu.cpp index 75c714fcab1..6a1022d49db 100644 --- a/core/test/factorization/par_ilu.cpp +++ b/core/test/factorization/par_ilu.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -53,6 +53,7 @@ class ParIlu : public ::testing::Test { using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; using ilu_factory_type = gko::factorization::ParIlu; + using strategy_type = typename ilu_factory_type::matrix_type::classical; protected: ParIlu() : ref(gko::ReferenceExecutor::create()) {} @@ -60,7 +61,7 @@ class ParIlu : public ::testing::Test { std::shared_ptr ref; }; -TYPED_TEST_CASE(ParIlu, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(ParIlu, gko::test::ValueIndexTypes); TYPED_TEST(ParIlu, SetIterations) @@ -83,15 +84,57 @@ TYPED_TEST(ParIlu, SetSkip) } +TYPED_TEST(ParIlu, SetLStrategy) +{ + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ilu_factory_type::build().with_l_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); +} + + +TYPED_TEST(ParIlu, SetUStrategy) +{ + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ilu_factory_type::build().with_u_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().u_strategy, strategy); +} + + +TYPED_TEST(ParIlu, SetDefaults) +{ + auto factory = TestFixture::ilu_factory_type::build().on(this->ref); + + ASSERT_EQ(factory->get_parameters().iterations, 0u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); + ASSERT_EQ(factory->get_parameters().l_strategy, nullptr); + ASSERT_EQ(factory->get_parameters().u_strategy, nullptr); +} + + TYPED_TEST(ParIlu, SetEverything) { + auto strategy = std::make_shared(); + auto strategy2 = std::make_shared(); + auto 
factory = TestFixture::ilu_factory_type::build() - .with_skip_sorting(false) .with_iterations(7u) + .with_skip_sorting(false) + .with_l_strategy(strategy) + .with_u_strategy(strategy2) .on(this->ref); - ASSERT_EQ(factory->get_parameters().skip_sorting, false); ASSERT_EQ(factory->get_parameters().iterations, 7u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); + ASSERT_EQ(factory->get_parameters().u_strategy, strategy2); } diff --git a/core/test/factorization/par_ilut.cpp b/core/test/factorization/par_ilut.cpp index e33f0bc35b5..293763cf0af 100644 --- a/core/test/factorization/par_ilut.cpp +++ b/core/test/factorization/par_ilut.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,15 +39,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/utils.hpp" + + namespace { +template class ParIlut : public ::testing::Test { public: - using value_type = gko::default_precision; - using index_type = gko::int32; + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; using ilut_factory_type = gko::factorization::ParIlut; + using strategy_type = typename ilut_factory_type::matrix_type::classical; protected: ParIlut() : ref(gko::ReferenceExecutor::create()) {} @@ -55,76 +62,119 @@ class ParIlut : public ::testing::Test { std::shared_ptr ref; }; +TYPED_TEST_SUITE(ParIlut, gko::test::ValueIndexTypes); -TEST_F(ParIlut, SetIterations) + +TYPED_TEST(ParIlut, SetIterations) { - auto factory = ilut_factory_type::build().with_iterations(6u).on(ref); + auto factory = + TestFixture::ilut_factory_type::build().with_iterations(6u).on( + this->ref); ASSERT_EQ(factory->get_parameters().iterations, 6u); } -TEST_F(ParIlut, SetSkip) +TYPED_TEST(ParIlut, SetSkip) { - auto factory = ilut_factory_type::build().with_skip_sorting(true).on(ref); + auto factory = + TestFixture::ilut_factory_type::build().with_skip_sorting(true).on( + this->ref); ASSERT_EQ(factory->get_parameters().skip_sorting, true); } -TEST_F(ParIlut, SetApprox) +TYPED_TEST(ParIlut, SetApprox) { - auto factory = - ilut_factory_type::build().with_approximate_select(false).on(ref); + auto factory = TestFixture::ilut_factory_type::build() + .with_approximate_select(false) + .on(this->ref); ASSERT_EQ(factory->get_parameters().approximate_select, false); } -TEST_F(ParIlut, SetDeterministic) +TYPED_TEST(ParIlut, SetDeterministic) { - auto factory = - ilut_factory_type::build().with_deterministic_sample(true).on(ref); + auto factory = TestFixture::ilut_factory_type::build() + .with_deterministic_sample(true) + .on(this->ref); ASSERT_EQ(factory->get_parameters().deterministic_sample, true); } -TEST_F(ParIlut, SetFillIn) 
+TYPED_TEST(ParIlut, SetFillIn) { - auto factory = ilut_factory_type::build().with_fill_in_limit(1.2).on(ref); + auto factory = + TestFixture::ilut_factory_type::build().with_fill_in_limit(1.2).on( + this->ref); ASSERT_EQ(factory->get_parameters().fill_in_limit, 1.2); } -TEST_F(ParIlut, SetDefaults) +TYPED_TEST(ParIlut, SetLStrategy) { - auto factory = ilut_factory_type::build().on(ref); + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ilut_factory_type::build().with_l_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); +} + + +TYPED_TEST(ParIlut, SetUStrategy) +{ + auto strategy = std::make_shared(); + + auto factory = + TestFixture::ilut_factory_type::build().with_u_strategy(strategy).on( + this->ref); + + ASSERT_EQ(factory->get_parameters().u_strategy, strategy); +} + + +TYPED_TEST(ParIlut, SetDefaults) +{ + auto factory = TestFixture::ilut_factory_type::build().on(this->ref); - ASSERT_EQ(factory->get_parameters().skip_sorting, false); ASSERT_EQ(factory->get_parameters().iterations, 5u); + ASSERT_EQ(factory->get_parameters().skip_sorting, false); ASSERT_EQ(factory->get_parameters().approximate_select, true); ASSERT_EQ(factory->get_parameters().deterministic_sample, false); ASSERT_EQ(factory->get_parameters().fill_in_limit, 2.0); + ASSERT_EQ(factory->get_parameters().l_strategy, nullptr); + ASSERT_EQ(factory->get_parameters().u_strategy, nullptr); } -TEST_F(ParIlut, SetEverything) +TYPED_TEST(ParIlut, SetEverything) { - auto factory = ilut_factory_type::build() - .with_skip_sorting(true) + auto strategy = std::make_shared(); + auto strategy2 = std::make_shared(); + + auto factory = TestFixture::ilut_factory_type::build() .with_iterations(7u) + .with_skip_sorting(true) .with_approximate_select(false) .with_deterministic_sample(true) .with_fill_in_limit(1.2) - .on(ref); + .with_l_strategy(strategy) + .with_u_strategy(strategy2) + .on(this->ref); - 
ASSERT_EQ(factory->get_parameters().skip_sorting, true); ASSERT_EQ(factory->get_parameters().iterations, 7u); + ASSERT_EQ(factory->get_parameters().skip_sorting, true); ASSERT_EQ(factory->get_parameters().approximate_select, false); ASSERT_EQ(factory->get_parameters().deterministic_sample, true); ASSERT_EQ(factory->get_parameters().fill_in_limit, 1.2); + ASSERT_EQ(factory->get_parameters().l_strategy, strategy); + ASSERT_EQ(factory->get_parameters().u_strategy, strategy2); } diff --git a/core/test/log/convergence.cpp b/core/test/log/convergence.cpp index bb05007817b..e53de5a681b 100644 --- a/core/test/log/convergence.cpp +++ b/core/test/log/convergence.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ namespace { template class Convergence : public ::testing::Test {}; -TYPED_TEST_CASE(Convergence, gko::test::ValueTypes); +TYPED_TEST_SUITE(Convergence, gko::test::ValueTypes); TYPED_TEST(Convergence, CanGetData) @@ -56,6 +56,7 @@ TYPED_TEST(Convergence, CanGetData) auto logger = gko::log::Convergence::create( exec, gko::log::Logger::iteration_complete_mask); + ASSERT_EQ(logger->has_converged(), false); ASSERT_EQ(logger->get_num_iterations(), 0); ASSERT_EQ(logger->get_residual(), nullptr); ASSERT_EQ(logger->get_residual_norm(), nullptr); diff --git a/core/test/log/logger.cpp b/core/test/log/logger.cpp index e051e16f692..d9b1cd11f4e 100644 --- a/core/test/log/logger.cpp +++ b/core/test/log/logger.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include @@ -65,8 +66,8 @@ TEST(DummyLogged, CanAddLogger) auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; - c.add_logger( - gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); + c.add_logger(gko::log::Convergence<>::create( + exec, gko::log::Logger::all_events_mask)); ASSERT_EQ(c.get_num_loggers(), 1); } @@ -77,12 +78,46 @@ TEST(DummyLogged, CanAddMultipleLoggers) auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; + c.add_logger(gko::log::Convergence<>::create( + exec, gko::log::Logger::all_events_mask)); + c.add_logger(gko::log::Stream<>::create( + exec, gko::log::Logger::all_events_mask, std::cout)); + + ASSERT_EQ(c.get_num_loggers(), 2); +} + + +TEST(DummyLogged, CanAccessLoggers) +{ + auto exec = gko::ReferenceExecutor::create(); + DummyLoggedClass c; + + auto logger1 = gko::share( + gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); + auto logger2 = gko::share(gko::log::Stream<>::create( + exec, gko::log::Logger::all_events_mask, std::cout)); + + c.add_logger(logger1); + c.add_logger(logger2); + + ASSERT_EQ(c.get_loggers()[0], logger1); + ASSERT_EQ(c.get_loggers()[1], logger2); + ASSERT_EQ(c.get_num_loggers(), 2); +} + + +TEST(DummyLogged, CanClearLoggers) +{ + auto exec = gko::ReferenceExecutor::create(); + DummyLoggedClass c; c.add_logger( gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); c.add_logger(gko::log::Stream<>::create( exec, gko::log::Logger::all_events_mask, std::cout)); - ASSERT_EQ(c.get_num_loggers(), 2); + c.clear_loggers(); + + ASSERT_EQ(c.get_num_loggers(), 0); } @@ -90,8 +125,8 @@ TEST(DummyLogged, CanRemoveLogger) { auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; - auto r = gko::share( - gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); + auto r = gko::share(gko::log::Convergence<>::create( + exec, gko::log::Logger::all_events_mask)); c.add_logger(r); c.add_logger(gko::log::Stream<>::create( 
exec, gko::log::Logger::all_events_mask, std::cout)); diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index d318a29f228..c5fb0d98f95 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -119,7 +119,7 @@ class Papi : public ::testing::Test { int eventset; }; -TYPED_TEST_CASE(Papi, gko::test::ValueTypes); +TYPED_TEST_SUITE(Papi, gko::test::ValueTypes); TYPED_TEST(Papi, CatchesAllocationStarted) @@ -482,7 +482,7 @@ TYPED_TEST(Papi, CatchesIterationComplete) this->start(); this->logger->template on( - A.get(), 42, nullptr, nullptr, nullptr); + A.get(), 42, nullptr, nullptr, nullptr, nullptr); long long int value = 0; this->stop(&value); diff --git a/core/test/log/record.cpp b/core/test/log/record.cpp index dd829d39a8c..a9a0947d199 100644 --- a/core/test/log/record.cpp +++ b/core/test/log/record.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -464,7 +464,7 @@ TEST(Record, CatchesCriterionCheckStarted) } -TEST(Record, CatchesCriterionCheckCompleted) +TEST(Record, CatchesCriterionCheckCompletedOld) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( @@ -494,6 +494,36 @@ TEST(Record, CatchesCriterionCheckCompleted) } +TEST(Record, CatchesCriterionCheckCompleted) +{ + auto exec = gko::ReferenceExecutor::create(); + auto logger = gko::log::Record::create( + exec, gko::log::Logger::criterion_check_completed_mask); + auto criterion = + gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( + nullptr, nullptr, nullptr); + constexpr gko::uint8 RelativeStoppingId{42}; + gko::Array stop_status(exec, 1); + + logger->on( + criterion.get(), 1, nullptr, nullptr, nullptr, nullptr, + RelativeStoppingId, true, &stop_status, true, true); + + stop_status.get_data()->reset(); + stop_status.get_data()->stop(RelativeStoppingId); + auto &data = logger->get().criterion_check_completed.back(); + ASSERT_NE(data->criterion, nullptr); + ASSERT_EQ(data->stopping_id, RelativeStoppingId); + ASSERT_EQ(data->set_finalized, true); + ASSERT_EQ(data->status->get_const_data()->has_stopped(), true); + ASSERT_EQ(data->status->get_const_data()->get_id(), + stop_status.get_const_data()->get_id()); + ASSERT_EQ(data->status->get_const_data()->is_finalized(), true); + ASSERT_EQ(data->oneChanged, true); + ASSERT_EQ(data->converged, true); +} + + TEST(Record, CatchesIterations) { using Dense = gko::matrix::Dense<>; @@ -509,11 +539,12 @@ TEST(Record, CatchesIterations) auto residual = gko::initialize({-4.4}, exec); auto solution = gko::initialize({-2.2}, exec); auto residual_norm = gko::initialize({-3.3}, exec); + auto implicit_sq_residual_norm = gko::initialize({-3.5}, exec); logger->on( solver.get(), num_iters, residual.get(), solution.get(), - residual_norm.get()); + residual_norm.get(), implicit_sq_residual_norm.get()); auto 
&data = logger->get().iteration_completed.back(); ASSERT_NE(data->solver.get(), nullptr); @@ -522,6 +553,8 @@ TEST(Record, CatchesIterations) GKO_ASSERT_MTX_NEAR(gko::as(data->solution.get()), solution, 0); GKO_ASSERT_MTX_NEAR(gko::as(data->residual_norm.get()), residual_norm, 0); + GKO_ASSERT_MTX_NEAR(gko::as(data->implicit_sq_residual_norm.get()), + implicit_sq_residual_norm, 0); } diff --git a/core/test/log/stream.cpp b/core/test/log/stream.cpp index 163a54fd74a..42e5a665417 100644 --- a/core/test/log/stream.cpp +++ b/core/test/log/stream.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ constexpr int num_iters = 10; template class Stream : public ::testing::Test {}; -TYPED_TEST_CASE(Stream, gko::test::ValueTypes); +TYPED_TEST_SUITE(Stream, gko::test::ValueTypes); TYPED_TEST(Stream, CatchesAllocationStarted) @@ -708,13 +708,15 @@ TYPED_TEST(Stream, CatchesIterations) auto residual = Dense::create(exec); auto solution = Dense::create(exec); auto residual_norm = Dense::create(exec); + auto implicit_sq_residual_norm = Dense::create(exec); std::stringstream ptrstream_solver; ptrstream_solver << solver.get(); std::stringstream ptrstream_residual; ptrstream_residual << residual.get(); logger->template on( - solver.get(), num_iters, residual.get()); + solver.get(), num_iters, residual.get(), solution.get(), + residual_norm.get(), implicit_sq_residual_norm.get()); GKO_ASSERT_STR_CONTAINS(out.str(), "iteration " + std::to_string(num_iters)); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index ac36ed4f7ff..64b3b3ed593 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -5,6 +5,8 @@ ginkgo_create_test(csr_builder) ginkgo_create_test(dense) ginkgo_create_test(diagonal) 
ginkgo_create_test(ell) +ginkgo_create_test(fbcsr) +ginkgo_create_test(fbcsr_builder) ginkgo_create_test(hybrid) ginkgo_create_test(identity) ginkgo_create_test(permutation) diff --git a/core/test/matrix/coo.cpp b/core/test/matrix/coo.cpp index 92a999febb6..3500dc77edc 100644 --- a/core/test/matrix/coo.cpp +++ b/core/test/matrix/coo.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -107,7 +107,7 @@ class Coo : public ::testing::Test { } }; -TYPED_TEST_CASE(Coo, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Coo, gko::test::ValueIndexTypes); TYPED_TEST(Coo, KnowsItsSize) @@ -211,6 +211,26 @@ TYPED_TEST(Coo, CanBeReadFromMatrixData) } +TYPED_TEST(Coo, CanBeReadFromMatrixAssemblyData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx(m.get()); +} + + TYPED_TEST(Coo, GeneratesCorrectMatrixData) { using Mtx = typename TestFixture::Mtx; diff --git a/core/test/matrix/coo_builder.cpp b/core/test/matrix/coo_builder.cpp index de5844b0bbe..67f847681d1 100644 --- a/core/test/matrix/coo_builder.cpp +++ b/core/test/matrix/coo_builder.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ class CooBuilder : public ::testing::Test { std::unique_ptr mtx; }; -TYPED_TEST_CASE(CooBuilder, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(CooBuilder, gko::test::ValueIndexTypes); TYPED_TEST(CooBuilder, ReturnsCorrectArrays) diff --git a/core/test/matrix/csr.cpp b/core/test/matrix/csr.cpp index f927861afce..a9bcb92a7c8 100644 --- a/core/test/matrix/csr.cpp +++ b/core/test/matrix/csr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -111,7 +111,7 @@ class Csr : public ::testing::Test { } }; -TYPED_TEST_CASE(Csr, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Csr, gko::test::ValueIndexTypes); TYPED_TEST(Csr, KnowsItsSize) @@ -221,6 +221,27 @@ TYPED_TEST(Csr, CanBeReadFromMatrixData) } +TYPED_TEST(Csr, CanBeReadFromMatrixAssemblyData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec, + std::make_shared(2)); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx(m.get()); +} + + TYPED_TEST(Csr, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; diff --git a/core/test/matrix/csr_builder.cpp b/core/test/matrix/csr_builder.cpp index 9a1bfb6eb5f..d5a0187ce93 100644 --- a/core/test/matrix/csr_builder.cpp +++ b/core/test/matrix/csr_builder.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 
2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ class CsrBuilder : public ::testing::Test { std::unique_ptr mtx; }; -TYPED_TEST_CASE(CsrBuilder, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(CsrBuilder, gko::test::ValueIndexTypes); TYPED_TEST(CsrBuilder, ReturnsCorrectArrays) diff --git a/core/test/matrix/dense.cpp b/core/test/matrix/dense.cpp index c89f9a740e5..77dd543ac68 100644 --- a/core/test/matrix/dense.cpp +++ b/core/test/matrix/dense.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -80,7 +80,7 @@ class Dense : public ::testing::Test { std::unique_ptr> mtx; }; -TYPED_TEST_CASE(Dense, gko::test::ValueTypes); +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); TYPED_TEST(Dense, CanBeEmpty) @@ -138,6 +138,18 @@ TYPED_TEST(Dense, CanBeConstructedFromExistingData) } +TYPED_TEST(Dense, CreateWithSameConfigKeepsStride) +{ + auto m = + gko::matrix::Dense::create(this->exec, gko::dim<2>{2, 3}, 4); + auto m2 = gko::matrix::Dense::create_with_config_of(m.get()); + + ASSERT_EQ(m2->get_size(), gko::dim<2>(2, 3)); + EXPECT_EQ(m2->get_stride(), 4); + ASSERT_EQ(m2->get_num_stored_elements(), 8); +} + + TYPED_TEST(Dense, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx.get()); @@ -278,6 +290,31 @@ TYPED_TEST(Dense, GeneratesCorrectMatrixData) } +TYPED_TEST(Dense, CanBeReadFromMatrixAssemblyData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::matrix::Dense::create(this->exec); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + 
ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 6); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1), value_type{3.0}); + EXPECT_EQ(m->at(1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 2), value_type{2.0}); + ASSERT_EQ(m->at(1, 2), value_type{0.0}); +} + + TYPED_TEST(Dense, CanCreateSubmatrix) { using value_type = typename TestFixture::value_type; @@ -303,4 +340,40 @@ TYPED_TEST(Dense, CanCreateSubmatrixWithStride) } +TYPED_TEST(Dense, CanCreateRealView) +{ + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + auto real_view = this->mtx->create_real_view(); + + if (gko::is_complex()) { + EXPECT_EQ(real_view->get_size()[0], this->mtx->get_size()[0]); + EXPECT_EQ(real_view->get_size()[1], 2 * this->mtx->get_size()[1]); + EXPECT_EQ(real_view->get_stride(), 2 * this->mtx->get_stride()); + EXPECT_EQ(real_view->at(0, 0), real_type{1.0}); + EXPECT_EQ(real_view->at(0, 1), real_type{0.0}); + EXPECT_EQ(real_view->at(0, 2), real_type{2.0}); + EXPECT_EQ(real_view->at(0, 3), real_type{0.0}); + EXPECT_EQ(real_view->at(0, 4), real_type{3.0}); + EXPECT_EQ(real_view->at(0, 5), real_type{0.0}); + EXPECT_EQ(real_view->at(1, 0), real_type{1.5}); + EXPECT_EQ(real_view->at(1, 1), real_type{0.0}); + EXPECT_EQ(real_view->at(1, 2), real_type{2.5}); + EXPECT_EQ(real_view->at(1, 3), real_type{0.0}); + EXPECT_EQ(real_view->at(1, 4), real_type{3.5}); + EXPECT_EQ(real_view->at(1, 5), real_type{0.0}); + } else { + EXPECT_EQ(real_view->get_size()[0], this->mtx->get_size()[0]); + EXPECT_EQ(real_view->get_size()[1], this->mtx->get_size()[1]); + EXPECT_EQ(real_view->get_stride(), this->mtx->get_stride()); + EXPECT_EQ(real_view->at(0, 0), real_type{1.0}); + EXPECT_EQ(real_view->at(0, 1), real_type{2.0}); + EXPECT_EQ(real_view->at(0, 2), real_type{3.0}); + EXPECT_EQ(real_view->at(1, 0), real_type{1.5}); + EXPECT_EQ(real_view->at(1, 1), real_type{2.5}); + 
EXPECT_EQ(real_view->at(1, 2), real_type{3.5}); + } +} + + } // namespace diff --git a/core/test/matrix/diagonal.cpp b/core/test/matrix/diagonal.cpp index 3fafc05d83e..9e35a51c31e 100644 --- a/core/test/matrix/diagonal.cpp +++ b/core/test/matrix/diagonal.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -77,7 +77,7 @@ class Diagonal : public ::testing::Test { } }; -TYPED_TEST_CASE(Diagonal, gko::test::ValueTypes); +TYPED_TEST_SUITE(Diagonal, gko::test::ValueTypes); TYPED_TEST(Diagonal, KnowsItsSize) diff --git a/core/test/matrix/ell.cpp b/core/test/matrix/ell.cpp index 6e92f1251ba..f62e17e3958 100644 --- a/core/test/matrix/ell.cpp +++ b/core/test/matrix/ell.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -110,7 +110,7 @@ class Ell : public ::testing::Test { } }; -TYPED_TEST_CASE(Ell, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Ell, gko::test::ValueIndexTypes); TYPED_TEST(Ell, KnowsItsSize) @@ -231,4 +231,24 @@ TYPED_TEST(Ell, GeneratesCorrectMatrixData) } +TYPED_TEST(Ell, CanBeReadFromMatrixAssemblyData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx(m.get()); +} + + } // namespace diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp new file mode 100644 index 00000000000..80890e146e5 --- /dev/null +++ b/core/test/matrix/fbcsr.cpp @@ -0,0 +1,501 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/test/matrix/fbcsr_sample.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +void assert_matrices_are_same( + const gko::matrix::Fbcsr *const bm, + const gko::matrix::Csr *const cm, + const gko::matrix::Diagonal *const diam = nullptr, + const gko::matrix_data *const md = nullptr) +{ + if (cm) { + ASSERT_EQ(bm->get_size(), cm->get_size()); + ASSERT_EQ(bm->get_num_stored_elements(), cm->get_num_stored_elements()); + } + if (md) { + ASSERT_EQ(bm->get_size(), md->size); + ASSERT_EQ(bm->get_num_stored_elements(), md->nonzeros.size()); + } + if (diam) { + const gko::size_type minsize = + std::min(bm->get_size()[0], bm->get_size()[1]); + ASSERT_EQ(minsize, diam->get_size()[0]); + ASSERT_EQ(minsize, diam->get_size()[1]); + } + + const IndexType nbrows = bm->get_num_block_rows(); + const int bs = bm->get_block_size(); + const auto nbnz = bm->get_num_stored_blocks(); + gko::acc::range> fbvals( + std::array{nbnz, static_cast(bs), + static_cast(bs)}, + bm->get_const_values()); + + for 
(IndexType ibrow = 0; ibrow < nbrows; ibrow++) { + const IndexType *const browptr = bm->get_const_row_ptrs(); + const IndexType numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; + for (IndexType irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { + const IndexType rowstart = browptr[ibrow] * bs * bs + + (irow - ibrow * bs) * numblocksbrow * bs; + if (cm) { + ASSERT_EQ(cm->get_const_row_ptrs()[irow], rowstart); + } + } + + const IndexType iz_browstart = browptr[ibrow] * bs * bs; + const IndexType *const bcolinds = bm->get_const_col_idxs(); + + for (IndexType ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; + ibnz++) { + const IndexType bcol = bcolinds[ibnz]; + const IndexType blkoffset_frombrowstart = ibnz - browptr[ibrow]; + + for (int ib = 0; ib < bs; ib++) { + const IndexType row = ibrow * bs + ib; + const IndexType inz_rowstart = + iz_browstart + ib * numblocksbrow * bs; + const IndexType inz_blockstart_row = + inz_rowstart + blkoffset_frombrowstart * bs; + + for (int jb = 0; jb < bs; jb++) { + const IndexType col = bcol * bs + jb; + const IndexType inz = inz_blockstart_row + jb; + if (cm) { + ASSERT_EQ(col, cm->get_const_col_idxs()[inz]); + ASSERT_EQ(fbvals(ibnz, ib, jb), + cm->get_const_values()[inz]); + } + if (md) { + ASSERT_EQ(row, md->nonzeros[inz].row); + ASSERT_EQ(col, md->nonzeros[inz].column); + ASSERT_EQ(fbvals(ibnz, ib, jb), + md->nonzeros[inz].value); + } + if (row == col && diam) { + ASSERT_EQ(fbvals(ibnz, ib, jb), + diam->get_const_values()[row]); + } + } + } + } + } +} + + +template +void check_sample_generator_common(const SampleGenerator sg) +{ + auto fbmtx = sg.generate_fbcsr(); + ASSERT_EQ(fbmtx->get_num_block_rows(), sg.nbrows); + ASSERT_EQ(fbmtx->get_num_block_cols(), sg.nbcols); + ASSERT_EQ(fbmtx->get_size()[0], sg.nrows); + ASSERT_EQ(fbmtx->get_size()[1], sg.ncols); + ASSERT_EQ(fbmtx->get_num_stored_blocks(), sg.nbnz); + ASSERT_EQ(fbmtx->get_num_stored_elements(), sg.nnz); +} + + +template +class FbcsrSample : public ::testing::Test 
{ +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSample() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSample, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrSample, SampleGeneratorsAreCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; + using Diag = gko::matrix::Diagonal; + auto ref = this->ref; + gko::testing::FbcsrSample fbsample(ref); + gko::testing::FbcsrSample2 fbsample2(ref); + + std::unique_ptr fbmtx = fbsample.generate_fbcsr(); + std::unique_ptr csmtx = fbsample.generate_csr(); + const MtxData mdata = fbsample.generate_matrix_data_with_explicit_zeros(); + std::unique_ptr fbmtx2 = fbsample2.generate_fbcsr(); + std::unique_ptr csmtx2 = fbsample2.generate_csr(); + std::unique_ptr diag2 = fbsample2.extract_diagonal(); + const gko::Array nnzperrow = fbsample2.getNonzerosPerRow(); + + check_sample_generator_common(fbsample); + assert_matrices_are_same(fbmtx.get(), csmtx.get(), + static_cast(nullptr), &mdata); + check_sample_generator_common(fbsample2); + assert_matrices_are_same(fbmtx2.get(), csmtx2.get(), diag2.get()); + for (index_type irow = 0; irow < fbsample2.nrows; irow++) { + const index_type *const row_ptrs = csmtx2->get_const_row_ptrs(); + const index_type num_nnz_row = row_ptrs[irow + 1] - row_ptrs[irow]; + ASSERT_EQ(nnzperrow.get_const_data()[irow], num_nnz_row); + for (index_type iz = row_ptrs[irow]; iz < row_ptrs[irow + 1]; iz++) { + const index_type col = csmtx2->get_const_col_idxs()[iz]; + if (irow == col) { + ASSERT_EQ(csmtx2->get_const_values()[iz], + diag2->get_const_values()[irow]); + } + } + } + check_sample_generator_common( + gko::testing::FbcsrSampleUnsorted(ref)); 
+ check_sample_generator_common( + gko::testing::FbcsrSampleSquare(ref)); +} + + +template +class FbcsrSampleComplex : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + FbcsrSampleComplex() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; +}; + + +TYPED_TEST_SUITE(FbcsrSampleComplex, gko::test::ComplexValueIndexTypes); + + +TYPED_TEST(FbcsrSampleComplex, ComplexSampleGeneratorIsCorrect) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + auto ref = this->ref; + gko::testing::FbcsrSampleComplex fbsample3(ref); + + std::unique_ptr fbmtx3 = fbsample3.generate_fbcsr(); + std::unique_ptr csmtx3 = fbsample3.generate_csr(); + + check_sample_generator_common(fbsample3); + assert_matrices_are_same(fbmtx3.get(), csmtx3.get()); +} + + +template +class Fbcsr : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MtxData = gko::matrix_data; + + Fbcsr() + : exec(gko::ReferenceExecutor::create()), + fbsample(exec), + mtx(fbsample.generate_fbcsr()) + { + // backup for move tests + const value_type *const v = mtx->get_values(); + const index_type *const c = mtx->get_col_idxs(); + const index_type *const r = mtx->get_row_ptrs(); + orig_size = mtx->get_size(); + orig_rowptrs.resize(fbsample.nbrows + 1); + orig_colinds.resize(fbsample.nbnz); + orig_vals.resize(fbsample.nnz); + std::copy(r, r + fbsample.nbrows + 1, orig_rowptrs.data()); + std::copy(c, c + fbsample.nbnz, orig_colinds.data()); + std::copy(v, v + fbsample.nnz, orig_vals.data()); 
+ } + + std::shared_ptr exec; + const gko::testing::FbcsrSample fbsample; + std::unique_ptr mtx; + + gko::dim<2> orig_size; + std::vector orig_vals; + std::vector orig_rowptrs; + std::vector orig_colinds; + + void assert_equal_to_original_mtx(const Mtx *m) + { + auto v = m->get_const_values(); + auto c = m->get_const_col_idxs(); + auto r = m->get_const_row_ptrs(); + + const int bs = fbsample.bs; + + ASSERT_EQ(m->get_size(), orig_size); + ASSERT_EQ(m->get_num_stored_elements(), orig_vals.size()); + ASSERT_EQ(m->get_block_size(), bs); + ASSERT_EQ(m->get_num_block_rows(), m->get_size()[0] / bs); + ASSERT_EQ(m->get_num_block_cols(), m->get_size()[1] / bs); + + for (index_type irow = 0; irow < orig_size[0] / bs; irow++) { + const index_type *const rowptr = &orig_rowptrs[0]; + ASSERT_EQ(r[irow], rowptr[irow]); + + for (index_type inz = rowptr[irow]; inz < rowptr[irow + 1]; inz++) { + ASSERT_EQ(c[inz], orig_colinds[inz]); + + for (int i = 0; i < bs * bs; i++) { + ASSERT_EQ(v[inz * bs * bs + i], + orig_vals[inz * bs * bs + i]); + } + } + } + } + + void assert_empty(const Mtx *m) + { + ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); + ASSERT_EQ(m->get_num_stored_elements(), 0); + ASSERT_EQ(m->get_block_size(), 1); + ASSERT_EQ(m->get_const_values(), nullptr); + ASSERT_EQ(m->get_const_col_idxs(), nullptr); + ASSERT_NE(m->get_const_row_ptrs(), nullptr); + } +}; + +TYPED_TEST_SUITE(Fbcsr, gko::test::ValueIndexTypes); + + +TYPED_TEST(Fbcsr, GetNumBlocksCorrectlyThrows) +{ + using index_type = typename TestFixture::index_type; + const index_type vec_sz = 47; + const int blk_sz = 9; + + ASSERT_THROW(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), + gko::BlockSizeError); +} + + +TYPED_TEST(Fbcsr, GetNumBlocksWorks) +{ + using index_type = typename TestFixture::index_type; + const index_type vec_sz = 45; + const int blk_sz = 9; + + ASSERT_EQ(gko::matrix::detail::get_num_blocks(blk_sz, vec_sz), 5); +} + + +TYPED_TEST(Fbcsr, KnowsItsSize) +{ + ASSERT_EQ(this->mtx->get_size(), 
gko::dim<2>(6, 12)); + ASSERT_EQ(this->mtx->get_block_size(), 3); + ASSERT_EQ(this->mtx->get_num_stored_elements(), 36); + ASSERT_EQ(this->mtx->get_num_block_rows(), 2); + ASSERT_EQ(this->mtx->get_num_block_cols(), 4); +} + + +TYPED_TEST(Fbcsr, ContainsCorrectData) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, BlockSizeIsSetCorrectly) +{ + using Mtx = typename TestFixture::Mtx; + auto m = Mtx::create(this->exec); + m->set_block_size(6); + ASSERT_EQ(m->get_block_size(), 6); +} + + +TYPED_TEST(Fbcsr, CanBeEmpty) +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec); + + this->assert_empty(mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using size_type = gko::size_type; + const int bs = this->fbsample.bs; + const size_type nbrows = this->fbsample.nbrows; + const size_type nbcols = this->fbsample.nbcols; + const size_type bnnz = this->fbsample.nbnz; + std::unique_ptr refmat = this->fbsample.generate_fbcsr(); + value_type *const values = refmat->get_values(); + index_type *const col_idxs = refmat->get_col_idxs(); + index_type *const row_ptrs = refmat->get_row_ptrs(); + + auto mtx = gko::matrix::Fbcsr::create( + this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, + gko::Array::view(this->exec, bnnz * bs * bs, values), + gko::Array::view(this->exec, bnnz, col_idxs), + gko::Array::view(this->exec, nbrows + 1, row_ptrs)); + + ASSERT_EQ(mtx->get_const_values(), values); + ASSERT_EQ(mtx->get_const_col_idxs(), col_idxs); + ASSERT_EQ(mtx->get_const_row_ptrs(), row_ptrs); +} + + +TYPED_TEST(Fbcsr, CanBeCopied) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 3.0; + 
this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeMoved) +{ + using Mtx = typename TestFixture::Mtx; + auto copy = Mtx::create(this->exec); + + copy->copy_from(std::move(this->mtx)); + + this->assert_equal_to_original_mtx(copy.get()); +} + + +TYPED_TEST(Fbcsr, CanBeCloned) +{ + using Mtx = typename TestFixture::Mtx; + + auto clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[1] = 5.0; + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); +} + + +TYPED_TEST(Fbcsr, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Fbcsr, CanBeReadFromMatrixData) +{ + using Mtx = typename TestFixture::Mtx; + auto m = Mtx::create(this->exec); + m->set_block_size(this->fbsample.bs); + + m->read(this->fbsample.generate_matrix_data()); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Fbcsr, CanBeReadFromEmptyMatrixData) +{ + using Mtx = typename TestFixture::Mtx; + using MtxData = typename TestFixture::MtxData; + auto m = Mtx::create(this->exec); + m->set_block_size(this->fbsample.bs); + MtxData mdata; + mdata.size = gko::dim<2>{0, 0}; + + m->read(mdata); + + ASSERT_EQ(m->get_size(), (gko::dim<2>{0, 0})); + ASSERT_EQ(m->get_const_row_ptrs()[0], 0); + ASSERT_EQ(m->get_const_col_idxs(), nullptr); + ASSERT_EQ(m->get_const_values(), nullptr); +} + + +TYPED_TEST(Fbcsr, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using MtxData = typename TestFixture::MtxData; + MtxData refdata = this->fbsample.generate_matrix_data_with_explicit_zeros(); + refdata.ensure_row_major_order(); + + MtxData data; + this->mtx->write(data); + data.ensure_row_major_order(); + + ASSERT_EQ(data.size, refdata.size); + ASSERT_EQ(data.nonzeros.size(), refdata.nonzeros.size()); + for (size_t i = 0; i < data.nonzeros.size(); i++) { + ASSERT_EQ(data.nonzeros[i], 
refdata.nonzeros[i]); + } +} + + +} // namespace diff --git a/core/test/matrix/fbcsr_builder.cpp b/core/test/matrix/fbcsr_builder.cpp new file mode 100644 index 00000000000..3a4bf358d51 --- /dev/null +++ b/core/test/matrix/fbcsr_builder.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/fbcsr_builder.hpp" + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class FbcsrBuilder : public ::testing::Test { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Fbcsr; + +protected: + FbcsrBuilder() + : exec(gko::ReferenceExecutor::create()), + mtx(Mtx::create(exec, gko::dim<2>{4, 6}, 8, 2)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx; +}; + +TYPED_TEST_SUITE(FbcsrBuilder, gko::test::ValueIndexTypes); + + +TYPED_TEST(FbcsrBuilder, ReturnsCorrectArrays) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + gko::matrix::FbcsrBuilder builder{this->mtx.get()}; + + auto builder_col_idxs = builder.get_col_idx_array().get_data(); + auto builder_values = builder.get_value_array().get_data(); + auto ref_col_idxs = this->mtx->get_col_idxs(); + auto ref_values = this->mtx->get_values(); + + ASSERT_EQ(builder_col_idxs, ref_col_idxs); + ASSERT_EQ(builder_values, ref_values); +} + + +} // namespace diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp new file mode 100644 index 00000000000..9e6ff969045 --- /dev/null +++ b/core/test/matrix/fbcsr_sample.hpp @@ -0,0 +1,546 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ +#define GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ + + +#include +#include +#include +#include +#include +#include + + +#include "accessor/block_col_major.hpp" +#include "accessor/range.hpp" +#include "core/test/utils.hpp" + + +namespace gko { +namespace testing { + + +constexpr double fbcsr_test_offset = 0.000011118888; + + +/** Generates the same sample block CSR matrix in different formats + * + * This currently a 6 x 12 matrix with 3x3 blocks. + * Assumes that the layout within each block is row-major. + * Generates complex data when instantiated with a complex value type. 
+ */ +template +class FbcsrSample { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using MatData = gko::matrix_data; + using SparCsr = gko::matrix::SparsityCsr; + + + const size_type nrows = 6; + const size_type ncols = 12; + const size_type nnz = 36; + const size_type nbrows = 2; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 3; + const std::shared_ptr exec; + + + FbcsrSample(std::shared_ptr rexec) + : exec(rexec) + {} + + /** + * @return The sample matrix in FBCSR format + */ + std::unique_ptr generate_fbcsr() const + { + std::unique_ptr mtx = + Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + nnz, bs); + + value_type *const v = mtx->get_values(); + index_type *const c = mtx->get_col_idxs(); + index_type *const r = mtx->get_row_ptrs(); + r[0] = 0; + r[1] = 2; + r[2] = 4; + c[0] = 1; + c[1] = 3; + c[2] = 0; + c[3] = 2; + + gko::acc::range> vals( + std::array{nbnz, static_cast(bs), + static_cast(bs)}, + v); + + if (mtx->get_size()[0] % bs != 0) + throw gko::BadDimension(__FILE__, __LINE__, __func__, "test fbcsr", + mtx->get_size()[0], mtx->get_size()[1], + "block size does not divide the size!"); + + for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { + const index_type *const browptr = mtx->get_row_ptrs(); + for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; + inz++) { + const index_type bcolind = mtx->get_col_idxs()[inz]; + const value_type base = (ibrow + 1) * (bcolind + 1); + for (int ival = 0; ival < bs; ival++) + for (int jval = 0; jval < bs; jval++) + vals(inz, ival, jval) = + base + static_cast>( + ival * bs + jval); + } + } + + // Some of the entries are set to zero + vals(0, 2, 0) = gko::zero(); + vals(0, 2, 2) = gko::zero(); + vals(3, 0, 0) = gko::zero(); + + vals(3, 2, 1) += fbcsr_test_imaginary; + vals(3, 2, 2) += fbcsr_test_imaginary; + + return mtx; + } + + /** + * 
@return Sample matrix in CSR format + * + * Keeps explicit zeros. + */ + std::unique_ptr generate_csr() const + { + gko::Array csrrow(exec, {0, 6, 12, 18, 24, 30, 36}); + gko::Array csrcols( + exec, {3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, 3, 4, 5, 9, 10, 11, + 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8, 0, 1, 2, 6, 7, 8}); + // clang-format off + gko::Array csrvals(exec, I + {2, 3, 4, 4, 5, 6, 5, 6, 7, 7, 8, 9, 0, 9, 0, + 10, 11, 12, 2, 3, 4, 0, 7, 8, 5, 6, 7, + 9, 10, 11, 8, 9, 10, 12, + sct(13.0) + fbcsr_test_imaginary, + sct(14.0) + fbcsr_test_imaginary}); + // clang-format on + return Csr::create(exec, gko::dim<2>{nrows, ncols}, csrvals, csrcols, + csrrow); + } + + /** + * @return Sparsity structure of the matrix + */ + std::unique_ptr generate_sparsity_csr() const + { + gko::Array colids(exec, nbnz); + gko::Array rowptrs(exec, nbrows + 1); + const std::unique_ptr fbmat = generate_fbcsr(); + for (index_type i = 0; i < nbrows + 1; i++) + rowptrs.get_data()[i] = fbmat->get_const_row_ptrs()[i]; + for (index_type i = 0; i < nbnz; i++) + colids.get_data()[i] = fbmat->get_const_col_idxs()[i]; + return SparCsr::create(exec, gko::dim<2>{nbrows, nbcols}, colids, + rowptrs); + } + + /** + * @return Array of COO triplets that represent the matrix + * + * @note The order of the triplets assumes the blocks are stored row-major + */ + MatData generate_matrix_data() const + { + return MatData({{6, 12}, + {{0, 3, 2.0}, + {0, 4, 3.0}, + {0, 5, 4.0}, + {1, 3, 5.0}, + {1, 4, 6.0}, + {1, 5, 7.0}, + {2, 4, 9.0}, + + {0, 9, 4.0}, + {0, 10, 5.0}, + {0, 11, 6.0}, + {1, 9, 7.0}, + {1, 10, 8.0}, + {1, 11, 9.0}, + {2, 9, 10.0}, + {2, 10, 11.0}, + {2, 11, 12.0}, + + {3, 0, 2.0}, + {3, 1, 3.0}, + {3, 2, 4.0}, + {4, 0, 5.0}, + {4, 1, 6.0}, + {4, 2, 7.0}, + {5, 0, 8.0}, + {5, 1, 9.0}, + {5, 2, 10.0}, + + {3, 7, 7.0}, + {3, 8, 8.0}, + {4, 6, 9.0}, + {4, 7, 10.0}, + {4, 8, 11.0}, + {5, 6, 12.0}, + {5, 7, sct(13.0) + fbcsr_test_imaginary}, + {5, 8, sct(14.0) + fbcsr_test_imaginary}}}); + } + + /** + * 
@return Array of COO triplets that represent the matrix; includes + * explicit zeros + * + * @note The order of the triplets assumes the blocks are stored row-major + */ + MatData generate_matrix_data_with_explicit_zeros() const + { + auto mdata = generate_matrix_data(); + mdata.nonzeros.push_back({2, 3, 0.0}); + mdata.nonzeros.push_back({2, 5, 0.0}); + mdata.nonzeros.push_back({3, 6, 0.0}); + mdata.ensure_row_major_order(); + return mdata; + } + +private: + /// Enables complex data to be used for complex instantiations... + template + constexpr std::enable_if_t() || is_complex(), + ValueType> + sct(U u) const + { + return static_cast(u); + } + + /// ... while ignoring imaginary parts for real instantiations + template + constexpr std::enable_if_t() && !is_complex(), + ValueType> + sct(std::complex cu) const + { + return static_cast(cu.real()); + } + + const ValueType fbcsr_test_imaginary = sct( + std::complex>(0, 0.1 + fbcsr_test_offset)); +}; + +/** + * Generates a sample block CSR matrix in different formats. + * 6 x 8 matrix with 2x2 blocks. 
+ */ +template +class FbcsrSample2 { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + using Csr = gko::matrix::Csr; + using Diagonal = gko::matrix::Diagonal; + + + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSample2(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 1, 3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + v[0] = 1; + v[1] = 3; + v[2] = 2; + v[3] = 0; + v[9] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -2; + v[14] = -1; + v[15] = -11; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } + + std::unique_ptr generate_csr() const + { + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + v[0] = 1; + v[1] = 2; + v[2] = 3; + v[3] = 0; + v[10] = 0; + v[11] = 0; + v[12] = -12; + v[13] = -1; + v[14] = -2; + v[15] = -11; + + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); + } + + std::unique_ptr extract_diagonal() const + { + gko::Array dvals(exec, {1, 0, 0, 0, -12, -11}); + return Diagonal::create(exec, nrows, dvals); + } + + gko::Array getNonzerosPerRow() const + { + return gko::Array(exec, {2, 2, 4, 4, 2, 2}); + } + + +private: + /// Enables use of literals to instantiate value data + template + constexpr ValueType sct(U u) const + { + return static_cast(u); + } +}; + +/** + * 
@brief Generates the a sample block CSR square matrix + * + * This currently a 4 x 4 matrix with 2x2 blocks. + */ +template +class FbcsrSampleSquare { +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + + const size_type nrows = 4; + const size_type ncols = 4; + const size_type nnz = 8; + const size_type nbrows = 2; + const size_type nbcols = 2; + const size_type nbnz = 2; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSampleSquare(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array c(exec, {1, 1}); + gko::Array r(exec, {0, 1, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = i; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } +}; + +/** + * @brief Generates a sample block CSR matrix with complex values + * + * This is a 6 x 8 matrix with 2x2 blocks. 
+ */ +template +class FbcsrSampleComplex { +public: + using value_type = ValueType; + using index_type = IndexType; + using Csr = gko::matrix::Csr; + using Fbcsr = gko::matrix::Fbcsr; + + + static_assert(is_complex(), "Only for complex types!"); + + + const size_type nrows = 6; + const size_type ncols = 8; + const size_type nnz = 16; + const size_type nbrows = 3; + const size_type nbcols = 4; + const size_type nbnz = 4; + const int bs = 2; + const std::shared_ptr exec; + + + FbcsrSampleComplex(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 1, 3, 4}); + gko::Array c(exec, {0, 0, 3, 2}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[2] = 2.0 + 2.15i; + v[1] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[9] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[14] = -1.0 + 1.15i; + v[13] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } + + std::unique_ptr generate_csr() const + { + gko::Array r(exec, {0, 2, 4, 8, 12, 14, 16}); + gko::Array c( + exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; + + using namespace std::complex_literals; + v[0] = 1.0 + 1.15i; + v[1] = 2.0 + 2.15i; + v[2] = 3.0 - 3.15i; + v[3] = 0.0 - 0.15i; + v[10] = 0.0; + v[11] = 0.0; + v[12] = -12.0 + 12.15i; + v[13] = -1.0 + 1.15i; + v[14] = -2.0 - 2.15i; + v[15] = -11.0 - 11.15i; + + return Csr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + vals, c, r, + std::make_shared()); + } +}; + + +/** + * Generates a fixed-block CSR matrix with longer and unsorted columns + */ +template +class FbcsrSampleUnsorted 
{ +public: + using value_type = ValueType; + using index_type = IndexType; + using Fbcsr = gko::matrix::Fbcsr; + + + const size_type nbrows = 3; + const size_type nbcols = 20; + const size_type nbnz = 30; + const int bs = 3; + const size_type nrows = nbrows * bs; + const size_type ncols = nbcols * bs; + const size_type nnz = nbnz * bs * bs; + const std::shared_ptr exec; + + + FbcsrSampleUnsorted(std::shared_ptr rexec) + : exec(rexec) + {} + + std::unique_ptr generate_fbcsr() const + { + gko::Array r(exec, {0, 8, 19, 30}); + gko::Array c( + exec, {0, 1, 20, 15, 12, 18, 5, 28, 3, 10, 29, 5, 9, 2, 16, + 12, 21, 2, 0, 1, 5, 9, 12, 15, 17, 20, 22, 24, 27, 28}); + gko::Array vals(exec, nnz); + value_type *const v = vals.get_data(); + for (IndexType i = 0; i < nnz; i++) { + v[i] = static_cast(i + 0.15 + fbcsr_test_offset); + } + + return Fbcsr::create(exec, + gko::dim<2>{static_cast(nrows), + static_cast(ncols)}, + bs, vals, c, r); + } +}; + + +} // namespace testing +} // namespace gko + +#endif // GKO_CORE_TEST_MATRIX_FBCSR_SAMPLE_HPP_ diff --git a/core/test/matrix/hybrid.cpp b/core/test/matrix/hybrid.cpp index dac9da86167..57bf3e24c47 100644 --- a/core/test/matrix/hybrid.cpp +++ b/core/test/matrix/hybrid.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,6 +42,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { +template +struct change_index_s { + using type = gko::int32; +}; + +template <> +struct change_index_s { + using type = gko::int64; +}; + + +template +using change_index = typename change_index_s::type; + + template class Hybrid : public ::testing::Test { protected: @@ -112,7 +127,7 @@ class Hybrid : public ::testing::Test { } }; -TYPED_TEST_CASE(Hybrid, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Hybrid, gko::test::ValueIndexTypes); TYPED_TEST(Hybrid, KnowsItsSize) @@ -257,19 +272,123 @@ TYPED_TEST(Hybrid, CanBeReadFromMatrixDataByPercent40) auto c = m->get_const_ell_col_idxs(); auto n = m->get_ell_num_stored_elements_per_row(); auto p = m->get_ell_stride(); + auto coo_v = m->get_const_coo_values(); + auto coo_c = m->get_const_coo_col_idxs(); + auto coo_r = m->get_const_coo_row_idxs(); ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); ASSERT_EQ(m->get_ell_num_stored_elements(), 2); + ASSERT_EQ(m->get_coo_num_stored_elements(), 2); EXPECT_EQ(n, 1); EXPECT_EQ(p, 2); EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); EXPECT_EQ(v[0], value_type{1.0}); EXPECT_EQ(v[1], value_type{5.0}); + EXPECT_EQ(coo_v[0], value_type{3.0}); + EXPECT_EQ(coo_v[1], value_type{2.0}); + EXPECT_EQ(coo_c[0], 1); + EXPECT_EQ(coo_c[1], 2); + EXPECT_EQ(coo_r[0], 0); + EXPECT_EQ(coo_r[1], 0); +} + + +TYPED_TEST(Hybrid, CanBeReadFromMatrixAssemblyDataAutomatically) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = + Mtx::create(this->exec, std::make_shared()); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + auto v = m->get_const_coo_values(); + auto c = m->get_const_coo_col_idxs(); + auto r = m->get_const_coo_row_idxs(); + auto n = m->get_ell_num_stored_elements_per_row(); + auto p = 
m->get_ell_stride(); + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_ell_num_stored_elements(), 0); + ASSERT_EQ(m->get_coo_num_stored_elements(), 4); + EXPECT_EQ(n, 0); + EXPECT_EQ(p, 2); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{3.0}); + EXPECT_EQ(v[2], value_type{2.0}); + EXPECT_EQ(v[3], value_type{5.0}); +} + + +TYPED_TEST(Hybrid, CanBeReadFromMatrixAssemblyDataByColumns2) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec, + std::make_shared(2)); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx(m.get()); +} + +TYPED_TEST(Hybrid, CanBeReadFromMatrixAssemblyDataByPercent40) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec, + std::make_shared(0.4)); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + auto v = m->get_const_ell_values(); + auto c = m->get_const_ell_col_idxs(); + auto n = m->get_ell_num_stored_elements_per_row(); + auto p = m->get_ell_stride(); auto coo_v = m->get_const_coo_values(); auto coo_c = m->get_const_coo_col_idxs(); auto coo_r = m->get_const_coo_row_idxs(); + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); + 
ASSERT_EQ(m->get_ell_num_stored_elements(), 2); ASSERT_EQ(m->get_coo_num_stored_elements(), 2); + EXPECT_EQ(n, 1); + EXPECT_EQ(p, 2); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(v[0], value_type{1.0}); + EXPECT_EQ(v[1], value_type{5.0}); EXPECT_EQ(coo_v[0], value_type{3.0}); EXPECT_EQ(coo_v[1], value_type{2.0}); EXPECT_EQ(coo_c[0], 1); @@ -297,4 +416,102 @@ TYPED_TEST(Hybrid, GeneratesCorrectMatrixData) } +TYPED_TEST(Hybrid, GetCorrectColumnLimit) +{ + using Mtx = typename TestFixture::Mtx; + using Mtx2 = gko::remove_complex; + using strategy = typename Mtx::column_limit; + using strategy2 = typename Mtx2::column_limit; + + auto mtx = Mtx::create(this->exec, std::make_shared(2)); + auto mtx_stra = gko::as(mtx->get_strategy()); + auto mtx2_stra = gko::as(mtx->template get_strategy()); + + EXPECT_EQ(mtx_stra->get_num_columns(), 2); + EXPECT_EQ(mtx2_stra->get_num_columns(), 2); +} + + +TYPED_TEST(Hybrid, GetCorrectImbalanceLimit) +{ + using Mtx = typename TestFixture::Mtx; + using Mtx2 = gko::remove_complex; + using strategy = typename Mtx::imbalance_limit; + using strategy2 = typename Mtx2::imbalance_limit; + + auto mtx = Mtx::create(this->exec, std::make_shared(0.4)); + auto mtx_stra = gko::as(mtx->get_strategy()); + auto mtx2_stra = gko::as(mtx->template get_strategy()); + + EXPECT_EQ(mtx_stra->get_percentage(), 0.4); + EXPECT_EQ(mtx2_stra->get_percentage(), 0.4); +} + + +TYPED_TEST(Hybrid, GetCorrectImbalanceBoundedLimit) +{ + using Mtx = typename TestFixture::Mtx; + using Mtx2 = gko::remove_complex; + using strategy = typename Mtx::imbalance_bounded_limit; + using strategy2 = typename Mtx2::imbalance_bounded_limit; + + auto mtx = Mtx::create(this->exec, std::make_shared(0.4, 0.1)); + auto mtx_stra = gko::as(mtx->get_strategy()); + auto mtx2_stra = gko::as(mtx->template get_strategy()); + + EXPECT_EQ(mtx_stra->get_percentage(), 0.4); + EXPECT_EQ(mtx_stra->get_ratio(), 0.1); + EXPECT_EQ(mtx2_stra->get_percentage(), 0.4); + 
EXPECT_EQ(mtx2_stra->get_ratio(), 0.1); +} + + +TYPED_TEST(Hybrid, GetCorrectMinimalStorageLimitWithDifferentHybType) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx2 = gko::matrix::Hybrid>; + using strategy = typename Mtx::minimal_storage_limit; + using strategy2 = typename Mtx2::imbalance_limit; + + auto mtx = Mtx::create(this->exec, std::make_shared()); + auto mtx_stra = gko::as(mtx->get_strategy()); + auto mtx2_stra = gko::as(mtx->template get_strategy()); + + EXPECT_EQ(mtx2_stra->get_percentage(), mtx_stra->get_percentage()); +} + + +TYPED_TEST(Hybrid, GetCorrectMinimalStorageLimitWithSameHybType) +{ + using Mtx = typename TestFixture::Mtx; + using Mtx2 = Mtx; + using strategy = typename Mtx::minimal_storage_limit; + using strategy2 = typename Mtx2::minimal_storage_limit; + + auto mtx = Mtx::create(this->exec, std::make_shared()); + auto mtx_stra = gko::as(mtx->get_strategy()); + auto mtx2_stra = gko::as(mtx->template get_strategy()); + + EXPECT_EQ(mtx2_stra->get_percentage(), mtx_stra->get_percentage()); +} + + +TYPED_TEST(Hybrid, GetCorrectAutomatic) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx2 = Mtx; + using strategy = typename Mtx::automatic; + using strategy2 = typename Mtx2::automatic; + + auto mtx = Mtx::create(this->exec, std::make_shared()); + auto mtx_stra = gko::as(mtx->get_strategy()); + + ASSERT_NO_THROW(gko::as(mtx->template get_strategy())); +} + + } // namespace diff --git a/core/test/matrix/identity.cpp b/core/test/matrix/identity.cpp index f890a9dd039..cffe69d0bcc 100644 --- a/core/test/matrix/identity.cpp +++ b/core/test/matrix/identity.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors 
All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -57,7 +58,7 @@ class Identity : public ::testing::Test { std::shared_ptr exec; }; -TYPED_TEST_CASE(Identity, gko::test::ValueTypes); +TYPED_TEST_SUITE(Identity, gko::test::ValueTypes); TYPED_TEST(Identity, CanBeEmpty) @@ -72,37 +73,26 @@ TYPED_TEST(Identity, CanBeConstructedWithSize) { using Id = typename TestFixture::Id; auto identity = Id::create(this->exec, 5); + ASSERT_EQ(identity->get_size(), gko::dim<2>(5, 5)); } -TYPED_TEST(Identity, AppliesToVector) +TYPED_TEST(Identity, CanBeConstructedWithSquareSize) { using Id = typename TestFixture::Id; - using Vec = typename TestFixture::Vec; - auto identity = Id::create(this->exec, 3); - auto x = Vec::create(this->exec, gko::dim<2>{3, 1}); - auto b = gko::initialize({2.0, 1.0, 5.0}, this->exec); - - identity->apply(b.get(), x.get()); + auto identity = Id::create(this->exec, gko::dim<2>(5, 5)); - GKO_ASSERT_MTX_NEAR(x, l({2.0, 1.0, 5.0}), 0.0); + ASSERT_EQ(identity->get_size(), gko::dim<2>(5, 5)); } -TYPED_TEST(Identity, AppliesToMultipleVectors) +TYPED_TEST(Identity, FailsConstructionWithRectangularSize) { using Id = typename TestFixture::Id; - using Vec = typename TestFixture::Vec; - using T = typename TestFixture::value_type; - auto identity = Id::create(this->exec, 3); - auto x = Vec::create(this->exec, gko::dim<2>{3, 2}, 3); - auto b = gko::initialize( - 3, {I{2.0, 3.0}, I{1.0, 2.0}, I{5.0, -1.0}}, this->exec); - identity->apply(b.get(), x.get()); - - GKO_ASSERT_MTX_NEAR(x, l({{2.0, 3.0}, {1.0, 2.0}, {5.0, -1.0}}), 0.0); + ASSERT_THROW(Id::create(this->exec, gko::dim<2>(5, 4)), + gko::DimensionMismatch); } @@ -112,7 +102,7 @@ class IdentityFactory : public ::testing::Test { using value_type = T; }; -TYPED_TEST_CASE(IdentityFactory, gko::test::ValueTypes); +TYPED_TEST_SUITE(IdentityFactory, 
gko::test::ValueTypes); TYPED_TEST(IdentityFactory, CanGenerateIdentityMatrix) @@ -127,4 +117,14 @@ TYPED_TEST(IdentityFactory, CanGenerateIdentityMatrix) } +TYPED_TEST(IdentityFactory, FailsToGenerateRectangularIdentityMatrix) +{ + auto exec = gko::ReferenceExecutor::create(); + auto id_factory = gko::matrix::IdentityFactory::create(exec); + auto mtx = gko::matrix::Dense::create(exec, gko::dim<2>{5, 4}); + + ASSERT_THROW(id_factory->generate(std::move(mtx)), gko::DimensionMismatch); +} + + } // namespace diff --git a/core/test/matrix/permutation.cpp b/core/test/matrix/permutation.cpp index c64f39fb3e2..c5c5fe81db1 100644 --- a/core/test/matrix/permutation.cpp +++ b/core/test/matrix/permutation.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -86,7 +86,7 @@ class Permutation : public ::testing::Test { std::unique_ptr> mtx; }; -TYPED_TEST_CASE(Permutation, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Permutation, gko::test::ValueIndexTypes); TYPED_TEST(Permutation, CanBeEmpty) diff --git a/core/test/matrix/sellp.cpp b/core/test/matrix/sellp.cpp index d6f139ba82e..65e3d532b20 100644 --- a/core/test/matrix/sellp.cpp +++ b/core/test/matrix/sellp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -138,7 +138,7 @@ class Sellp : public ::testing::Test { } }; -TYPED_TEST_CASE(Sellp, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(Sellp, gko::test::ValueIndexTypes); TYPED_TEST(Sellp, KnowsItsSize) @@ -274,4 +274,45 @@ TYPED_TEST(Sellp, GeneratesCorrectMatrixData) } +TYPED_TEST(Sellp, CanBeReadFromMatrixAssemblyData) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Sellp, CanBeReadFromMatrixAssemblyDataWithSliceSizeAndStrideFactor) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto m = Mtx::create(this->exec, gko::dim<2>{2, 3}, 2, 2, 3); + gko::matrix_assembly_data data(gko::dim<2>{2, 3}); + data.set_value(0, 0, 1.0); + data.set_value(0, 1, 3.0); + data.set_value(0, 2, 2.0); + data.set_value(1, 0, 0.0); + data.set_value(1, 1, 5.0); + data.set_value(1, 2, 0.0); + + m->read(data); + + this->assert_equal_to_original_mtx_with_slice_size_and_stride_factor( + m.get()); +} + + } // namespace diff --git a/core/test/matrix/sparsity_csr.cpp b/core/test/matrix/sparsity_csr.cpp index 7e26fee9c88..4faf44dbccd 100644 --- a/core/test/matrix/sparsity_csr.cpp +++ b/core/test/matrix/sparsity_csr.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -107,7 +107,7 @@ class SparsityCsr : public ::testing::Test { } }; -TYPED_TEST_CASE(SparsityCsr, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(SparsityCsr, gko::test::ValueIndexTypes); TYPED_TEST(SparsityCsr, KnowsItsSize) diff --git a/core/test/multigrid/CMakeLists.txt b/core/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..2ce9d5007d8 --- /dev/null +++ b/core/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(amgx_pgm) diff --git a/core/test/multigrid/amgx_pgm.cpp b/core/test/multigrid/amgx_pgm.cpp new file mode 100644 index 00000000000..b1b434b9fe2 --- /dev/null +++ b/core/test/multigrid/amgx_pgm.cpp @@ -0,0 +1,114 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class AmgxPgmFactory : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Mtx = gko::matrix::Csr; + using Vec = gko::matrix::Dense; + using MgLevel = gko::multigrid::AmgxPgm; + AmgxPgmFactory() + : exec(gko::ReferenceExecutor::create()), + amgxpgm_factory(MgLevel::build() + .with_max_iterations(2u) + .with_max_unassigned_ratio(0.1) + .with_deterministic(true) + .on(exec)) + + {} + + std::shared_ptr exec; + std::unique_ptr amgxpgm_factory; +}; + +TYPED_TEST_SUITE(AmgxPgmFactory, gko::test::ValueIndexTypes); + + +TYPED_TEST(AmgxPgmFactory, FactoryKnowsItsExecutor) +{ + ASSERT_EQ(this->amgxpgm_factory->get_executor(), this->exec); +} + + +TYPED_TEST(AmgxPgmFactory, DefaultSetting) +{ + using MgLevel = typename TestFixture::MgLevel; + auto factory = MgLevel::build().on(this->exec); + + ASSERT_EQ(factory->get_parameters().max_iterations, 15u); + ASSERT_EQ(factory->get_parameters().max_unassigned_ratio, 0.05); + ASSERT_EQ(factory->get_parameters().deterministic, false); +} + + +TYPED_TEST(AmgxPgmFactory, SetMaxIterations) +{ + ASSERT_EQ(this->amgxpgm_factory->get_parameters().max_iterations, 2u); +} + + 
+TYPED_TEST(AmgxPgmFactory, SetMaxUnassignedPercentage) +{ + ASSERT_EQ(this->amgxpgm_factory->get_parameters().max_unassigned_ratio, + 0.1); +} + + +TYPED_TEST(AmgxPgmFactory, SetDeterministic) +{ + ASSERT_EQ(this->amgxpgm_factory->get_parameters().deterministic, true); +} + + +} // namespace diff --git a/core/test/preconditioner/CMakeLists.txt b/core/test/preconditioner/CMakeLists.txt index efbeed1af2e..f75cd6b3baf 100644 --- a/core/test/preconditioner/CMakeLists.txt +++ b/core/test/preconditioner/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(ic) ginkgo_create_test(ilu) ginkgo_create_test(isai) ginkgo_create_test(jacobi) diff --git a/core/test/preconditioner/ic.cpp b/core/test/preconditioner/ic.cpp new file mode 100644 index 00000000000..c287887ced3 --- /dev/null +++ b/core/test/preconditioner/ic.cpp @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include + + +namespace { + + +class IcFactory : public ::testing::Test { +protected: + using value_type = double; + using index_type = gko::int32; + using solver_type = gko::solver::Bicgstab; + using ic_prec_type = gko::preconditioner::Ic; + using ic_type = gko::factorization::ParIc; + + IcFactory() + : exec(gko::ReferenceExecutor::create()), + l_factory(solver_type::build().on(exec)), + fact_factory(ic_type::build().on(exec)) + {} + + std::shared_ptr exec; + std::shared_ptr l_factory; + std::shared_ptr fact_factory; +}; + + +TEST_F(IcFactory, KnowsItsExecutor) +{ + auto ic_factory = ic_prec_type::build().on(this->exec); + + ASSERT_EQ(ic_factory->get_executor(), this->exec); +} + + +TEST_F(IcFactory, CanSetLSolverFactory) +{ + auto ic_factory = ic_prec_type::build() + .with_l_solver_factory(this->l_factory) + .on(this->exec); + + ASSERT_EQ(ic_factory->get_parameters().l_solver_factory, this->l_factory); +} + + +TEST_F(IcFactory, CanSetFactorizationFactory) +{ + auto ic_factory = ic_prec_type::build() + .with_factorization_factory(this->fact_factory) + .on(this->exec); + + ASSERT_EQ(ic_factory->get_parameters().factorization_factory, + this->fact_factory); +} + + +} // namespace diff --git a/core/test/preconditioner/ilu.cpp b/core/test/preconditioner/ilu.cpp index 2103e00d958..3d93fc1411b 100644 --- 
a/core/test/preconditioner/ilu.cpp +++ b/core/test/preconditioner/ilu.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,50 +40,47 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include -#include "core/test/utils.hpp" - - namespace { -template class IluFactory : public ::testing::Test { protected: - using value_type = T; + using value_type = double; + using index_type = gko::int32; using l_solver_type = gko::solver::Bicgstab; using u_solver_type = gko::solver::Bicgstab; using ilu_prec_type = gko::preconditioner::Ilu; + using ilu_type = gko::factorization::ParIlu; IluFactory() : exec(gko::ReferenceExecutor::create()), l_factory(l_solver_type::build().on(exec)), - u_factory(u_solver_type::build().on(exec)) + u_factory(u_solver_type::build().on(exec)), + fact_factory(ilu_type::build().on(exec)) {} std::shared_ptr exec; std::shared_ptr l_factory; std::shared_ptr u_factory; + std::shared_ptr fact_factory; }; -TYPED_TEST_CASE(IluFactory, gko::test::ValueTypes); - -TYPED_TEST(IluFactory, KnowsItsExecutor) +TEST_F(IluFactory, KnowsItsExecutor) { - using ilu_prec_type = typename TestFixture::ilu_prec_type; auto ilu_factory = ilu_prec_type::build().on(this->exec); ASSERT_EQ(ilu_factory->get_executor(), this->exec); } -TYPED_TEST(IluFactory, CanSetLSolverFactory) +TEST_F(IluFactory, CanSetLSolverFactory) { - using ilu_prec_type = typename TestFixture::ilu_prec_type; auto ilu_factory = ilu_prec_type::build() .with_l_solver_factory(this->l_factory) .on(this->exec); @@ -92,9 +89,8 @@ TYPED_TEST(IluFactory, CanSetLSolverFactory) } -TYPED_TEST(IluFactory, CanSetUSolverFactory) +TEST_F(IluFactory, CanSetUSolverFactory) { - using ilu_prec_type = typename TestFixture::ilu_prec_type; auto ilu_factory = ilu_prec_type::build() 
.with_u_solver_factory(this->u_factory) .on(this->exec); @@ -103,4 +99,15 @@ TYPED_TEST(IluFactory, CanSetUSolverFactory) } +TEST_F(IluFactory, CanSetFactorizationFactory) +{ + auto ilu_factory = ilu_prec_type::build() + .with_factorization_factory(this->fact_factory) + .on(this->exec); + + ASSERT_EQ(ilu_factory->get_parameters().factorization_factory, + this->fact_factory); +} + + } // namespace diff --git a/core/test/preconditioner/isai.cpp b/core/test/preconditioner/isai.cpp index 1a549bca874..fc8f6cee1dd 100644 --- a/core/test/preconditioner/isai.cpp +++ b/core/test/preconditioner/isai.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include "core/test/utils.hpp" @@ -71,26 +72,38 @@ class IsaiFactory : public ::testing::Test { typename std::tuple_element<0, decltype(ValueIndexType())>::type; using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using excess_solver_type = gko::solver::Bicgstab; + using GeneralIsai = + gko::preconditioner::GeneralIsai; + using SpdIsai = gko::preconditioner::SpdIsai; using LowerIsai = gko::preconditioner::LowerIsai; using UpperIsai = gko::preconditioner::UpperIsai; using Csr = gko::matrix::Csr; IsaiFactory() : exec(gko::ReferenceExecutor::create()), + excess_solver_factory(excess_solver_type::build().on(exec)), + general_isai_factory(GeneralIsai::build().on(exec)), + spd_isai_factory(SpdIsai::build().on(exec)), lower_isai_factory(LowerIsai::build().on(exec)), upper_isai_factory(UpperIsai::build().on(exec)) {} std::shared_ptr exec; + std::shared_ptr excess_solver_factory; + std::unique_ptr general_isai_factory; + std::unique_ptr spd_isai_factory; std::unique_ptr 
lower_isai_factory; std::unique_ptr upper_isai_factory; }; -TYPED_TEST_CASE(IsaiFactory, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(IsaiFactory, gko::test::ValueIndexTypes); TYPED_TEST(IsaiFactory, KnowsItsExecutor) { + ASSERT_EQ(this->general_isai_factory->get_executor(), this->exec); + ASSERT_EQ(this->spd_isai_factory->get_executor(), this->exec); ASSERT_EQ(this->lower_isai_factory->get_executor(), this->exec); ASSERT_EQ(this->upper_isai_factory->get_executor(), this->exec); } @@ -98,14 +111,22 @@ TYPED_TEST(IsaiFactory, KnowsItsExecutor) TYPED_TEST(IsaiFactory, SetsSkipSortingCorrectly) { + using GeneralIsai = typename TestFixture::GeneralIsai; + using SpdIsai = typename TestFixture::SpdIsai; using LowerIsai = typename TestFixture::LowerIsai; using UpperIsai = typename TestFixture::UpperIsai; + auto a_isai_factory = + GeneralIsai::build().with_skip_sorting(true).on(this->exec); + auto spd_isai_factory = + SpdIsai::build().with_skip_sorting(true).on(this->exec); auto l_isai_factory = LowerIsai::build().with_skip_sorting(true).on(this->exec); auto u_isai_factory = UpperIsai::build().with_skip_sorting(true).on(this->exec); + ASSERT_EQ(a_isai_factory->get_parameters().skip_sorting, true); + ASSERT_EQ(spd_isai_factory->get_parameters().skip_sorting, true); ASSERT_EQ(l_isai_factory->get_parameters().skip_sorting, true); ASSERT_EQ(u_isai_factory->get_parameters().skip_sorting, true); } @@ -113,6 +134,8 @@ TYPED_TEST(IsaiFactory, SetsSkipSortingCorrectly) TYPED_TEST(IsaiFactory, SetsDefaultSkipSortingCorrectly) { + ASSERT_EQ(this->general_isai_factory->get_parameters().skip_sorting, false); + ASSERT_EQ(this->spd_isai_factory->get_parameters().skip_sorting, false); ASSERT_EQ(this->lower_isai_factory->get_parameters().skip_sorting, false); ASSERT_EQ(this->upper_isai_factory->get_parameters().skip_sorting, false); } @@ -120,14 +143,22 @@ TYPED_TEST(IsaiFactory, SetsDefaultSkipSortingCorrectly) TYPED_TEST(IsaiFactory, SetsSparsityPowerCorrectly) { + using GeneralIsai = 
typename TestFixture::GeneralIsai; + using SpdIsai = typename TestFixture::SpdIsai; using LowerIsai = typename TestFixture::LowerIsai; using UpperIsai = typename TestFixture::UpperIsai; + auto a_isai_factory = + GeneralIsai::build().with_sparsity_power(2).on(this->exec); + auto spd_isai_factory = + SpdIsai::build().with_sparsity_power(2).on(this->exec); auto l_isai_factory = LowerIsai::build().with_sparsity_power(2).on(this->exec); auto u_isai_factory = UpperIsai::build().with_sparsity_power(2).on(this->exec); + ASSERT_EQ(a_isai_factory->get_parameters().sparsity_power, 2); + ASSERT_EQ(spd_isai_factory->get_parameters().sparsity_power, 2); ASSERT_EQ(l_isai_factory->get_parameters().sparsity_power, 2); ASSERT_EQ(u_isai_factory->get_parameters().sparsity_power, 2); } @@ -135,11 +166,117 @@ TYPED_TEST(IsaiFactory, SetsSparsityPowerCorrectly) TYPED_TEST(IsaiFactory, SetsDefaultSparsityPowerCorrectly) { + ASSERT_EQ(this->general_isai_factory->get_parameters().sparsity_power, 1); + ASSERT_EQ(this->spd_isai_factory->get_parameters().sparsity_power, 1); ASSERT_EQ(this->lower_isai_factory->get_parameters().sparsity_power, 1); ASSERT_EQ(this->upper_isai_factory->get_parameters().sparsity_power, 1); } +TYPED_TEST(IsaiFactory, SetsExcessLimitCorrectly) +{ + using GeneralIsai = typename TestFixture::GeneralIsai; + using SpdIsai = typename TestFixture::SpdIsai; + using LowerIsai = typename TestFixture::LowerIsai; + using UpperIsai = typename TestFixture::UpperIsai; + + auto a_isai_factory = + GeneralIsai::build().with_excess_limit(1024u).on(this->exec); + auto spd_isai_factory = + SpdIsai::build().with_excess_limit(1024u).on(this->exec); + auto l_isai_factory = + LowerIsai::build().with_excess_limit(1024u).on(this->exec); + auto u_isai_factory = + UpperIsai::build().with_excess_limit(1024u).on(this->exec); + + ASSERT_EQ(a_isai_factory->get_parameters().excess_limit, 1024u); + ASSERT_EQ(spd_isai_factory->get_parameters().excess_limit, 1024u); + 
ASSERT_EQ(l_isai_factory->get_parameters().excess_limit, 1024u); + ASSERT_EQ(u_isai_factory->get_parameters().excess_limit, 1024u); +} + + +TYPED_TEST(IsaiFactory, SetsDefaultExcessLimitCorrectly) +{ + ASSERT_EQ(this->general_isai_factory->get_parameters().excess_limit, 0u); + ASSERT_EQ(this->spd_isai_factory->get_parameters().excess_limit, 0u); + ASSERT_EQ(this->lower_isai_factory->get_parameters().excess_limit, 0u); + ASSERT_EQ(this->upper_isai_factory->get_parameters().excess_limit, 0u); +} + + +TYPED_TEST(IsaiFactory, CanSetExcessSolverFactoryA) +{ + using GeneralIsai = typename TestFixture::GeneralIsai; + auto general_isai_factory = + GeneralIsai::build() + .with_excess_solver_factory(this->excess_solver_factory) + .on(this->exec); + + ASSERT_EQ(general_isai_factory->get_parameters().excess_solver_factory, + this->excess_solver_factory); +} + + +TYPED_TEST(IsaiFactory, CanSetExcessSolverFactorySpd) +{ + using SpdIsai = typename TestFixture::SpdIsai; + auto spd_isai_factory = + SpdIsai::build() + .with_excess_solver_factory(this->excess_solver_factory) + .on(this->exec); + + ASSERT_EQ(spd_isai_factory->get_parameters().excess_solver_factory, + this->excess_solver_factory); +} + + +TYPED_TEST(IsaiFactory, CanSetExcessSolverFactoryL) +{ + using LowerIsai = typename TestFixture::LowerIsai; + auto lower_isai_factory = + LowerIsai::build() + .with_excess_solver_factory(this->excess_solver_factory) + .on(this->exec); + + ASSERT_EQ(lower_isai_factory->get_parameters().excess_solver_factory, + this->excess_solver_factory); +} + + +TYPED_TEST(IsaiFactory, CanSetExcessSolverFactoryU) +{ + using UpperIsai = typename TestFixture::UpperIsai; + auto upper_isai_factory = + UpperIsai::build() + .with_excess_solver_factory(this->excess_solver_factory) + .on(this->exec); + + ASSERT_EQ(upper_isai_factory->get_parameters().excess_solver_factory, + this->excess_solver_factory); +} + + +TYPED_TEST(IsaiFactory, ThrowsWrongDimensionA) +{ + using Csr = typename TestFixture::Csr; + auto 
mtx = Csr::create(this->exec, gko::dim<2>{1, 2}, 1); + + ASSERT_THROW(this->general_isai_factory->generate(gko::share(mtx)), + gko::DimensionMismatch); +} + + +TYPED_TEST(IsaiFactory, ThrowsWrongDimensionSpd) +{ + using Csr = typename TestFixture::Csr; + auto mtx = Csr::create(this->exec, gko::dim<2>{1, 2}, 1); + + ASSERT_THROW(this->spd_isai_factory->generate(gko::share(mtx)), + gko::DimensionMismatch); +} + + TYPED_TEST(IsaiFactory, ThrowsWrongDimensionL) { using Csr = typename TestFixture::Csr; @@ -160,6 +297,26 @@ TYPED_TEST(IsaiFactory, ThrowsWrongDimensionU) } +TYPED_TEST(IsaiFactory, ThrowsNoConversionCsrA) +{ + using Csr = typename TestFixture::Csr; + auto mtx = DummyOperator::create(this->exec, gko::dim<2>{2, 2}); + + ASSERT_THROW(this->general_isai_factory->generate(gko::share(mtx)), + gko::NotSupported); +} + + +TYPED_TEST(IsaiFactory, ThrowsNoConversionCsrSpd) +{ + using Csr = typename TestFixture::Csr; + auto mtx = DummyOperator::create(this->exec, gko::dim<2>{2, 2}); + + ASSERT_THROW(this->spd_isai_factory->generate(gko::share(mtx)), + gko::NotSupported); +} + + TYPED_TEST(IsaiFactory, ThrowsNoConversionCsrL) { using Csr = typename TestFixture::Csr; diff --git a/core/test/preconditioner/jacobi.cpp b/core/test/preconditioner/jacobi.cpp index 0af14acbabf..7c13ca68749 100644 --- a/core/test/preconditioner/jacobi.cpp +++ b/core/test/preconditioner/jacobi.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -75,7 +75,7 @@ class JacobiFactory : public ::testing::Test { std::shared_ptr> mtx; }; -TYPED_TEST_CASE(JacobiFactory, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(JacobiFactory, gko::test::ValueIndexTypes); TYPED_TEST(JacobiFactory, KnowsItsExecutor) @@ -156,7 +156,7 @@ class BlockInterleavedStorageScheme : public ::testing::Test { 2}; }; -TYPED_TEST_CASE(BlockInterleavedStorageScheme, gko::test::IndexTypes); +TYPED_TEST_SUITE(BlockInterleavedStorageScheme, gko::test::IndexTypes); TYPED_TEST(BlockInterleavedStorageScheme, ComputesStorageSpace) diff --git a/core/test/reorder/CMakeLists.txt b/core/test/reorder/CMakeLists.txt new file mode 100644 index 00000000000..47af494c29b --- /dev/null +++ b/core/test/reorder/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(rcm) diff --git a/core/test/reorder/rcm.cpp b/core/test/reorder/rcm.cpp new file mode 100644 index 00000000000..95c48c34635 --- /dev/null +++ b/core/test/reorder/rcm.cpp @@ -0,0 +1,70 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + +class Rcm : public ::testing::Test { +protected: + using v_type = double; + using i_type = int; + using reorder_type = gko::reorder::Rcm; + + Rcm() + : exec(gko::ReferenceExecutor::create()), + rcm_factory(reorder_type::build().on(exec)) + {} + + std::shared_ptr exec; + std::unique_ptr rcm_factory; +}; + +TEST_F(Rcm, RcmFactoryKnowsItsExecutor) +{ + ASSERT_EQ(this->rcm_factory->get_executor(), this->exec); +} + +} // namespace diff --git a/core/test/solver/CMakeLists.txt b/core/test/solver/CMakeLists.txt index e017edd6bee..384563c9ef3 100644 --- a/core/test/solver/CMakeLists.txt +++ b/core/test/solver/CMakeLists.txt @@ -4,6 +4,8 @@ ginkgo_create_test(cg) ginkgo_create_test(cgs) ginkgo_create_test(fcg) ginkgo_create_test(gmres) +ginkgo_create_test(cb_gmres) +ginkgo_create_test(idr) ginkgo_create_test(ir) ginkgo_create_test(lower_trs) ginkgo_create_test(upper_trs) diff --git a/core/test/solver/bicg.cpp b/core/test/solver/bicg.cpp index 0d8763cdc86..f29620ad2e7 100644 --- a/core/test/solver/bicg.cpp +++ b/core/test/solver/bicg.cpp @@ -1,5 +1,5 
@@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ class Bicg : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction<>::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(gko::remove_complex{1e-6}) .on(exec)) .on(exec)), @@ -91,7 +91,7 @@ class Bicg : public ::testing::Test { } }; -TYPED_TEST_CASE(Bicg, gko::test::ValueTypes); +TYPED_TEST_SUITE(Bicg, gko::test::ValueTypes); TYPED_TEST(Bicg, BicgFactoryKnowsItsExecutor) @@ -181,7 +181,7 @@ TYPED_TEST(Bicg, CanSetPreconditionerGenerator) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction<>::build() + gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) .on(this->exec)) @@ -257,7 +257,7 @@ TYPED_TEST(Bicg, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr bicg_precond = Solver::build() .with_criteria( @@ -276,6 +276,18 @@ TYPED_TEST(Bicg, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Bicg, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->bicg_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Bicg, CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git a/core/test/solver/bicgstab.cpp b/core/test/solver/bicgstab.cpp index 
16d5b8a9bff..9ad4e883b5a 100644 --- a/core/test/solver/bicgstab.cpp +++ b/core/test/solver/bicgstab.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -65,7 +65,7 @@ class Bicgstab : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(gko::remove_complex{1e-6}) .on(exec)) .on(exec)), @@ -89,7 +89,7 @@ class Bicgstab : public ::testing::Test { } }; -TYPED_TEST_CASE(Bicgstab, gko::test::ValueTypes); +TYPED_TEST_SUITE(Bicgstab, gko::test::ValueTypes); TYPED_TEST(Bicgstab, BicgstabFactoryKnowsItsExecutor) @@ -249,7 +249,7 @@ TYPED_TEST(Bicgstab, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr bicgstab_precond = Solver::build() .with_criteria( @@ -268,6 +268,18 @@ TYPED_TEST(Bicgstab, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Bicgstab, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->bicgstab_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Bicgstab, CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git a/core/test/solver/cb_gmres.cpp b/core/test/solver/cb_gmres.cpp new file mode 100644 index 00000000000..8cf060190f6 --- /dev/null +++ b/core/test/solver/cb_gmres.cpp @@ -0,0 +1,380 @@ 
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/solver/cb_gmres.cpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +class CbGmres : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueEnumType())>::type; + using nc_value_type = gko::remove_complex; + using storage_helper_type = + typename std::tuple_element<1, decltype(ValueEnumType())>::type; + using Mtx = gko::matrix::Dense; + using Solver = gko::solver::CbGmres; + + CbGmres() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::initialize( + {{1.0, 2.0, 3.0}, {3.0, 2.0, -1.0}, {0.0, -1.0, 2}}, exec)), + storage_precision{storage_helper_type::value}, + cb_gmres_factory( + Solver::build() + .with_storage_precision(storage_precision) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::ResidualNormReduction::build() + .with_reduction_factor(nc_value_type{1e-6}) + .on(exec)) + .on(exec)), + solver(cb_gmres_factory->generate(mtx)), + cb_gmres_big_factory( + Solver::build() + .with_storage_precision(storage_precision) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(128u).on( + exec), + gko::stop::ResidualNormReduction::build() + .with_reduction_factor(nc_value_type{1e-6}) + .on(exec)) + .on(exec)), + big_solver(cb_gmres_big_factory->generate(mtx)) + {} + + gko::solver::cb_gmres::storage_precision storage_precision; + std::shared_ptr exec; + std::shared_ptr mtx; + std::unique_ptr cb_gmres_factory; + std::unique_ptr solver; + std::unique_ptr cb_gmres_big_factory; + std::unique_ptr big_solver; + + static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + { + ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); + ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); + for (gko::size_type i = 0; i < m1->get_size()[0]; ++i) { + for (gko::size_type j = 0; j < 
m2->get_size()[1]; ++j) { + EXPECT_EQ(m1->at(i, j), m2->at(i, j)); + } + } + } +}; + + +/** + * This creates a helper structure which translates a type into an enum + * parameter. + */ +using st_enum = gko::solver::cb_gmres::storage_precision; + +template +struct st_helper_type { + static constexpr st_enum value{P}; +}; + +using st_keep = st_helper_type; +using st_r1 = st_helper_type; +using st_r2 = st_helper_type; +using st_i = st_helper_type; +using st_ir1 = st_helper_type; +using st_ir2 = st_helper_type; + +using TestTypes = + ::testing::Types, std::tuple, + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple, st_keep>, + std::tuple, st_r1>, + std::tuple, st_r2>, + std::tuple, st_keep>>; + +TYPED_TEST_SUITE(CbGmres, TestTypes); + + +TYPED_TEST(CbGmres, CbGmresFactoryKnowsItsExecutor) +{ + ASSERT_EQ(this->cb_gmres_factory->get_executor(), this->exec); +} + + +TYPED_TEST(CbGmres, CbGmresFactoryCreatesCorrectSolver) +{ + using Solver = typename TestFixture::Solver; + + ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); + auto cb_gmres_solver = static_cast(this->solver.get()); + ASSERT_NE(cb_gmres_solver->get_system_matrix(), nullptr); + ASSERT_EQ(cb_gmres_solver->get_system_matrix(), this->mtx); + ASSERT_EQ(cb_gmres_solver->get_krylov_dim(), 100u); + ASSERT_EQ(cb_gmres_solver->get_storage_precision(), + this->storage_precision); +} + + +TYPED_TEST(CbGmres, CanBeCopied) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + auto copy = this->cb_gmres_factory->generate(Mtx::create(this->exec)); + auto r_copy = static_cast(copy.get()); + + copy->copy_from(this->solver.get()); + + ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); + auto copy_mtx = r_copy->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), + this->mtx.get()); + ASSERT_EQ(r_copy->get_storage_precision(), + this->solver->get_storage_precision()); + 
ASSERT_EQ(r_copy->get_krylov_dim(), this->solver->get_krylov_dim()); +} + + +TYPED_TEST(CbGmres, CanBeMoved) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + auto copy = this->cb_gmres_factory->generate(Mtx::create(this->exec)); + auto r_copy = static_cast(copy.get()); + + copy->copy_from(std::move(this->solver)); + + ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); + auto copy_mtx = r_copy->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), + this->mtx.get()); + ASSERT_EQ(r_copy->get_storage_precision(), this->storage_precision); + ASSERT_EQ(r_copy->get_krylov_dim(), 100u); +} + + +TYPED_TEST(CbGmres, CanBeCloned) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + auto clone = this->solver->clone(); + auto r_clone = static_cast(clone.get()); + + ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); + auto clone_mtx = r_clone->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), + this->mtx.get()); + ASSERT_EQ(r_clone->get_storage_precision(), + this->solver->get_storage_precision()); + ASSERT_EQ(r_clone->get_krylov_dim(), this->solver->get_krylov_dim()); +} + + +TYPED_TEST(CbGmres, CanBeCleared) +{ + using Solver = typename TestFixture::Solver; + this->solver->clear(); + + ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); + auto solver_mtx = + static_cast(this->solver.get())->get_system_matrix(); + ASSERT_EQ(solver_mtx, nullptr); +} + + +TYPED_TEST(CbGmres, CanSetPreconditionerGenerator) +{ + using value_type = typename TestFixture::value_type; + using nc_value_type = typename TestFixture::nc_value_type; + using Solver = typename TestFixture::Solver; + auto cb_gmres_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::ResidualNormReduction::build() + .with_reduction_factor(nc_value_type{1e-6}) + .on(this->exec)) + .with_preconditioner( + Solver::build() 
+ .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on( + this->exec)) + .on(this->exec)) + .on(this->exec); + auto solver = cb_gmres_factory->generate(this->mtx); + auto precond = + static_cast(solver.get()->get_preconditioner().get()); + + ASSERT_NE(precond, nullptr); + ASSERT_EQ(precond->get_size(), gko::dim<2>(3, 3)); + ASSERT_EQ(precond->get_system_matrix(), this->mtx); +} + + +TYPED_TEST(CbGmres, CanSetKrylovDim) +{ + using value_type = typename TestFixture::value_type; + using nc_value_type = typename TestFixture::nc_value_type; + using Solver = typename TestFixture::Solver; + const gko::size_type new_krylov_dim{4u}; + + auto cb_gmres_factory = + Solver::build() + .with_krylov_dim(new_krylov_dim) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = cb_gmres_factory->generate(this->mtx); + + ASSERT_EQ(solver->get_krylov_dim(), new_krylov_dim); + // Also test the default storage_recision + ASSERT_EQ(solver->get_storage_precision(), + gko::solver::cb_gmres::storage_precision::reduce1); +} + + +TYPED_TEST(CbGmres, CanUseSetKrylovDim) +{ + using value_type = typename TestFixture::value_type; + using nc_value_type = typename TestFixture::nc_value_type; + using Solver = typename TestFixture::Solver; + const gko::size_type new_krylov_dim{40u}; + auto cb_gmres_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = cb_gmres_factory->generate(this->mtx); + + solver->set_krylov_dim(new_krylov_dim); + + ASSERT_EQ(solver->get_krylov_dim(), new_krylov_dim); + // Also test the default storage_recision + ASSERT_EQ(solver->get_storage_precision(), + gko::solver::cb_gmres::storage_precision::reduce1); +} + + +TYPED_TEST(CbGmres, CanSetPreconditionerInFactory) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr cb_gmres_precond = + Solver::build() + .with_criteria( + 
gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(this->mtx); + + auto cb_gmres_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_generated_preconditioner(cb_gmres_precond) + .on(this->exec); + auto solver = cb_gmres_factory->generate(this->mtx); + auto precond = solver->get_preconditioner(); + + ASSERT_NE(precond.get(), nullptr); + ASSERT_EQ(precond.get(), cb_gmres_precond.get()); +} + + +TYPED_TEST(CbGmres, ThrowsOnWrongPreconditionerInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr wrong_sized_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 3}); + std::shared_ptr cb_gmres_precond = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(wrong_sized_mtx); + + auto cb_gmres_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_generated_preconditioner(cb_gmres_precond) + .on(this->exec); + + ASSERT_THROW(cb_gmres_factory->generate(this->mtx), gko::DimensionMismatch); +} + + +TYPED_TEST(CbGmres, CanSetPreconditioner) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr cb_gmres_precond = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(this->mtx); + + auto cb_gmres_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec); + auto solver = cb_gmres_factory->generate(this->mtx); + solver->set_preconditioner(cb_gmres_precond); + auto precond = solver->get_preconditioner(); + + ASSERT_NE(precond.get(), nullptr); + ASSERT_EQ(precond.get(), cb_gmres_precond.get()); +} + + +} // namespace diff --git a/core/test/solver/cg.cpp b/core/test/solver/cg.cpp index 
e6652defb0e..d4314788d7b 100644 --- a/core/test/solver/cg.cpp +++ b/core/test/solver/cg.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ class Cg : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(gko::remove_complex{1e-6}) .on(exec)) .on(exec)), @@ -91,7 +91,7 @@ class Cg : public ::testing::Test { } }; -TYPED_TEST_CASE(Cg, gko::test::ValueTypes); +TYPED_TEST_SUITE(Cg, gko::test::ValueTypes); TYPED_TEST(Cg, CgFactoryKnowsItsExecutor) @@ -180,7 +180,7 @@ TYPED_TEST(Cg, CanSetPreconditionerGenerator) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) .on(this->exec)) @@ -256,7 +256,7 @@ TYPED_TEST(Cg, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr cg_precond = Solver::build() .with_criteria( @@ -275,6 +275,18 @@ TYPED_TEST(Cg, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Cg, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->cg_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Cg, CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git 
a/core/test/solver/cgs.cpp b/core/test/solver/cgs.cpp index 04f7c31aab9..a9bcc316346 100644 --- a/core/test/solver/cgs.cpp +++ b/core/test/solver/cgs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ class Cgs : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(gko::remove_complex{1e-6}) .on(exec)) .on(exec)), @@ -91,7 +91,7 @@ class Cgs : public ::testing::Test { } }; -TYPED_TEST_CASE(Cgs, gko::test::ValueTypes); +TYPED_TEST_SUITE(Cgs, gko::test::ValueTypes); TYPED_TEST(Cgs, CgsFactoryKnowsItsExecutor) @@ -180,7 +180,7 @@ TYPED_TEST(Cgs, CanSetPreconditionerGenerator) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) .on(this->exec)) @@ -256,7 +256,7 @@ TYPED_TEST(Cgs, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr cgs_precond = Solver::build() .with_criteria( @@ -275,6 +275,18 @@ TYPED_TEST(Cgs, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Cgs, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->cgs_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Cgs, 
CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git a/core/test/solver/fcg.cpp b/core/test/solver/fcg.cpp index 6b9c0e954a7..43052f011e4 100644 --- a/core/test/solver/fcg.cpp +++ b/core/test/solver/fcg.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ class Fcg : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(gko::remove_complex{1e-6}) .on(exec)) .on(exec)), @@ -77,7 +77,7 @@ class Fcg : public ::testing::Test { std::unique_ptr solver; }; -TYPED_TEST_CASE(Fcg, gko::test::ValueTypes); +TYPED_TEST_SUITE(Fcg, gko::test::ValueTypes); TYPED_TEST(Fcg, FcgFactoryKnowsItsExecutor) @@ -165,7 +165,7 @@ TYPED_TEST(Fcg, CanSetPreconditionerGenerator) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) .on(this->exec)) @@ -241,7 +241,7 @@ TYPED_TEST(Fcg, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr fcg_precond = Solver::build() .with_criteria( @@ -260,6 +260,18 @@ TYPED_TEST(Fcg, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Fcg, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + 
ASSERT_THROW(this->fcg_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Fcg, CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git a/core/test/solver/gmres.cpp b/core/test/solver/gmres.cpp index 4765f07183b..3f5b9510332 100644 --- a/core/test/solver/gmres.cpp +++ b/core/test/solver/gmres.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -71,7 +71,7 @@ class Gmres : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec)), @@ -81,7 +81,7 @@ class Gmres : public ::testing::Test { .with_criteria( gko::stop::Iteration::build().with_max_iters(128u).on( exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec)), @@ -110,7 +110,7 @@ class Gmres : public ::testing::Test { template constexpr gko::remove_complex Gmres::reduction_factor; -TYPED_TEST_CASE(Gmres, gko::test::ValueTypes); +TYPED_TEST_SUITE(Gmres, gko::test::ValueTypes); TYPED_TEST(Gmres, GmresFactoryKnowsItsExecutor) @@ -198,7 +198,7 @@ TYPED_TEST(Gmres, CanSetPreconditionerGenerator) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) .on(this->exec)) .with_preconditioner( @@ -254,7 +254,7 @@ TYPED_TEST(Gmres, CanSetKrylovDim) .with_krylov_dim(4u) .with_criteria( gko::stop::Iteration::build().with_max_iters(4u).on(this->exec), - gko::stop::ResidualNormReduction::build() + 
gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) .on(this->exec)) .on(this->exec); @@ -313,7 +313,7 @@ TYPED_TEST(Gmres, ThrowsOnWrongPreconditionerInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr gmres_precond = Solver::build() .with_criteria( @@ -332,6 +332,18 @@ TYPED_TEST(Gmres, ThrowsOnWrongPreconditionerInFactory) } +TYPED_TEST(Gmres, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->gmres_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Gmres, CanSetPreconditioner) { using Solver = typename TestFixture::Solver; diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp new file mode 100644 index 00000000000..62dfc1c7e03 --- /dev/null +++ b/core/test/solver/idr.cpp @@ -0,0 +1,446 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class Idr : public ::testing::Test { +protected: + using value_type = T; + using Mtx = gko::matrix::Dense; + using Solver = gko::solver::Idr; + + Idr() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::initialize( + {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), + idr_factory( + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::ResidualNorm::build() + .with_reduction_factor(gko::remove_complex{1e-6}) + .on(exec)) + .on(exec)), + solver(idr_factory->generate(mtx)) + {} + + std::shared_ptr exec; + std::shared_ptr mtx; + std::unique_ptr idr_factory; + std::unique_ptr solver; + + static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + { + ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); + ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); + for (gko::size_type i = 0; i < m1->get_size()[0]; ++i) { + for (gko::size_type j = 0; j < m2->get_size()[1]; ++j) { + EXPECT_EQ(m1->at(i, j), m2->at(i, j)); + } + } + } +}; + 
+TYPED_TEST_SUITE(Idr, gko::test::ValueTypes); + + +TYPED_TEST(Idr, IdrFactoryKnowsItsExecutor) +{ + ASSERT_EQ(this->idr_factory->get_executor(), this->exec); +} + + +TYPED_TEST(Idr, IdrFactoryCreatesCorrectSolver) +{ + using Solver = typename TestFixture::Solver; + ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); + auto idr_solver = static_cast(this->solver.get()); + ASSERT_NE(idr_solver->get_system_matrix(), nullptr); + ASSERT_EQ(idr_solver->get_system_matrix(), this->mtx); +} + + +TYPED_TEST(Idr, CanBeCopied) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + auto copy = this->idr_factory->generate(Mtx::create(this->exec)); + + copy->copy_from(this->solver.get()); + + ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), + this->mtx.get()); +} + + +TYPED_TEST(Idr, CanBeMoved) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + auto copy = this->idr_factory->generate(Mtx::create(this->exec)); + + copy->copy_from(std::move(this->solver)); + + ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), + this->mtx.get()); +} + + +TYPED_TEST(Idr, CanBeCloned) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + + auto clone = this->solver->clone(); + + ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), + this->mtx.get()); +} + + +TYPED_TEST(Idr, CanBeCleared) +{ + using Solver = typename TestFixture::Solver; + + this->solver->clear(); + + ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); + auto solver_mtx = + static_cast(this->solver.get())->get_system_matrix(); + ASSERT_EQ(solver_mtx, 
nullptr); +} + + +TYPED_TEST(Idr, ApplyUsesInitialGuessReturnsTrue) +{ + ASSERT_TRUE(this->solver->apply_uses_initial_guess()); +} + + +TYPED_TEST(Idr, CanSetPreconditionerGenerator) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + auto idr_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_preconditioner( + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on( + this->exec)) + .on(this->exec)) + .on(this->exec); + + auto solver = idr_factory->generate(this->mtx); + auto precond = dynamic_cast *>( + gko::lend(solver->get_preconditioner())); + + ASSERT_NE(precond, nullptr); + ASSERT_EQ(precond->get_size(), gko::dim<2>(3, 3)); + ASSERT_EQ(precond->get_system_matrix(), this->mtx); +} + + +TYPED_TEST(Idr, CanSetCriteriaAgain) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr init_crit = + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec); + auto idr_factory = Solver::build().with_criteria(init_crit).on(this->exec); + + ASSERT_EQ((idr_factory->get_parameters().criteria).back(), init_crit); + + auto solver = idr_factory->generate(this->mtx); + std::shared_ptr new_crit = + gko::stop::Iteration::build().with_max_iters(5u).on(this->exec); + + solver->set_stop_criterion_factory(new_crit); + auto new_crit_fac = solver->get_stop_criterion_factory(); + auto niter = + static_cast(new_crit_fac.get()) + ->get_parameters() + .max_iters; + + ASSERT_EQ(niter, 5); +} + + +TYPED_TEST(Idr, CanSetPreconditionerInFactory) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr idr_precond = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(this->mtx); + + auto idr_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + 
.with_generated_preconditioner(idr_precond) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + auto precond = solver->get_preconditioner(); + + ASSERT_NE(precond.get(), nullptr); + ASSERT_EQ(precond.get(), idr_precond.get()); +} + + +TYPED_TEST(Idr, ThrowsOnWrongPreconditionerInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr wrong_sized_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 3}); + std::shared_ptr idr_precond = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(wrong_sized_mtx); + + auto idr_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_generated_preconditioner(idr_precond) + .on(this->exec); + + ASSERT_THROW(idr_factory->generate(this->mtx), gko::DimensionMismatch); +} + + +TYPED_TEST(Idr, CanSetPreconditioner) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr idr_precond = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec) + ->generate(this->mtx); + + auto idr_factory = + Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + solver->set_preconditioner(idr_precond); + auto precond = solver->get_preconditioner(); + + ASSERT_NE(precond.get(), nullptr); + ASSERT_EQ(precond.get(), idr_precond.get()); +} + + +TYPED_TEST(Idr, CanSetSubspaceDim) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + auto idr_factory = + Solver::build() + .with_subspace_dim(8u) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + auto subspace_dim = solver->get_subspace_dim(); + + 
ASSERT_EQ(subspace_dim, 8u); +} + + +TYPED_TEST(Idr, CanSetSubspaceDimAgain) +{ + using Solver = typename TestFixture::Solver; + std::shared_ptr init_crit = + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec); + auto idr_factory = + Solver::build().with_criteria(init_crit).with_subspace_dim(10u).on( + this->exec); + + ASSERT_EQ(idr_factory->get_parameters().subspace_dim, 10); + + auto solver = idr_factory->generate(this->mtx); + + solver->set_subspace_dim(20); + + ASSERT_EQ(solver->get_subspace_dim(), 20); +} + + +TYPED_TEST(Idr, CanSetKappa) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + auto idr_factory = + Solver::build() + .with_kappa(real_type{0.05}) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + auto kappa = solver->get_kappa(); + + ASSERT_EQ(kappa, real_type{0.05}); +} + + +TYPED_TEST(Idr, CanSetKappaAgain) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + std::shared_ptr init_crit = + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec); + auto idr_factory = Solver::build() + .with_criteria(init_crit) + .with_kappa(real_type{0.05}) + .on(this->exec); + + ASSERT_EQ(idr_factory->get_parameters().kappa, real_type{0.05}); + + auto solver = idr_factory->generate(this->mtx); + + solver->set_kappa(real_type{0.3}); + + ASSERT_EQ(solver->get_kappa(), real_type{0.3}); +} + + +TYPED_TEST(Idr, CanSetDeterministic) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + auto idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + auto 
deterministic = solver->get_deterministic(); + + ASSERT_EQ(deterministic, true); +} + + +TYPED_TEST(Idr, CanSetDeterministicAgain) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + std::shared_ptr init_crit = + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec); + auto idr_factory = + Solver::build().with_criteria(init_crit).with_deterministic(true).on( + this->exec); + + ASSERT_EQ(idr_factory->get_parameters().deterministic, true); + + auto solver = idr_factory->generate(this->mtx); + + solver->set_deterministic(false); + + ASSERT_EQ(solver->get_deterministic(), false); +} + + +TYPED_TEST(Idr, CanSetComplexSubspace) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + auto idr_factory = + Solver::build() + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + .on(this->exec); + auto solver = idr_factory->generate(this->mtx); + auto complex_subspace = solver->get_complex_subspace(); + + ASSERT_EQ(complex_subspace, true); +} + + +TYPED_TEST(Idr, CanSetComplexSubspaceAgain) +{ + using Solver = typename TestFixture::Solver; + using value_type = typename TestFixture::value_type; + std::shared_ptr init_crit = + gko::stop::Iteration::build().with_max_iters(3u).on(this->exec); + auto idr_factory = + Solver::build().with_criteria(init_crit).with_complex_subspace(true).on( + this->exec); + + ASSERT_EQ(idr_factory->get_parameters().complex_subspace, true); + + auto solver = idr_factory->generate(this->mtx); + + solver->set_complex_subpsace(false); + + ASSERT_EQ(solver->get_complex_subspace(), false); +} + + +} // namespace diff --git a/core/test/solver/ir.cpp b/core/test/solver/ir.cpp index b711c511e97..93d18c99260 100644 --- a/core/test/solver/ir.cpp +++ b/core/test/solver/ir.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the 
Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ class Ir : public ::testing::Test { Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(exec)) .on(exec)), @@ -91,7 +91,7 @@ class Ir : public ::testing::Test { } }; -TYPED_TEST_CASE(Ir, gko::test::ValueTypes); +TYPED_TEST_SUITE(Ir, gko::test::ValueTypes); TYPED_TEST(Ir, IrFactoryKnowsItsExecutor) @@ -179,7 +179,7 @@ TYPED_TEST(Ir, CanSetInnerSolverInFactory) Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(this->exec)) .with_solver( @@ -252,7 +252,7 @@ TYPED_TEST(Ir, ThrowsOnWrongInnerSolverInFactory) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr ir_solver = Solver::build() .with_criteria( @@ -300,7 +300,7 @@ TYPED_TEST(Ir, ThrowOnWrongInnerSolverSet) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; std::shared_ptr wrong_sized_mtx = - Mtx::create(this->exec, gko::dim<2>{1, 3}); + Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr ir_solver = Solver::build() .with_criteria( @@ -319,6 +319,18 @@ TYPED_TEST(Ir, ThrowOnWrongInnerSolverSet) } +TYPED_TEST(Ir, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = typename TestFixture::Mtx; + using Solver = typename TestFixture::Solver; + std::shared_ptr rectangular_mtx = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->ir_factory->generate(rectangular_mtx), + gko::DimensionMismatch); +} + + TYPED_TEST(Ir, 
DefaultRelaxationFactor) { using value_type = typename TestFixture::value_type; @@ -328,7 +340,7 @@ TYPED_TEST(Ir, DefaultRelaxationFactor) gko::solver::Richardson::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(this->exec)) .on(this->exec) @@ -347,7 +359,7 @@ TYPED_TEST(Ir, UseAsRichardson) gko::solver::Richardson::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(this->exec)) .with_relaxation_factor(relaxation_factor) diff --git a/core/test/solver/lower_trs.cpp b/core/test/solver/lower_trs.cpp index be12f10ef53..42c6342f1d9 100644 --- a/core/test/solver/lower_trs.cpp +++ b/core/test/solver/lower_trs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,7 +66,7 @@ class LowerTrs : public ::testing::Test { std::unique_ptr lower_trs_factory; }; -TYPED_TEST_CASE(LowerTrs, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(LowerTrs, gko::test::ValueIndexTypes); TYPED_TEST(LowerTrs, LowerTrsFactoryKnowsItsExecutor) @@ -75,4 +75,15 @@ TYPED_TEST(LowerTrs, LowerTrsFactoryKnowsItsExecutor) } +TYPED_TEST(LowerTrs, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = gko::matrix::Dense; + std::shared_ptr rectangular_matrix = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->lower_trs_factory->generate(rectangular_matrix), + gko::DimensionMismatch); +} + + } // namespace diff --git a/core/test/solver/upper_trs.cpp b/core/test/solver/upper_trs.cpp index 1ec759fa47d..be542597a74 100644 --- a/core/test/solver/upper_trs.cpp +++ b/core/test/solver/upper_trs.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,7 +66,7 @@ class UpperTrs : public ::testing::Test { std::unique_ptr upper_trs_factory; }; -TYPED_TEST_CASE(UpperTrs, gko::test::ValueIndexTypes); +TYPED_TEST_SUITE(UpperTrs, gko::test::ValueIndexTypes); TYPED_TEST(UpperTrs, UpperTrsFactoryKnowsItsExecutor) @@ -75,4 +75,15 @@ TYPED_TEST(UpperTrs, UpperTrsFactoryKnowsItsExecutor) } +TYPED_TEST(UpperTrs, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = gko::matrix::Dense; + std::shared_ptr rectangular_matrix = + Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->upper_trs_factory->generate(rectangular_matrix), + gko::DimensionMismatch); +} + + } // namespace diff --git a/core/test/stop/combined.cpp b/core/test/stop/combined.cpp index 8a443790429..28cfc9dc9df 100644 --- a/core/test/stop/combined.cpp +++ b/core/test/stop/combined.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/stop/iteration.cpp b/core/test/stop/iteration.cpp index aedc443eb76..f0540b419e9 100644 --- a/core/test/stop/iteration.cpp +++ b/core/test/stop/iteration.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/stop/stopping_status.cpp b/core/test/stop/stopping_status.cpp index d9cdebc165e..45684a3d0d6 100644 --- a/core/test/stop/stopping_status.cpp +++ b/core/test/stop/stopping_status.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/stop/time.cpp b/core/test/stop/time.cpp index 53966fbacad..13e8e482d87 100644 --- a/core/test/stop/time.cpp +++ b/core/test/stop/time.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/test/utils.hpp b/core/test/utils.hpp index 89b135a01f3..f493b375831 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,15 +36,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include +#include + + #include #include +#include "core/base/extended_float.hpp" +#include "core/test/utils/array_generator.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/matrix_generator.hpp" +#include "core/test/utils/matrix_utils.hpp" +#include "core/test/utils/value_generator.hpp" namespace gko { @@ -52,58 +60,108 @@ namespace test { using ValueTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types>; +#else ::testing::Types, std::complex>; - +#endif using ComplexValueTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types>; +#else ::testing::Types, std::complex>; +#endif using IndexTypes = ::testing::Types; using ValueAndIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, gko::int32, gko::int64, + gko::size_type>; +#else ::testing::Types, std::complex, gko::int32, gko::int64, gko::size_type>; - - -using ValueIndexTypes = ::testing::Types< - std::tuple, std::tuple, - std::tuple, gko::int32>, - std::tuple, gko::int32>, std::tuple, - 
std::tuple, std::tuple, gko::int64>, - std::tuple, gko::int64>>; - - -using RealValueIndexTypes = ::testing::Types< - std::tuple, std::tuple, - std::tuple, std::tuple>; +#endif + + +using RealValueAndIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types; +#else + ::testing::Types; +#endif + + +using ValueIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, + std::tuple, gko::int32>, + std::tuple, + std::tuple, gko::int64>>; +#else + ::testing::Types< + std::tuple, std::tuple, + std::tuple, gko::int32>, + std::tuple, gko::int32>, + std::tuple, std::tuple, + std::tuple, gko::int64>, + std::tuple, gko::int64>>; +#endif + + +using RealValueIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, + std::tuple>; +#else + ::testing::Types< + std::tuple, std::tuple, + std::tuple, std::tuple>; +#endif using ComplexValueIndexTypes = +#if GINKGO_DPCPP_SINGLE_MODE + ::testing::Types, gko::int32>, + std::tuple, gko::int64>>; +#else ::testing::Types, gko::int32>, std::tuple, gko::int32>, std::tuple, gko::int64>, std::tuple, gko::int64>>; +#endif -template +template struct reduction_factor { - static constexpr gko::remove_complex value = - std::is_same, float>::value ? 
1.0e-7 : 1.0e-14; + using nc_output = remove_complex; + using nc_precision = remove_complex; + static constexpr nc_output value{ + std::numeric_limits::epsilon() * nc_output{10}}; }; -template -constexpr gko::remove_complex reduction_factor::value; +template +constexpr remove_complex + reduction_factor::value; } // namespace test } // namespace gko -template -using r = typename gko::test::reduction_factor; +template +using r = typename gko::test::reduction_factor; + + +template +constexpr double r_mixed() +{ + return std::max(r::value, r::value); +} template diff --git a/core/test/utils/CMakeLists.txt b/core/test/utils/CMakeLists.txt index 223dd62e1f4..9b3e0e5e349 100644 --- a/core/test/utils/CMakeLists.txt +++ b/core/test/utils/CMakeLists.txt @@ -1,2 +1,6 @@ -ginkgo_create_test(matrix_generator_test) +ginkgo_create_test(array_generator_test) ginkgo_create_test(assertions_test) +ginkgo_create_test(matrix_generator_test) +ginkgo_create_test(matrix_utils_test) +ginkgo_create_test(unsort_matrix_test) +ginkgo_create_test(value_generator_test) diff --git a/core/test/utils/array_generator.hpp b/core/test/utils/array_generator.hpp new file mode 100644 index 00000000000..8dbdeacc9a7 --- /dev/null +++ b/core/test/utils/array_generator.hpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ +#define GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ + + +#include +#include +#include + + +#include "core/test/utils/value_generator.hpp" + + +namespace gko { +namespace test { + + +/** + * Generate a random array + * + * @tparam ValueType valuetype of the array to generate + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num the number of elements of array + * @param value_dist distribution of array values + * @param engine a random engine + * @param exec executor where the array should be allocated + * + * @return Array + */ +template +Array generate_random_array(size_type num, + ValueDistribution &&value_dist, + Engine &&engine, + std::shared_ptr exec) +{ + Array array(exec->get_master(), num); + auto val = array.get_data(); + for (int i = 0; i < num; i++) { + val[i] = detail::get_rand_value(value_dist, engine); + } + array.set_executor(exec); + 
return array; +} + + +} // namespace test +} // namespace gko + + +#endif // GKO_CORE_TEST_UTILS_ARRAY_GENERATOR_HPP_ diff --git a/core/test/utils/array_generator_test.cpp b/core/test/utils/array_generator_test.cpp new file mode 100644 index 00000000000..ba8c6651be9 --- /dev/null +++ b/core/test/utils/array_generator_test.cpp @@ -0,0 +1,125 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/test/utils/array_generator.hpp" + + +#include +#include + + +#include + + +#include "core/test/utils.hpp" + +namespace { + + +template +class ArrayGenerator : public ::testing::Test { +protected: + using value_type = T; + + ArrayGenerator() : exec(gko::ReferenceExecutor::create()) + { + array = gko::test::generate_random_array( + 500, std::normal_distribution>(20.0, 5.0), + std::ranlux48(42), exec); + } + + std::shared_ptr exec; + gko::Array array; + + template + ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, + InputIterator sample_end, Closure closure_op) + { + using std::pow; + ValueType res = 0; + ValueType num_elems = 0; + while (sample_start != sample_end) { + auto tmp = *(sample_start++); + res += pow(closure_op(tmp) - c, n); + num_elems += 1; + } + return res / num_elems; + } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } +}; + +TYPED_TEST_SUITE(ArrayGenerator, gko::test::ValueTypes); + + +TYPED_TEST(ArrayGenerator, OutputHasCorrectSize) +{ + ASSERT_EQ(this->array.get_num_elems(), 500); +} + + +TYPED_TEST(ArrayGenerator, OutputHasCorrectAverageAndDeviation) +{ + using std::sqrt; + using T = typename TestFixture::value_type; + + // check the real part + this->template check_average_and_deviation( + this->array.get_const_data(), + this->array.get_const_data() + this->array.get_num_elems(), 20.0, 5.0, + [](T &val) { 
return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + this->array.get_const_data(), + this->array.get_const_data() + this->array.get_num_elems(), 20.0, + 5.0, [](T &val) { return gko::imag(val); }); + } +} + + +} // namespace diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 0515b32879c..57e885e569c 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -680,14 +680,14 @@ namespace detail { template -std::initializer_list> l( - std::initializer_list> list) +const std::initializer_list> &l( + const std::initializer_list> &list) { return list; } template -std::initializer_list l(std::initializer_list list) +const std::initializer_list &l(const std::initializer_list &list) { return list; } @@ -711,7 +711,20 @@ T *plain_ptr(const std::unique_ptr &ptr) } template -T plain_ptr(T ptr) +const std::initializer_list &plain_ptr(const std::initializer_list &ptr) +{ + return ptr; +} + +template +const std::initializer_list> &plain_ptr( + const std::initializer_list> &ptr) +{ + return ptr; +} + +template +T *plain_ptr(T *ptr) { return ptr; } diff --git a/core/test/utils/assertions_test.cpp b/core/test/utils/assertions_test.cpp index 89d15ca585a..c5ebccb10ec 100644 --- a/core/test/utils/assertions_test.cpp +++ b/core/test/utils/assertions_test.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 171e4b2dd69..9a101f4b043 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,28 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -namespace gko { -namespace test { -namespace detail { +#include "core/test/utils/value_generator.hpp" -template -typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) -{ - return dist(gen); -} - - -template -typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) -{ - return ValueType(dist(gen), dist(gen)); -} - - -} // namespace detail +namespace gko { +namespace test { /** @@ -87,6 +70,8 @@ get_rand_value(Distribution &&dist, Generator &&gen) * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -145,6 +130,8 @@ std::unique_ptr generate_random_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename Engine, typename... 
MatrixArgs> @@ -206,6 +193,8 @@ std::unique_ptr generate_random_sparsity_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -291,6 +280,8 @@ std::unique_ptr generate_random_triangular_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -325,6 +316,8 @@ std::unique_ptr generate_random_lower_triangular_matrix( * @param engine a random engine * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType */ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> @@ -340,6 +333,53 @@ std::unique_ptr generate_random_upper_triangular_matrix( } +/** + * Generates a random square band matrix. + * + * @tparam MatrixType type of matrix to generate (matrix::Dense must implement + * the interface `ConvertibleTo`) + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * @tparam MatrixArgs the arguments from the matrix to be forwarded. 
+ * + * @param size number of rows and columns + * @param lower_bandwidth number of nonzeros in each row left of the main + * diagonal + * @param upper_bandwidth number of nonzeros in each row right of the main + * diagonal + * @param value_dist distribution of matrix values + * @param engine a random engine + * @param exec executor where the matrix should be allocated + * @param args additional arguments for the matrix constructor + * + * @return the unique pointer of MatrixType + */ +template , typename ValueDistribution, + typename Engine, typename... MatrixArgs> +std::unique_ptr generate_random_band_matrix( + size_type size, size_type lower_bandwidth, size_type upper_bandwidth, + ValueDistribution &&value_dist, Engine &&engine, + std::shared_ptr exec, MatrixArgs &&... args) +{ + using value_type = typename MatrixType::value_type; + using index_type = typename MatrixType::index_type; + + matrix_data data{gko::dim<2>{size, size}, {}}; + for (size_type row = 0; row < size; ++row) { + for (size_type col = row < lower_bandwidth ? 0 : row - lower_bandwidth; + col <= std::min(row + upper_bandwidth, size - 1); col++) { + auto val = detail::get_rand_value(value_dist, engine); + data.nonzeros.emplace_back(row, col, val); + } + } + + // convert to the correct matrix type + auto result = MatrixType::create(exec, std::forward(args)...); + result->read(data); + return result; +} + + } // namespace test } // namespace gko diff --git a/core/test/utils/matrix_generator_test.cpp b/core/test/utils/matrix_generator_test.cpp index 8a585994dc0..687560a202b 100644 --- a/core/test/utils/matrix_generator_test.cpp +++ b/core/test/utils/matrix_generator_test.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -40,129 +40,204 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/utils.hpp" + + namespace { +template class MatrixGenerator : public ::testing::Test { protected: + using value_type = T; + using real_type = gko::remove_complex; + using mtx_type = gko::matrix::Dense; + MatrixGenerator() : exec(gko::ReferenceExecutor::create()), - mtx(gko::test::generate_random_matrix( - 500, 100, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + mtx(gko::test::generate_random_matrix( + 500, 100, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), - l_mtx(gko::test::generate_random_lower_triangular_matrix( - 4, 3, true, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + l_mtx(gko::test::generate_random_lower_triangular_matrix( + 4, 3, true, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), - u_mtx(gko::test::generate_random_upper_triangular_matrix( - 3, 4, true, std::normal_distribution(50, 5), - std::normal_distribution(20.0, 5.0), std::ranlux48(42), + u_mtx(gko::test::generate_random_upper_triangular_matrix( + 3, 4, true, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), + exec)), + lower_bandwidth(2), + upper_bandwidth(3), + band_mtx(gko::test::generate_random_band_matrix( + 100, lower_bandwidth, upper_bandwidth, + std::normal_distribution(20.0, 5.0), std::ranlux48(42), exec)), nnz_per_row_sample(500, 0), - values_sample(0) + values_sample(0), + band_values_sample(0) { // collect samples of nnz/row and values from the matrix for (int row = 0; row < mtx->get_size()[0]; ++row) { for (int col = 0; col < mtx->get_size()[1]; ++col) { auto val = mtx->at(row, col); - if (val != 0.0) { + if (val != gko::zero()) { ++nnz_per_row_sample[row]; 
values_sample.push_back(val); } } } + + // collect samples of values from the band matrix + for (int row = 0; row < band_mtx->get_size()[0]; ++row) { + for (int col = 0; col < band_mtx->get_size()[1]; ++col) { + auto val = band_mtx->at(row, col); + if ((col - row <= upper_bandwidth) && + (row - col <= lower_bandwidth)) { + band_values_sample.push_back(val); + } + } + } } std::shared_ptr exec; - std::unique_ptr> mtx; - std::unique_ptr> l_mtx; - std::unique_ptr> u_mtx; + int lower_bandwidth; + int upper_bandwidth; + std::unique_ptr mtx; + std::unique_ptr l_mtx; + std::unique_ptr u_mtx; + std::unique_ptr band_mtx; std::vector nnz_per_row_sample; - std::vector values_sample; + std::vector values_sample; + std::vector band_values_sample; + - template + template ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, - InputIterator sample_end) + InputIterator sample_end, Closure closure_op) { using std::pow; ValueType res = 0; ValueType num_elems = 0; while (sample_start != sample_end) { auto tmp = *(sample_start++); - res += pow(tmp - c, n); + res += pow(closure_op(tmp) - c, n); num_elems += 1; } return res / num_elems; } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } }; +TYPED_TEST_SUITE(MatrixGenerator, gko::test::ValueTypes); + -TEST_F(MatrixGenerator, OutputHasCorrectSize) +TYPED_TEST(MatrixGenerator, OutputHasCorrectSize) { - ASSERT_EQ(mtx->get_size(), gko::dim<2>(500, 100)); + ASSERT_EQ(this->mtx->get_size(), 
gko::dim<2>(500, 100)); } -TEST_F(MatrixGenerator, OutputHasCorrectNonzeroAverageAndDeviation) +TYPED_TEST(MatrixGenerator, OutputHasCorrectNonzeroAverageAndDeviation) { - using std::sqrt; - auto average = get_nth_moment(1, 0.0, begin(nnz_per_row_sample), - end(nnz_per_row_sample)); - auto deviation = sqrt(get_nth_moment(2, average, begin(nnz_per_row_sample), - end(nnz_per_row_sample))); - - // check that average & deviation is within 10% of the required amount - ASSERT_NEAR(average, 50.0, 5); - ASSERT_NEAR(deviation, 5.0, 0.5); + using T = typename TestFixture::value_type; + // the nonzeros only needs to check the real part + this->template check_average_and_deviation( + begin(this->nnz_per_row_sample), end(this->nnz_per_row_sample), 50.0, + 5.0, [](T val) { return gko::real(val); }); } -TEST_F(MatrixGenerator, OutputHasCorrectValuesAverageAndDeviation) +TYPED_TEST(MatrixGenerator, OutputHasCorrectValuesAverageAndDeviation) { - using std::sqrt; - auto average = - get_nth_moment(1, 0.0, begin(values_sample), end(values_sample)); - auto deviation = sqrt( - get_nth_moment(2, average, begin(values_sample), end(values_sample))); - - // check that average and deviation is within 10% of the required amount - ASSERT_NEAR(average, 20.0, 2.0); - ASSERT_NEAR(deviation, 5.0, 0.5); + using T = typename TestFixture::value_type; + // check the real part + this->template check_average_and_deviation( + begin(this->values_sample), end(this->values_sample), 20.0, 5.0, + [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(this->values_sample), end(this->values_sample), 20.0, 5.0, + [](T &val) { return gko::imag(val); }); + } } -TEST_F(MatrixGenerator, CanGenerateLowerTriangularMatrixWithDiagonalOnes) +TYPED_TEST(MatrixGenerator, CanGenerateLowerTriangularMatrixWithDiagonalOnes) { - ASSERT_EQ(l_mtx->at(0, 0), 1.0); - ASSERT_EQ(l_mtx->at(0, 1), 0.0); - 
ASSERT_EQ(l_mtx->at(0, 2), 0.0); - ASSERT_NE(l_mtx->at(1, 0), 0.0); - ASSERT_EQ(l_mtx->at(1, 1), 1.0); - ASSERT_EQ(l_mtx->at(1, 2), 0.0); - ASSERT_NE(l_mtx->at(2, 0), 0.0); - ASSERT_NE(l_mtx->at(2, 1), 0.0); - ASSERT_EQ(l_mtx->at(2, 2), 1.0); - ASSERT_NE(l_mtx->at(3, 0), 0.0); - ASSERT_NE(l_mtx->at(3, 1), 0.0); - ASSERT_NE(l_mtx->at(3, 2), 0.0); + using T = typename TestFixture::value_type; + ASSERT_EQ(this->l_mtx->at(0, 0), T{1.0}); + ASSERT_EQ(this->l_mtx->at(0, 1), T{0.0}); + ASSERT_EQ(this->l_mtx->at(0, 2), T{0.0}); + ASSERT_NE(this->l_mtx->at(1, 0), T{0.0}); + ASSERT_EQ(this->l_mtx->at(1, 1), T{1.0}); + ASSERT_EQ(this->l_mtx->at(1, 2), T{0.0}); + ASSERT_NE(this->l_mtx->at(2, 0), T{0.0}); + ASSERT_NE(this->l_mtx->at(2, 1), T{0.0}); + ASSERT_EQ(this->l_mtx->at(2, 2), T{1.0}); + ASSERT_NE(this->l_mtx->at(3, 0), T{0.0}); + ASSERT_NE(this->l_mtx->at(3, 1), T{0.0}); + ASSERT_NE(this->l_mtx->at(3, 2), T{0.0}); } -TEST_F(MatrixGenerator, CanGenerateUpperTriangularMatrixWithDiagonalOnes) +TYPED_TEST(MatrixGenerator, CanGenerateUpperTriangularMatrixWithDiagonalOnes) { - ASSERT_EQ(u_mtx->at(0, 0), 1.0); - ASSERT_NE(u_mtx->at(0, 1), 0.0); - ASSERT_NE(u_mtx->at(0, 2), 0.0); - ASSERT_NE(u_mtx->at(0, 3), 0.0); - ASSERT_EQ(u_mtx->at(1, 0), 0.0); - ASSERT_EQ(u_mtx->at(1, 1), 1.0); - ASSERT_NE(u_mtx->at(1, 2), 0.0); - ASSERT_NE(u_mtx->at(1, 3), 0.0); - ASSERT_EQ(u_mtx->at(2, 0), 0.0); - ASSERT_EQ(u_mtx->at(2, 1), 0.0); - ASSERT_EQ(u_mtx->at(2, 2), 1.0); - ASSERT_NE(u_mtx->at(2, 3), 0.0); + using T = typename TestFixture::value_type; + ASSERT_EQ(this->u_mtx->at(0, 0), T{1.0}); + ASSERT_NE(this->u_mtx->at(0, 1), T{0.0}); + ASSERT_NE(this->u_mtx->at(0, 2), T{0.0}); + ASSERT_NE(this->u_mtx->at(0, 3), T{0.0}); + ASSERT_EQ(this->u_mtx->at(1, 0), T{0.0}); + ASSERT_EQ(this->u_mtx->at(1, 1), T{1.0}); + ASSERT_NE(this->u_mtx->at(1, 2), T{0.0}); + ASSERT_NE(this->u_mtx->at(1, 3), T{0.0}); + ASSERT_EQ(this->u_mtx->at(2, 0), T{0.0}); + ASSERT_EQ(this->u_mtx->at(2, 1), T{0.0}); + 
ASSERT_EQ(this->u_mtx->at(2, 2), T{1.0}); + ASSERT_NE(this->u_mtx->at(2, 3), T{0.0}); +} + + +TYPED_TEST(MatrixGenerator, CanGenerateBandMatrix) +{ + using T = typename TestFixture::value_type; + // the elements out of band are zero + for (int row = 0; row < this->band_mtx->get_size()[0]; row++) { + for (int col = 0; col < this->band_mtx->get_size()[1]; col++) { + if ((col - row > this->upper_bandwidth) || + (row - col > this->lower_bandwidth)) { + ASSERT_EQ(this->band_mtx->at(row, col), T{0.0}); + } + } + } + // check the real part of elements in band + this->template check_average_and_deviation( + begin(this->band_values_sample), end(this->band_values_sample), 20.0, + 5.0, [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(this->band_values_sample), end(this->band_values_sample), + 20.0, 5.0, [](T &val) { return gko::imag(val); }); + } } diff --git a/core/test/utils/matrix_utils.hpp b/core/test/utils/matrix_utils.hpp new file mode 100644 index 00000000000..dc6586f07b7 --- /dev/null +++ b/core/test/utils/matrix_utils.hpp @@ -0,0 +1,158 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ +#define GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ + + +#include +#include +#include + + +#include "core/test/utils/value_generator.hpp" + + +namespace gko { +namespace test { + + +/** + * Make a symmetric matrix + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + */ +template +void make_symmetric(matrix::Dense *mtx) +{ + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { + for (size_type j = i + 1; j < mtx_host->get_size()[1]; ++j) { + mtx_host->at(i, j) = mtx_host->at(j, i); + } + } +} + + +/** + * Make a hermitian matrix + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + */ +template +void make_hermitian(matrix::Dense *mtx) +{ + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { + for (size_type j = i + 1; j < mtx_host->get_size()[1]; ++j) { + 
mtx_host->at(i, j) = conj(mtx_host->at(j, i)); + } + mtx_host->at(i, i) = gko::real(mtx_host->at(i, i)); + } +} + + +/** + * Make a (strictly) diagonal dominant matrix. It will set the diag value from + * the summation among the absolute value of the row's elements. When ratio is + * larger than 1, the result will be strictly diagonal dominant matrix except + * for the empty row. When ratio is 1, the result will be diagonal dominant + * matrix. + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + * @param ratio the scale to set the diagonal value. default is 1 and it must + * be larger than or equal to 1. + */ +template +void make_diag_dominant(matrix::Dense *mtx, + remove_complex ratio = 1.0) +{ + // To keep the diag dominant, the ratio should be larger than or equal to 1 + GKO_ASSERT_EQ(ratio >= 1.0, true); + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + + using std::abs; + for (size_type i = 0; i < mtx_host->get_size()[0]; ++i) { + auto sum = gko::zero(); + for (size_type j = 0; j < mtx_host->get_size()[1]; ++j) { + sum += abs(mtx_host->at(i, j)); + } + mtx_host->at(i, i) = sum * ratio; + } +} + + +/** + * Make a Hermitian positive definite matrix. + * + * @tparam ValueType valuetype of Dense matrix to process + * + * @param mtx the dense matrix + * @param ratio the ratio for make_diag_dominant. default is 1.001 and it must + * be larger than 1. + */ +template +void make_hpd(matrix::Dense *mtx, + remove_complex ratio = 1.001) +{ + GKO_ASSERT_IS_SQUARE_MATRIX(mtx); + // To get strictly diagonally dominant matrix, the ratio should be larger + // than 1. + GKO_ASSERT_EQ(ratio > 1.0, true); + + auto mtx_host = + make_temporary_clone(mtx->get_executor()->get_master(), mtx); + make_hermitian(mtx_host.get()); + // Construct strictly diagonally dominant matrix to ensure positive + // definite. 
In complex case, the diagonal is set as absolute value and is + // larger than 0, so it still gives positive definite. + make_diag_dominant(mtx_host.get(), ratio); +} + + +} // namespace test +} // namespace gko + +#endif // GKO_CORE_TEST_UTILS_MATRIX_UTILS_HPP_ diff --git a/core/test/utils/matrix_utils_test.cpp b/core/test/utils/matrix_utils_test.cpp new file mode 100644 index 00000000000..dea1b4a55d6 --- /dev/null +++ b/core/test/utils/matrix_utils_test.cpp @@ -0,0 +1,201 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/test/utils/matrix_utils.hpp" + + +#include +#include +#include + + +#include + + +#include "core/test/utils.hpp" +#include "core/test/utils/matrix_generator.hpp" + + +namespace { + + +template +class MatrixUtils : public ::testing::Test { +protected: + using value_type = T; + using real_type = gko::remove_complex; + using mtx_type = gko::matrix::Dense; + + MatrixUtils() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::test::generate_random_matrix( + 500, 500, std::normal_distribution(50, 5), + std::normal_distribution(20.0, 5.0), std::ranlux48(42), + exec)), + unsquare_mtx(mtx_type::create(exec, gko::dim<2>(500, 100))) + {} + + std::shared_ptr exec; + std::unique_ptr mtx; + std::unique_ptr unsquare_mtx; +}; + +TYPED_TEST_SUITE(MatrixUtils, gko::test::ValueTypes); + + +TYPED_TEST(MatrixUtils, MakeSymmetricThrowsError) +{ + ASSERT_THROW(gko::test::make_symmetric(gko::lend(this->unsquare_mtx)), + gko::DimensionMismatch); +} + +TYPED_TEST(MatrixUtils, MakeHermitianThrowsError) +{ + ASSERT_THROW(gko::test::make_hermitian(gko::lend(this->unsquare_mtx)), + gko::DimensionMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantThrowsError) +{ + ASSERT_THROW(gko::test::make_diag_dominant(gko::lend(this->mtx), 0.9), + gko::ValueMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixThrowsError) +{ + ASSERT_THROW(gko::test::make_hpd(gko::lend(this->mtx), 1.0), + 
gko::ValueMismatch); +} + + +TYPED_TEST(MatrixUtils, MakeSymmetricCorrectly) +{ + gko::test::make_symmetric(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + for (gko::size_type j = 0; j <= i; j++) { + ASSERT_EQ(this->mtx->at(i, j), this->mtx->at(j, i)); + } + } +} + + +TYPED_TEST(MatrixUtils, MakeHermitianCorrectly) +{ + gko::test::make_hermitian(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + for (gko::size_type j = 0; j <= i; j++) { + ASSERT_EQ(this->mtx->at(i, j), gko::conj(this->mtx->at(j, i))); + } + } +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantCorrectly) +{ + using T = typename TestFixture::value_type; + // make_diag_dominant also consider diag value. + // To check the ratio easily, set the diag zeros + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + this->mtx->at(i, i) = 0; + } + + gko::test::make_diag_dominant(gko::lend(this->mtx)); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + gko::remove_complex off_diag_abs = 0; + for (gko::size_type j = 0; j < this->mtx->get_size()[1]; j++) { + if (j != i) { + off_diag_abs += std::abs(this->mtx->at(i, j)); + } + } + ASSERT_NEAR(gko::real(this->mtx->at(i, i)), off_diag_abs, r::value); + } +} + + +TYPED_TEST(MatrixUtils, MakeDiagDominantWithRatioCorrectly) +{ + using T = typename TestFixture::value_type; + gko::remove_complex ratio = 1.001; + // make_diag_dominant also consider diag value. 
+ // To check the ratio easily, set the diag zeros + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + this->mtx->at(i, i) = 0; + } + + gko::test::make_diag_dominant(gko::lend(this->mtx), ratio); + + for (gko::size_type i = 0; i < this->mtx->get_size()[0]; i++) { + gko::remove_complex off_diag_abs = 0; + for (gko::size_type j = 0; j < this->mtx->get_size()[1]; j++) { + if (j != i) { + off_diag_abs += std::abs(this->mtx->at(i, j)); + } + } + ASSERT_NEAR(gko::real(this->mtx->at(i, i)), off_diag_abs * ratio, + r::value); + } +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixCorrectly) +{ + using T = typename TestFixture::value_type; + auto cpy_mtx = this->mtx->clone(); + + gko::test::make_hpd(gko::lend(this->mtx)); + gko::test::make_hermitian(gko::lend(cpy_mtx)); + gko::test::make_diag_dominant(gko::lend(cpy_mtx), 1.001); + + GKO_ASSERT_MTX_NEAR(this->mtx, cpy_mtx, r::value); +} + + +TYPED_TEST(MatrixUtils, MakeHpdMatrixWithRatioCorrectly) +{ + using T = typename TestFixture::value_type; + gko::remove_complex ratio = 1.00001; + auto cpy_mtx = this->mtx->clone(); + + gko::test::make_hpd(gko::lend(this->mtx), ratio); + gko::test::make_hermitian(gko::lend(cpy_mtx)); + gko::test::make_diag_dominant(gko::lend(cpy_mtx), ratio); + + GKO_ASSERT_MTX_NEAR(this->mtx, cpy_mtx, r::value); +} + + +} // namespace diff --git a/core/test/utils/unsort_matrix.hpp b/core/test/utils/unsort_matrix.hpp new file mode 100644 index 00000000000..b8064dfbade --- /dev/null +++ b/core/test/utils/unsort_matrix.hpp @@ -0,0 +1,130 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_UNSORT_MATRIX_HPP_ +#define GKO_CORE_TEST_UTILS_UNSORT_MATRIX_HPP_ + + +#include +#include + + +#include +#include +#include + + +#include "core/base/iterator_factory.hpp" + + +namespace gko { +namespace test { + + +// Plan for now: shuffle values and column indices to unsort the given matrix +// without changing the represented matrix. 
+template +void unsort_matrix(matrix::Csr *mtx, + RandomEngine &&engine) +{ + using value_type = ValueType; + using index_type = IndexType; + auto size = mtx->get_size(); + if (mtx->get_num_stored_elements() <= 0) { + return; + } + const auto &exec = mtx->get_executor(); + const auto &master = exec->get_master(); + + // If exec is not the master/host, extract the master and perform the + // unsorting there, followed by copying it back + if (exec != master) { + auto h_mtx = mtx->clone(master); + unsort_matrix(lend(h_mtx), engine); + mtx->copy_from(lend(h_mtx)); + return; + } + + auto vals = mtx->get_values(); + auto row_ptrs = mtx->get_row_ptrs(); + auto cols = mtx->get_col_idxs(); + + for (index_type row = 0; row < size[0]; ++row) { + auto start = row_ptrs[row]; + auto end = row_ptrs[row + 1]; + auto sort_wrapper = gko::detail::IteratorFactory( + cols + start, vals + start, end - start); + std::shuffle(sort_wrapper.begin(), sort_wrapper.end(), engine); + } +} + + +// Plan for now: shuffle values and column indices to unsort the given matrix +// without changing the represented matrix. 
+template +void unsort_matrix(matrix::Coo *mtx, + RandomEngine &&engine) +{ + using value_type = ValueType; + using index_type = IndexType; + auto nnz = mtx->get_num_stored_elements(); + if (nnz <= 0) { + return; + } + + const auto &exec = mtx->get_executor(); + const auto &master = exec->get_master(); + + // If exec is not the master/host, extract the master and perform the + // unsorting there, followed by copying it back + if (exec != master) { + auto h_mtx = mtx->clone(master); + unsort_matrix(lend(h_mtx), engine); + mtx->copy_from(lend(h_mtx)); + return; + } + matrix_data data; + mtx->write(data); + auto &nonzeros = data.nonzeros; + using nz_type = typename decltype(data)::nonzero_type; + + std::shuffle(nonzeros.begin(), nonzeros.end(), engine); + std::stable_sort(nonzeros.begin(), nonzeros.end(), + [](nz_type a, nz_type b) { return a.row < b.row; }); + mtx->read(data); +} + + +} // namespace test +} // namespace gko + +#endif // GKO_CORE_TEST_UTILS_UNSORT_MATRIX_HPP_ diff --git a/core/test/utils/unsort_matrix_test.cpp b/core/test/utils/unsort_matrix_test.cpp new file mode 100644 index 00000000000..90fbbc2ccf2 --- /dev/null +++ b/core/test/utils/unsort_matrix_test.cpp @@ -0,0 +1,207 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/test/utils/unsort_matrix.hpp" + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class UnsortMatrix : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Csr = gko::matrix::Csr; + using Coo = gko::matrix::Coo; + using Dense = gko::matrix::Dense; + UnsortMatrix() + : exec(gko::ReferenceExecutor::create()), + rand_engine(42), + csr_empty(Csr::create(exec, gko::dim<2>(0, 0))), + coo_empty(Coo::create(exec, gko::dim<2>(0, 0))) + {} + /* + Matrix used for both CSR and COO: + 1, 2, 0, 0, 0 + 0, 0, 0, 0, 0 + 3, 4, 5, 6, 0 + 0, 0, 7, 0, 0 + 0, 0, 8, 9, 10 + */ + std::unique_ptr get_sorted_csr() + { + return Csr::create(exec, gko::dim<2>{5, 5}, + I{1, 2, 3, 
4, 5, 6, 7, 8, 9, 10}, + I{0, 1, 0, 1, 2, 3, 2, 2, 3, 4}, + I{0, 2, 2, 6, 7, 10}); + } + + std::unique_ptr get_sorted_coo() + { + return Coo::create(exec, gko::dim<2>{5, 5}, + I{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + I{0, 1, 0, 1, 2, 3, 2, 2, 3, 4}, + I{0, 0, 2, 2, 2, 2, 3, 4, 4, 4}); + } + + bool is_coo_matrix_sorted(Coo *mtx) + { + auto rows = mtx->get_const_row_idxs(); + auto cols = mtx->get_const_col_idxs(); + auto nnz = mtx->get_num_stored_elements(); + + if (nnz <= 0) { + return true; + } + + auto prev_row = rows[0]; + auto prev_col = cols[0]; + for (index_type i = 0; i < nnz; ++i) { + auto cur_row = rows[i]; + auto cur_col = cols[i]; + if (prev_row == cur_row && prev_col > cur_col) { + return false; + } + prev_row = cur_row; + prev_col = cur_col; + } + return true; + } + + bool is_csr_matrix_sorted(Csr *mtx) + { + auto size = mtx->get_size(); + auto rows = mtx->get_const_row_ptrs(); + auto cols = mtx->get_const_col_idxs(); + auto nnz = mtx->get_num_stored_elements(); + + if (nnz <= 0) { + return true; + } + + for (index_type row = 0; row < size[1]; ++row) { + auto prev_col = cols[rows[row]]; + for (index_type i = rows[row]; i < rows[row + 1]; ++i) { + auto cur_col = cols[i]; + if (prev_col > cur_col) { + return false; + } + prev_col = cur_col; + } + } + return true; + } + + std::shared_ptr exec; + std::ranlux48 rand_engine; + std::unique_ptr csr_empty; + std::unique_ptr coo_empty; +}; + +TYPED_TEST_SUITE(UnsortMatrix, gko::test::ValueIndexTypes); + + +TYPED_TEST(UnsortMatrix, CsrWorks) +{ + auto csr = this->get_sorted_csr(); + const auto ref_mtx = this->get_sorted_csr(); + bool was_sorted = this->is_csr_matrix_sorted(gko::lend(csr)); + + gko::test::unsort_matrix(gko::lend(csr), this->rand_engine); + + ASSERT_FALSE(this->is_csr_matrix_sorted(gko::lend(csr))); + ASSERT_TRUE(was_sorted); + GKO_ASSERT_MTX_NEAR(csr, ref_mtx, 0.); +} + + +TYPED_TEST(UnsortMatrix, CsrWorksWithEmpty) +{ + const bool was_sorted = + this->is_csr_matrix_sorted(gko::lend(this->csr_empty)); + 
+ gko::test::unsort_matrix(gko::lend(this->csr_empty), this->rand_engine); + + ASSERT_TRUE(was_sorted); + ASSERT_EQ(this->csr_empty->get_num_stored_elements(), 0); +} + + +TYPED_TEST(UnsortMatrix, CooWorks) +{ + auto coo = this->get_sorted_coo(); + const auto ref_mtx = this->get_sorted_coo(); + const bool was_sorted = this->is_coo_matrix_sorted(gko::lend(coo)); + + gko::test::unsort_matrix(gko::lend(coo), this->rand_engine); + + ASSERT_FALSE(this->is_coo_matrix_sorted(gko::lend(coo))); + ASSERT_TRUE(was_sorted); + GKO_ASSERT_MTX_NEAR(coo, ref_mtx, 0.); +} + + +TYPED_TEST(UnsortMatrix, CooWorksWithEmpty) +{ + const bool was_sorted = + this->is_coo_matrix_sorted(gko::lend(this->coo_empty)); + + gko::test::unsort_matrix(gko::lend(this->coo_empty), this->rand_engine); + + ASSERT_TRUE(was_sorted); + ASSERT_EQ(this->coo_empty->get_num_stored_elements(), 0); +} + + +} // namespace diff --git a/core/test/utils/value_generator.hpp b/core/test/utils/value_generator.hpp new file mode 100644 index 00000000000..8791bf6ce01 --- /dev/null +++ b/core/test/utils/value_generator.hpp @@ -0,0 +1,85 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ +#define GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ + + +#include +#include + + +#include + + +namespace gko { +namespace test { +namespace detail { + + +/** + * Generate a random value. + * + * @tparam ValueType valuetype of the value + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param value_dist distribution of array values + * @param engine a random engine + * + * @return ValueType + */ +template +typename std::enable_if::value, ValueType>::type +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +{ + return value_dist(gen); +} + +/** + * Specialization for complex types. 
+ * + * @copydoc get_rand_value + */ +template +typename std::enable_if::value, ValueType>::type +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +{ + return ValueType(value_dist(gen), value_dist(gen)); +} + + +} // namespace detail +} // namespace test +} // namespace gko + +#endif // GKO_CORE_TEST_UTILS_VALUE_GENERATOR_HPP_ diff --git a/core/test/utils/value_generator_test.cpp b/core/test/utils/value_generator_test.cpp new file mode 100644 index 00000000000..58f033404a9 --- /dev/null +++ b/core/test/utils/value_generator_test.cpp @@ -0,0 +1,118 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/test/utils/value_generator.hpp" + + +#include +#include +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class ValueGenerator : public ::testing::Test { +protected: + using value_type = T; + + ValueGenerator() {} + + template + ValueType get_nth_moment(int n, ValueType c, InputIterator sample_start, + InputIterator sample_end, Closure closure_op) + { + using std::pow; + ValueType res = 0; + ValueType num_elems = 0; + while (sample_start != sample_end) { + auto tmp = *(sample_start++); + res += pow(closure_op(tmp) - c, n); + num_elems += 1; + } + return res / num_elems; + } + + template + void check_average_and_deviation( + InputIterator sample_start, InputIterator sample_end, + gko::remove_complex average_ans, + gko::remove_complex deviation_ans, Closure closure_op) + { + auto average = + this->get_nth_moment(1, gko::zero>(), + sample_start, sample_end, closure_op); + auto deviation = sqrt(this->get_nth_moment(2, average, sample_start, + sample_end, closure_op)); + + // check that average & deviation is within 10% of the required amount + ASSERT_NEAR(average, average_ans, average_ans * 0.1); + ASSERT_NEAR(deviation, deviation_ans, deviation_ans * 0.1); + } +}; + +TYPED_TEST_SUITE(ValueGenerator, gko::test::ValueTypes); + + +TYPED_TEST(ValueGenerator, OutputHasCorrectAverageAndDeviation) +{ + using T = typename 
TestFixture::value_type; + int num = 500; + std::vector values(num); + auto dist = std::normal_distribution(20.0, 5.0); + auto engine = std::ranlux48(42); + + for (int i = 0; i < num; i++) { + values.at(i) = gko::test::detail::get_rand_value(dist, engine); + } + + // check the real part + this->template check_average_and_deviation( + begin(values), end(values), 20.0, 5.0, + [](T &val) { return gko::real(val); }); + // check the imag part when the type is complex + if (!std::is_same>::value) { + this->template check_average_and_deviation( + begin(values), end(values), 20.0, 5.0, + [](T &val) { return gko::imag(val); }); + } +} + + +} // namespace diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 52560f7be07..7fa32610a40 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -16,13 +16,6 @@ if(MSVC) if("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" STREQUAL "") set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CMAKE_CUDA_ROOT_DIR}/lib/x64") endif() - - # This is modified from https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace - if(BUILD_SHARED_LIBS) - ginkgo_switch_to_windows_dynamic("CUDA") - else() - ginkgo_switch_to_windows_static("CUDA") - endif() endif() include(CudaArchitectureSelector) @@ -35,6 +28,11 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}" PARENT_SCOPE) set(CMAKE_CUDA_COMPILER_VERSION ${CMAKE_CUDA_COMPILER_VERSION} PARENT_SCOPE) set(CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS} PARENT_SCOPE) +# Detect the CUDA architecture flags and propagate to all the project +cas_variable_cuda_architectures(GINKGO_CUDA_ARCH_FLAGS + ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} + UNSUPPORTED "20" "21") +set(GINKGO_CUDA_ARCH_FLAGS "${GINKGO_CUDA_ARCH_FLAGS}" PARENT_SCOPE) # MSVC nvcc uses static cudartlibrary by default, and other platforms use shared cudartlibrary. 
# add `-cudart shared` or `-cudart=shared` according system into CMAKE_CUDA_FLAGS @@ -63,24 +61,42 @@ find_library(CUBLAS cublas HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} NO_DEFAULT_PATH) find_library(CUSPARSE cusparse HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +find_library(CURAND curand + HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) add_library(ginkgo_cuda $ "") +set(GKO_CUDA_COMMON_SOURCES + ../common/unified/components/precision_conversion.cpp + ../common/unified/matrix/coo_kernels.cpp + ../common/unified/matrix/csr_kernels.cpp + ../common/unified/matrix/dense_kernels.cpp + ../common/unified/matrix/diagonal_kernels.cpp + ../common/unified/preconditioner/jacobi_kernels.cpp + ../common/unified/solver/bicg_kernels.cpp + ../common/unified/solver/bicgstab_kernels.cpp + ../common/unified/solver/cg_kernels.cpp + ../common/unified/solver/cgs_kernels.cpp + ../common/unified/solver/fcg_kernels.cpp + ../common/unified/solver/ir_kernels.cpp + ) target_sources(ginkgo_cuda PRIVATE base/exception.cpp base/executor.cpp base/version.cpp + components/absolute_array.cu components/fill_array.cu - components/precision_conversion.cu components/prefix_sum.cu - factorization/ilu_kernels.cu factorization/factorization_kernels.cu + factorization/ic_kernels.cu + factorization/ilu_kernels.cu + factorization/par_ic_kernels.cu factorization/par_ict_kernels.cu factorization/par_ilu_kernels.cu factorization/par_ilut_approx_filter_kernel.cu factorization/par_ilut_filter_kernel.cu - factorization/par_ilut_select_kernel.cu factorization/par_ilut_select_common.cu + factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/coo_kernels.cu @@ -88,34 +104,46 @@ target_sources(ginkgo_cuda matrix/dense_kernels.cu matrix/diagonal_kernels.cu matrix/ell_kernels.cu + matrix/fbcsr_kernels.cu matrix/hybrid_kernels.cu matrix/sellp_kernels.cu matrix/sparsity_csr_kernels.cu + multigrid/amgx_pgm_kernels.cu preconditioner/isai_kernels.cu 
preconditioner/jacobi_advanced_apply_kernel.cu preconditioner/jacobi_generate_kernel.cu preconditioner/jacobi_kernels.cu preconditioner/jacobi_simple_apply_kernel.cu - solver/bicg_kernels.cu - solver/bicgstab_kernels.cu - solver/cg_kernels.cu - solver/cgs_kernels.cu - solver/fcg_kernels.cu + reorder/rcm_kernels.cu solver/gmres_kernels.cu - solver/ir_kernels.cu + solver/cb_gmres_kernels.cu + solver/idr_kernels.cu solver/lower_trs_kernels.cu solver/upper_trs_kernels.cu stop/criterion_kernels.cu - stop/residual_norm_kernels.cu) - -# This creates a compilation bug on nvcc 9.0.102 *with* the new array_deleter -# merged at commit ed12b3df5d26, and the parameter is not recognized by clang-cuda -if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND - NOT CMAKE_CUDA_COMPILER_VERSION MATCHES "9.0") + stop/residual_norm_kernels.cu + ${GKO_CUDA_COMMON_SOURCES} + ) +# override the default language mapping for the common files, set them to CUDA +foreach(source_file IN LISTS GKO_CUDA_COMMON_SOURCES) + set_source_files_properties(${source_file} PROPERTIES LANGUAGE CUDA) +endforeach(source_file) + +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") # remove false positive CUDA warnings when calling one() and zero() + # and allows the usage of std::array for nvidia GPUs target_compile_options(ginkgo_cuda PRIVATE $<$:--expt-relaxed-constexpr>) + if(MSVC) + target_compile_options(ginkgo_cuda + PRIVATE + $<$:--extended-lambda>) + else() + target_compile_options(ginkgo_cuda + PRIVATE + $<$:--expt-extended-lambda>) + endif() endif() if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) @@ -145,22 +173,25 @@ endif() target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_CUDA_COMPILER_FLAGS}>) target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) ginkgo_compile_features(ginkgo_cuda) +target_compile_definitions(ginkgo_cuda PRIVATE GKO_COMPILING_CUDA) target_include_directories(ginkgo_cuda SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) -target_link_libraries(ginkgo_cuda 
PRIVATE ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE}) - -# Need to link against ginkgo_hip for the `raw_copy_to(HipExecutor ...)` method -target_link_libraries(ginkgo_cuda PUBLIC ginkgo_hip) - -cas_target_cuda_architectures(ginkgo_cuda - ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} - UNSUPPORTED "20" "21") +target_link_libraries(ginkgo_cuda PRIVATE ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE} ${CURAND}) +target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device) +target_compile_options(ginkgo_cuda + PRIVATE "$<$:${GINKGO_CUDA_ARCH_FLAGS}>") +# we handle CUDA architecture flags for now, disable CMake handling +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + set_target_properties(ginkgo_cuda PROPERTIES CUDA_ARCHITECTURES OFF) +endif() +list(GET CUDA_RUNTIME_LIBS 0 CUDA_FIRST_LIB) +get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY) ginkgo_default_includes(ginkgo_cuda) -ginkgo_install_library(ginkgo_cuda cuda) +ginkgo_install_library(ginkgo_cuda "${GKO_CUDA_LIBDIR}") if (GINKGO_CHECK_CIRCULAR_DEPS) - ginkgo_check_headers(ginkgo_cuda) + ginkgo_check_headers(ginkgo_cuda GKO_COMPILING_CUDA) endif() if(GINKGO_BUILD_TESTS) diff --git a/cuda/base/config.hpp b/cuda/base/config.hpp index cd69b6a2c56..3aabff70d38 100644 --- a/cuda/base/config.hpp +++ b/cuda/base/config.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/base/cublas_bindings.hpp b/cuda/base/cublas_bindings.hpp index 72a67d958e9..6dfb383ca39 100644 --- a/cuda/base/cublas_bindings.hpp +++ b/cuda/base/cublas_bindings.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -206,14 +206,37 @@ GKO_BIND_CUBLAS_AXPY(ValueType, detail::not_implemented); GKO_BIND_CUBLAS_DOT(float, cublasSdot); GKO_BIND_CUBLAS_DOT(double, cublasDdot); -GKO_BIND_CUBLAS_DOT(std::complex, cublasCdotc); -GKO_BIND_CUBLAS_DOT(std::complex, cublasZdotc); +GKO_BIND_CUBLAS_DOT(std::complex, cublasCdotu); +GKO_BIND_CUBLAS_DOT(std::complex, cublasZdotu); template GKO_BIND_CUBLAS_DOT(ValueType, detail::not_implemented); #undef GKO_BIND_CUBLAS_DOT +#define GKO_BIND_CUBLAS_CONJ_DOT(ValueType, CublasName) \ + inline void conj_dot(cublasHandle_t handle, int n, const ValueType *x, \ + int incx, const ValueType *y, int incy, \ + ValueType *result) \ + { \ + GKO_ASSERT_NO_CUBLAS_ERRORS(CublasName(handle, n, as_culibs_type(x), \ + incx, as_culibs_type(y), incy, \ + as_culibs_type(result))); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_CUBLAS_CONJ_DOT(float, cublasSdot); +GKO_BIND_CUBLAS_CONJ_DOT(double, cublasDdot); +GKO_BIND_CUBLAS_CONJ_DOT(std::complex, cublasCdotc); +GKO_BIND_CUBLAS_CONJ_DOT(std::complex, cublasZdotc); +template +GKO_BIND_CUBLAS_CONJ_DOT(ValueType, detail::not_implemented); + +#undef GKO_BIND_CUBLAS_CONJ_DOT + + #define GKO_BIND_CUBLAS_NORM2(ValueType, CublasName) \ inline void norm2(cublasHandle_t handle, int n, const ValueType *x, \ int incx, remove_complex *result) \ diff --git a/cuda/base/curand_bindings.hpp b/cuda/base/curand_bindings.hpp new file mode 100644 index 00000000000..9df87b230ef --- /dev/null +++ b/cuda/base/curand_bindings.hpp @@ -0,0 +1,113 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_BASE_CURAND_BINDINGS_HPP_ +#define GKO_CUDA_BASE_CURAND_BINDINGS_HPP_ + + +#include + + +#include + + +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The CURAND namespace. 
+ * + * @ingroup curand + */ +namespace curand { + + +template +struct is_supported : std::false_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + + +inline curandGenerator_t rand_generator(int64 seed, + curandRngType generator_type) +{ + curandGenerator_t gen; + curandCreateGenerator(&gen, generator_type); + curandSetPseudoRandomGeneratorSeed(gen, seed); + return gen; +} + + +#define GKO_BIND_CURAND_RANDOM_VECTOR(ValueType, CurandName) \ + inline void rand_vector( \ + curandGenerator_t &gen, int n, remove_complex mean, \ + remove_complex stddev, ValueType *values) \ + { \ + n = is_complex() ? 2 * n : n; \ + GKO_ASSERT_NO_CURAND_ERRORS(CurandName( \ + gen, reinterpret_cast *>(values), n, \ + mean, stddev)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_CURAND_RANDOM_VECTOR(float, curandGenerateNormal); +GKO_BIND_CURAND_RANDOM_VECTOR(double, curandGenerateNormalDouble); +GKO_BIND_CURAND_RANDOM_VECTOR(std::complex, curandGenerateNormal); +GKO_BIND_CURAND_RANDOM_VECTOR(std::complex, curandGenerateNormalDouble); + + +#undef GKO_BIND_CURAND_RANDOM_VECTOR + + +} // namespace curand +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#endif // GKO_CUDA_BASE_CURAND_BINDINGS_HPP_ diff --git a/cuda/base/cusparse_bindings.hpp b/cuda/base/cusparse_bindings.hpp index 0cbb962bb15..8a3de85293b 100644 --- a/cuda/base/cusparse_bindings.hpp +++ b/cuda/base/cusparse_bindings.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -903,6 +903,20 @@ inline void destroy(csrilu02Info_t info) } +inline csric02Info_t create_ic0_info() +{ + csric02Info_t info{}; + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateCsric02Info(&info)); + return info; +} + + +inline void destroy(csric02Info_t info) +{ + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyCsric02Info(info)); +} + + // CUDA versions 9.2 and above have csrsm2. #if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020)) @@ -1352,6 +1366,104 @@ GKO_BIND_CUSPARSE_ILU0(std::complex, cusparseZcsrilu02); #undef GKO_BIND_CUSPARSE_ILU0 +template +void ic0_buffer_size(cusparseHandle_t handle, IndexType m, IndexType nnz, + const cusparseMatDescr_t descr, const ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, + size_type &buffer_size) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(ValueType, CusparseName) \ + template <> \ + inline void ic0_buffer_size( \ + cusparseHandle_t handle, int32 m, int32 nnz, \ + const cusparseMatDescr_t descr, const ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + size_type &buffer_size) \ + { \ + int tmp_buffer_size{}; \ + GKO_ASSERT_NO_CUSPARSE_ERRORS( \ + CusparseName(handle, m, nnz, descr, \ + as_culibs_type(const_cast(vals)), \ + row_ptrs, col_idxs, info, &tmp_buffer_size)); \ + buffer_size = tmp_buffer_size; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(float, cusparseScsric02_bufferSize); +GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(double, cusparseDcsric02_bufferSize); +GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(std::complex, + cusparseCcsric02_bufferSize); +GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(std::complex, + cusparseZcsric02_bufferSize); + +#undef GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE + + +template +void ic0_analysis(cusparseHandle_t handle, IndexType m, IndexType nnz, + 
const cusparseMatDescr_t descr, const ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, cusparseSolvePolicy_t policy, + void *buffer) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_CUSPARSE_IC0_ANALYSIS(ValueType, CusparseName) \ + template <> \ + inline void ic0_analysis( \ + cusparseHandle_t handle, int32 m, int32 nnz, \ + const cusparseMatDescr_t descr, const ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + cusparseSolvePolicy_t policy, void *buffer) \ + { \ + GKO_ASSERT_NO_CUSPARSE_ERRORS( \ + CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ + row_ptrs, col_idxs, info, policy, buffer)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_CUSPARSE_IC0_ANALYSIS(float, cusparseScsric02_analysis); +GKO_BIND_CUSPARSE_IC0_ANALYSIS(double, cusparseDcsric02_analysis); +GKO_BIND_CUSPARSE_IC0_ANALYSIS(std::complex, cusparseCcsric02_analysis); +GKO_BIND_CUSPARSE_IC0_ANALYSIS(std::complex, cusparseZcsric02_analysis); + +#undef GKO_BIND_CUSPARSE_ILU0_ANALYSIS + + +template +void ic0(cusparseHandle_t handle, IndexType m, IndexType nnz, + const cusparseMatDescr_t descr, ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, cusparseSolvePolicy_t policy, + void *buffer) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_CUSPARSE_IC0(ValueType, CusparseName) \ + template <> \ + inline void ic0( \ + cusparseHandle_t handle, int32 m, int32 nnz, \ + const cusparseMatDescr_t descr, ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + cusparseSolvePolicy_t policy, void *buffer) \ + { \ + GKO_ASSERT_NO_CUSPARSE_ERRORS( \ + CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ + row_ptrs, col_idxs, info, policy, buffer)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + 
+GKO_BIND_CUSPARSE_IC0(float, cusparseScsric02); +GKO_BIND_CUSPARSE_IC0(double, cusparseDcsric02); +GKO_BIND_CUSPARSE_IC0(std::complex, cusparseCcsric02); +GKO_BIND_CUSPARSE_IC0(std::complex, cusparseZcsric02); + +#undef GKO_BIND_CUSPARSE_IC0 + + } // namespace cusparse } // namespace cuda } // namespace kernels diff --git a/cuda/base/cusparse_handle.hpp b/cuda/base/cusparse_handle.hpp index 0f2733c0ed4..14858a2487d 100644 --- a/cuda/base/cusparse_handle.hpp +++ b/cuda/base/cusparse_handle.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/base/device_guard.hpp b/cuda/base/device_guard.hpp index aa347994327..63928671953 100644 --- a/cuda/base/device_guard.hpp +++ b/cuda/base/device_guard.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/base/exception.cpp b/cuda/base/exception.cpp index 93fcd5e7cfd..1a3bd3c7413 100644 --- a/cuda/base/exception.cpp +++ b/cuda/base/exception.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include @@ -78,6 +79,31 @@ std::string CublasError::get_error(int64 error_code) } +std::string CurandError::get_error(int64 error_code) +{ +#define GKO_REGISTER_CURAND_ERROR(error_name) \ + if (error_code == static_cast(error_name)) { \ + return #error_name; \ + } + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_SUCCESS); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_VERSION_MISMATCH); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_NOT_INITIALIZED); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_ALLOCATION_FAILED); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_TYPE_ERROR); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_OUT_OF_RANGE); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_LENGTH_NOT_MULTIPLE); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_DOUBLE_PRECISION_REQUIRED); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_LAUNCH_FAILURE); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_PREEXISTING_FAILURE); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_INITIALIZATION_FAILED); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_ARCH_MISMATCH); + GKO_REGISTER_CURAND_ERROR(CURAND_STATUS_INTERNAL_ERROR); + return "Unknown error"; + +#undef GKO_REGISTER_CURAND_ERROR +} + + std::string CusparseError::get_error(int64 error_code) { #define GKO_REGISTER_CUSPARSE_ERROR(error_name) \ diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 543e78131e0..b256cb93a2c 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -52,18 +53,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -#include "common/base/executor.hpp.inc" +#include "common/cuda_hip/base/executor.hpp.inc" std::shared_ptr CudaExecutor::create( - int device_id, std::shared_ptr master, bool device_reset) + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode) { return std::shared_ptr( - new CudaExecutor(device_id, std::move(master), device_reset), + new CudaExecutor(device_id, std::move(master), device_reset, + alloc_mode), [device_id](CudaExecutor *exec) { + auto device_reset = exec->get_device_reset(); + std::lock_guard guard( + nvidia_device::get_mutex(device_id)); delete exec; - if (!CudaExecutor::get_num_execs(device_id) && - exec->get_device_reset()) { + auto &num_execs = nvidia_device::get_num_execs(device_id); + num_execs--; + if (!num_execs && device_reset) { cuda::device_guard g(device_id); cudaDeviceReset(); } @@ -71,6 +78,26 @@ std::shared_ptr CudaExecutor::create( } +void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + if (this->get_device_id() < this->get_num_devices() && + this->get_device_id() >= 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()), + 13, this->get_device_id())); + + auto cuda_hwloc_obj = + mach_topo->get_pci_device(this->get_exec_info().pci_bus_id); + if (cuda_hwloc_obj) { + this->get_exec_info().numa_node = cuda_hwloc_obj->closest_numa; + this->get_exec_info().closest_pu_ids = + cuda_hwloc_obj->closest_pu_ids; + } + } +} + + void OmpExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, const void *src_ptr, void *dest_ptr) const { @@ -89,9 +116,10 @@ void CudaExecutor::raw_free(void *ptr) const noexcept if (error_code != cudaSuccess) { #if GKO_VERBOSE_LEVEL >= 1 // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable CUDA error on device " << this->device_id_ - << " in " << __func__ << ": " << cudaGetErrorName(error_code) - 
<< ": " << cudaGetErrorString(error_code) << std::endl + std::cerr << "Unrecoverable CUDA error on device " + << this->get_device_id() << " in " << __func__ << ": " + << cudaGetErrorName(error_code) << ": " + << cudaGetErrorString(error_code) << std::endl << "Exiting program" << std::endl; #endif // GKO_VERBOSE_LEVEL >= 1 std::exit(error_code); @@ -103,11 +131,17 @@ void *CudaExecutor::raw_alloc(size_type num_bytes) const { void *dev_ptr = nullptr; cuda::device_guard g(this->get_device_id()); -#ifdef NDEBUG - auto error_code = cudaMalloc(&dev_ptr, num_bytes); -#else - auto error_code = cudaMallocManaged(&dev_ptr, num_bytes); -#endif + int error_code = 0; + if (this->alloc_mode_ == allocation_mode::unified_host) { + error_code = cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachHost); + } else if (this->alloc_mode_ == allocation_mode::unified_global) { + error_code = + cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachGlobal); + } else if (this->alloc_mode_ == allocation_mode::device) { + error_code = cudaMalloc(&dev_ptr, num_bytes); + } else { + GKO_NOT_SUPPORTED(this->alloc_mode_); + } if (error_code != cudaErrorMemoryAllocation) { GKO_ASSERT_NO_CUDA_ERRORS(error_code); } @@ -127,31 +161,38 @@ void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, } -void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, +void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, const void *src_ptr, void *dest_ptr) const { +#if GINKGO_HIP_PLATFORM_NVCC == 1 if (num_bytes > 0) { cuda::device_guard g(this->get_device_id()); GKO_ASSERT_NO_CUDA_ERRORS( cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, this->get_device_id(), num_bytes)); } +#else + GKO_NOT_SUPPORTED(dest); +#endif } -void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, +void CudaExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + GKO_NOT_SUPPORTED(dest); +} + 
+ +void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, const void *src_ptr, void *dest_ptr) const { -#if GINKGO_HIP_PLATFORM_NVCC == 1 if (num_bytes > 0) { cuda::device_guard g(this->get_device_id()); GKO_ASSERT_NO_CUDA_ERRORS( cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, this->get_device_id(), num_bytes)); } -#else - GKO_NOT_SUPPORTED(this); -#endif } @@ -186,24 +227,48 @@ int CudaExecutor::get_num_devices() void CudaExecutor::set_gpu_property() { - if (device_id_ < this->get_num_devices() && device_id_ >= 0) { + if (this->get_device_id() < this->get_num_devices() && + this->get_device_id() >= 0) { cuda::device_guard g(this->get_device_id()); GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( - &major_, cudaDevAttrComputeCapabilityMajor, device_id_)); + &this->get_exec_info().major, cudaDevAttrComputeCapabilityMajor, + this->get_device_id())); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &this->get_exec_info().minor, cudaDevAttrComputeCapabilityMinor, + this->get_device_id())); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &this->get_exec_info().num_computing_units, + cudaDevAttrMultiProcessorCount, this->get_device_id())); + auto max_threads_per_block = 0; + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, + this->get_device_id())); + std::vector max_threads_per_block_dim(3, 0); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &max_threads_per_block_dim[0], cudaDevAttrMaxBlockDimX, + this->get_device_id())); GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( - &minor_, cudaDevAttrComputeCapabilityMinor, device_id_)); + &max_threads_per_block_dim[1], cudaDevAttrMaxBlockDimY, + this->get_device_id())); GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( - &num_multiprocessor_, cudaDevAttrMultiProcessorCount, device_id_)); - num_warps_per_sm_ = convert_sm_ver_to_cores(major_, minor_) / - kernels::cuda::config::warp_size; - warp_size_ = 
kernels::cuda::config::warp_size;
+            &max_threads_per_block_dim[2], cudaDevAttrMaxBlockDimZ,
+            this->get_device_id()));
+        this->get_exec_info().max_workgroup_size = max_threads_per_block;
+        this->get_exec_info().max_workitem_sizes = max_threads_per_block_dim;
+        this->get_exec_info().num_pu_per_cu =
+            convert_sm_ver_to_cores(this->get_exec_info().major,
+                                    this->get_exec_info().minor) /
+            kernels::cuda::config::warp_size;
+        this->get_exec_info().max_subgroup_size =
+            kernels::cuda::config::warp_size;
     }
 }
 
 
 void CudaExecutor::init_handles()
 {
-    if (device_id_ < this->get_num_devices() && device_id_ >= 0) {
+    if (this->get_device_id() < this->get_num_devices() &&
+        this->get_device_id() >= 0) {
         const auto id = this->get_device_id();
         cuda::device_guard g(id);
         this->cublas_handle_ = handle_manager(
diff --git a/cuda/base/kernel_launch.cuh b/cuda/base/kernel_launch.cuh
new file mode 100644
index 00000000000..50650057ca6
--- /dev/null
+++ b/cuda/base/kernel_launch.cuh
@@ -0,0 +1,137 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_
+#error \
+    "This file can only be used from inside common/unified/base/kernel_launch.hpp"
+#endif
+
+
+#include "cuda/base/device_guard.hpp"
+#include "cuda/base/types.hpp"
+#include "cuda/components/thread_ids.cuh"
+
+
+namespace gko {
+namespace kernels {
+namespace cuda {
+
+
+/**
+ * Number of threads per block used by the launchers in this file; also the
+ * bound given to __launch_bounds__ on the generic kernels.
+ */
+constexpr int default_block_size = 512;
+
+
+/**
+ * Calls `fn(tidx, args...)` for every thread whose index `tidx`, as returned
+ * by thread::get_thread_id_flat(), is smaller than `size`.
+ */
+template <typename KernelFunction, typename... KernelArgs>
+__global__ __launch_bounds__(default_block_size) void generic_kernel_1d(
+    size_type size, KernelFunction fn, KernelArgs... args)
+{
+    auto tidx = thread::get_thread_id_flat();
+    if (tidx >= size) {
+        return;
+    }
+    fn(tidx, args...);
+}
+
+
+/**
+ * Splits the index returned by thread::get_thread_id_flat() into
+ * `row = tidx / cols` and `col = tidx % cols` and calls
+ * `fn(row, col, args...)` for every `row < rows`. `cols` must not be zero.
+ */
+template <typename KernelFunction, typename... KernelArgs>
+__global__ __launch_bounds__(default_block_size) void generic_kernel_2d(
+    size_type rows, size_type cols, KernelFunction fn, KernelArgs... args)
+{
+    auto tidx = thread::get_thread_id_flat();
+    auto col = tidx % cols;
+    auto row = tidx / cols;
+    if (row >= rows) {
+        return;
+    }
+    fn(row, col, args...);
+}
+
+
+/**
+ * Runs `fn` through generic_kernel_1d for the indices [0, size) on the device
+ * of `exec`; each argument is passed through map_to_device.
+ *
+ * An empty range is a no-op: a grid with zero blocks is not a valid launch
+ * configuration.
+ */
+template <typename KernelFunction, typename... KernelArgs>
+void run_kernel(std::shared_ptr<const CudaExecutor> exec, KernelFunction fn,
+                size_type size, KernelArgs &&... args)
+{
+    if (size == 0) {
+        return;
+    }
+    gko::cuda::device_guard guard{exec->get_device_id()};
+    constexpr auto block_size = default_block_size;
+    auto num_blocks = ceildiv(size, block_size);
+    generic_kernel_1d<<<num_blocks, block_size>>>(size, fn,
+                                                  map_to_device(args)...);
+}
+
+
+/**
+ * Runs `fn` through generic_kernel_2d for size[0] rows and size[1] columns on
+ * the device of `exec`; each argument is passed through map_to_device.
+ *
+ * Nothing is launched when either dimension is zero: the grid would have zero
+ * blocks, and the kernel divides by the number of columns.
+ */
+template <typename KernelFunction, typename... KernelArgs>
+void run_kernel(std::shared_ptr<const CudaExecutor> exec, KernelFunction fn,
+                dim<2> size, KernelArgs &&... args)
+{
+    if (size[0] == 0 || size[1] == 0) {
+        return;
+    }
+    gko::cuda::device_guard guard{exec->get_device_id()};
+    constexpr auto block_size = default_block_size;
+    auto num_blocks = ceildiv(size[0] * size[1], block_size);
+    generic_kernel_2d<<<num_blocks, block_size>>>(size[0], size[1], fn,
+                                                  map_to_device(args)...);
+}
+
+
+}  // namespace cuda
+}  // namespace kernels
+}  // namespace gko
diff --git a/cuda/base/kernel_launch_solver.cuh b/cuda/base/kernel_launch_solver.cuh
new file mode 100644
index 00000000000..0dcfe21aaa6
--- /dev/null
+++ b/cuda/base/kernel_launch_solver.cuh
@@ -0,0 +1,75 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ +#error \ + "This file can only be used from inside common/unified/base/kernel_launch_solver.hpp" +#endif + + +namespace gko { +namespace kernels { +namespace cuda { + + +template +__global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver( + size_type rows, size_type cols, size_type default_stride, KernelFunction fn, + KernelArgs... args) +{ + auto tidx = thread::get_thread_id_flat(); + auto col = tidx % cols; + auto row = tidx / cols; + if (row >= rows) { + return; + } + fn(row, col, + device_unpack_solver_impl::unpack(args, default_stride)...); +} + + +template +void run_kernel_solver(std::shared_ptr exec, + KernelFunction fn, dim<2> size, size_type default_stride, + KernelArgs &&... 
args) +{ + gko::cuda::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + generic_kernel_2d_solver<<>>( + size[0], size[1], default_stride, fn, map_to_device(args)...); +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/base/math.hpp b/cuda/base/math.hpp index 7e970486a1e..e56904ceb45 100644 --- a/cuda/base/math.hpp +++ b/cuda/base/math.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -#include "common/base/math.hpp.inc" +#include "common/cuda_hip/base/math.hpp.inc" } // namespace gko diff --git a/cuda/base/pointer_mode_guard.hpp b/cuda/base/pointer_mode_guard.hpp index 89bd724bedf..72ac4f372d7 100644 --- a/cuda/base/pointer_mode_guard.hpp +++ b/cuda/base/pointer_mode_guard.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/base/types.hpp b/cuda/base/types.hpp index 580976b265f..54309b3ecaa 100644 --- a/cuda/base/types.hpp +++ b/cuda/base/types.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_CUDA_BASE_TYPES_HPP_ +#include + + #include @@ -43,9 +46,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - namespace gko { @@ -200,7 +200,7 @@ constexpr cudaDataType_t cuda_data_type_impl() #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) template @@ -223,7 +223,7 @@ constexpr cusparseIndexType_t cusparse_index_type_impl() #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) } // namespace detail @@ -246,7 +246,7 @@ constexpr cudaDataType_t cuda_data_type() #if defined(CUDA_VERSION) && \ (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) /** @@ -265,7 +265,7 @@ constexpr cusparseIndexType_t cusparse_index_type() #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10010) && !(defined(_WIN32) || defined(__CYGWIN__)))) + // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) /** diff --git a/cuda/base/version.cpp b/cuda/base/version.cpp index 41785e5fc1f..ce043aca833 100644 --- a/cuda/base/version.cpp +++ b/cuda/base/version.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without
diff --git a/cuda/components/absolute_array.cu b/cuda/components/absolute_array.cu
new file mode 100644
index 00000000000..2eb62b229a7
--- /dev/null
+++ b/cuda/components/absolute_array.cu
@@ -0,0 +1,100 @@
+/*************************************************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "core/components/absolute_array.hpp"
+
+
+#include "cuda/base/types.hpp"
+#include "cuda/components/thread_ids.cuh"
+
+
+namespace gko {
+namespace kernels {
+namespace cuda {
+namespace components {
+
+
+constexpr int default_block_size = 512;
+
+
+#include "common/cuda_hip/components/absolute_array.hpp.inc"
+
+
+/**
+ * Launches kernel::inplace_absolute_array_kernel for `data` with
+ * ceildiv(n, default_block_size) blocks of default_block_size threads.
+ * Returns without launching when n == 0, because a grid with zero blocks is
+ * not a valid launch configuration.
+ */
+template <typename ValueType>
+void inplace_absolute_array(std::shared_ptr<const CudaExecutor> exec,
+                            ValueType *data, size_type n)
+{
+    if (n == 0) {
+        return;
+    }
+    const dim3 block_size(default_block_size, 1, 1);
+    const dim3 grid_size(ceildiv(n, block_size.x), 1, 1);
+    kernel::inplace_absolute_array_kernel<<<grid_size, block_size>>>(
+        n, as_cuda_type(data));
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL);
+
+
+/**
+ * Launches kernel::outplace_absolute_array_kernel for `in` and `out` with
+ * ceildiv(n, default_block_size) blocks of default_block_size threads.
+ * Returns without launching when n == 0, because a grid with zero blocks is
+ * not a valid launch configuration.
+ */
+template <typename ValueType>
+void outplace_absolute_array(std::shared_ptr<const CudaExecutor> exec,
+                             const ValueType *in, size_type n,
+                             remove_complex<ValueType> *out)
+{
+    if (n == 0) {
+        return;
+    }
+    const dim3 block_size(default_block_size, 1, 1);
+    const dim3 grid_size(ceildiv(n, block_size.x), 1, 1);
+    kernel::outplace_absolute_array_kernel<<<grid_size, block_size>>>(
+        n, as_cuda_type(in), as_cuda_type(out));
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL);
+
+
+}  // namespace components
+}  // namespace cuda
+}  // namespace kernels
+}  // namespace gko
diff --git a/cuda/components/atomic.cuh b/cuda/components/atomic.cuh
index a2b3abda458..243a3ab507f 100644
--- a/cuda/components/atomic.cuh
+++ b/cuda/components/atomic.cuh
@@ -1,5 +1,5 @@
 /*************************************************************
-Copyright (c) 2017-2020, the Ginkgo authors
+Copyright (c) 2017-2021, the Ginkgo authors
 All rights reserved.
Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ namespace kernels { namespace cuda { -#include "common/components/atomic.hpp.inc" +#include "common/cuda_hip/components/atomic.hpp.inc" /** diff --git a/cuda/components/cooperative_groups.cuh b/cuda/components/cooperative_groups.cuh index d48815c0c6f..4e2305966b0 100644 --- a/cuda/components/cooperative_groups.cuh +++ b/cuda/components/cooperative_groups.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/components/diagonal_block_manipulation.cuh b/cuda/components/diagonal_block_manipulation.cuh index 9fb76112de4..1dfcc0d3312 100644 --- a/cuda/components/diagonal_block_manipulation.cuh +++ b/cuda/components/diagonal_block_manipulation.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace cuda { namespace csr { -#include "common/components/diagonal_block_manipulation.hpp.inc" +#include "common/cuda_hip/components/diagonal_block_manipulation.hpp.inc" } // namespace csr diff --git a/cuda/components/fill_array.cu b/cuda/components/fill_array.cu index 63344b7f94b..fd5b69b54bc 100644 --- a/cuda/components/fill_array.cu +++ b/cuda/components/fill_array.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ namespace components { constexpr int default_block_size = 512; -#include "common/components/fill_array.hpp.inc" +#include "common/cuda_hip/components/fill_array.hpp.inc" template @@ -59,9 +59,20 @@ void fill_array(std::shared_ptr exec, ValueType *array, as_cuda_type(val)); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -template GKO_DECLARE_FILL_ARRAY_KERNEL(size_type); +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); + + +template +void fill_seq_array(std::shared_ptr exec, + ValueType *array, size_type n) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + kernel::fill_seq_array<<>>( + n, as_cuda_type(array)); +} + +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL); } // namespace components diff --git a/cuda/components/format_conversion.cuh b/cuda/components/format_conversion.cuh index 17b20438524..3586627451d 100644 --- a/cuda/components/format_conversion.cuh +++ b/cuda/components/format_conversion.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CUDA_COMPONENTS_FORMAT_CONVERSION_CUH_ +#include #include @@ -41,6 +42,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "cuda/components/thread_ids.cuh" +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + namespace gko { namespace kernels { namespace cuda { @@ -109,6 +115,11 @@ __host__ size_type calculate_nwarps(std::shared_ptr exec, } else if (nnz >= 2e5) { multiple = 32; } +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING + if (_tuning_flag) { + multiple = _tuned_value; + } +#endif // GINKGO_BENCHMARK_ENABLE_TUNING return std::min(multiple * nwarps_in_cuda, size_type(ceildiv(nnz, config::warp_size))); } diff --git a/cuda/components/intrinsics.cuh b/cuda/components/intrinsics.cuh index 7726062cfa7..e8ac3323550 100644 --- a/cuda/components/intrinsics.cuh +++ b/cuda/components/intrinsics.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace cuda { -#include "common/components/intrinsics.hpp.inc" +#include "common/cuda_hip/components/intrinsics.hpp.inc" } // namespace cuda diff --git a/cuda/components/merging.cuh b/cuda/components/merging.cuh index 80b300a4daf..1a6c1c7d965 100644 --- a/cuda/components/merging.cuh +++ b/cuda/components/merging.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ namespace kernels { namespace cuda { -#include "common/components/merging.hpp.inc" +#include "common/cuda_hip/components/merging.hpp.inc" } // namespace cuda diff --git a/cuda/components/prefix_sum.cu b/cuda/components/prefix_sum.cu index 08412bfbe60..ce108fa8cf9 100644 --- a/cuda/components/prefix_sum.cu +++ b/cuda/components/prefix_sum.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ template void prefix_sum(std::shared_ptr exec, IndexType *counts, size_type num_entries) { - // prefix_sum should be on the valid array + // prefix_sum should only be performed on a valid array if (num_entries > 0) { auto num_blocks = ceildiv(num_entries, prefix_sum_block_size); Array block_sum_array(exec, num_blocks - 1); @@ -57,8 +57,8 @@ void prefix_sum(std::shared_ptr exec, IndexType *counts, start_prefix_sum <<>>(num_entries, counts, block_sums); - // add the total sum of the previous block only when the number of block - // is larger than 1. + // add the total sum of the previous block only when the number of + // blocks is larger than 1. if (num_blocks > 1) { finalize_prefix_sum <<>>(num_entries, counts, diff --git a/cuda/components/prefix_sum.cuh b/cuda/components/prefix_sum.cuh index a4f699ffee7..d761d459a90 100644 --- a/cuda/components/prefix_sum.cuh +++ b/cuda/components/prefix_sum.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace kernels { namespace cuda { -#include "common/components/prefix_sum.hpp.inc" +#include "common/cuda_hip/components/prefix_sum.hpp.inc" } // namespace cuda diff --git a/cuda/components/reduction.cuh b/cuda/components/reduction.cuh index 3badd97ddcc..9aace4fe6bd 100644 --- a/cuda/components/reduction.cuh +++ b/cuda/components/reduction.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -56,7 +56,7 @@ namespace cuda { constexpr int default_block_size = 512; -#include "common/components/reduction.hpp.inc" +#include "common/cuda_hip/components/reduction.hpp.inc" /** diff --git a/cuda/components/searching.cuh b/cuda/components/searching.cuh index 186123e04f3..2f7812075b2 100644 --- a/cuda/components/searching.cuh +++ b/cuda/components/searching.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ namespace kernels { namespace cuda { -#include "common/components/searching.hpp.inc" +#include "common/cuda_hip/components/searching.hpp.inc" } // namespace cuda diff --git a/cuda/components/segment_scan.cuh b/cuda/components/segment_scan.cuh index b9283f8bfe9..a6ea7955d55 100644 --- a/cuda/components/segment_scan.cuh +++ b/cuda/components/segment_scan.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace cuda { -#include "common/components/segment_scan.hpp.inc" +#include "common/cuda_hip/components/segment_scan.hpp.inc" } // namespace cuda diff --git a/cuda/components/sorting.cuh b/cuda/components/sorting.cuh index 9a5525f7a94..c3f274fc5af 100644 --- a/cuda/components/sorting.cuh +++ b/cuda/components/sorting.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ namespace kernels { namespace cuda { -#include "common/components/sorting.hpp.inc" +#include "common/cuda_hip/components/sorting.hpp.inc" } // namespace cuda diff --git a/cuda/components/thread_ids.cuh b/cuda/components/thread_ids.cuh index 31ebe0a28a6..3a5b93c8ee4 100644 --- a/cuda/components/thread_ids.cuh +++ b/cuda/components/thread_ids.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace cuda { namespace thread { -#include "common/components/thread_ids.hpp.inc" +#include "common/cuda_hip/components/thread_ids.hpp.inc" } // namespace thread diff --git a/cuda/components/uninitialized_array.hpp b/cuda/components/uninitialized_array.hpp index b3d9096f0c9..0300a5e6556 100644 --- a/cuda/components/uninitialized_array.hpp +++ b/cuda/components/uninitialized_array.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace cuda { -#include "common/components/uninitialized_array.hpp.inc" +#include "common/cuda_hip/components/uninitialized_array.hpp.inc" } // namespace cuda diff --git a/cuda/components/warp_blas.cuh b/cuda/components/warp_blas.cuh index 7d77ca6b354..cf8e0e4c951 100644 --- a/cuda/components/warp_blas.cuh +++ b/cuda/components/warp_blas.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -50,7 +50,7 @@ namespace kernels { namespace cuda { -#include "common/components/warp_blas.hpp.inc" +#include "common/cuda_hip/components/warp_blas.hpp.inc" } // namespace cuda diff --git a/cuda/factorization/factorization_kernels.cu b/cuda/factorization/factorization_kernels.cu index bcf861a1acf..065d59f892a 100644 --- a/cuda/factorization/factorization_kernels.cu +++ b/cuda/factorization/factorization_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include + #include "core/components/prefix_sum.hpp" #include "core/matrix/csr_builder.hpp" #include "cuda/base/config.hpp" @@ -59,7 +60,7 @@ namespace factorization { constexpr int default_block_size{512}; -#include "common/factorization/factorization_kernels.hpp.inc" +#include "common/cuda_hip/factorization/factorization_kernels.hpp.inc" template diff --git a/cuda/factorization/ic_kernels.cu b/cuda/factorization/ic_kernels.cu new file mode 100644 index 00000000000..9eb5c906cfb --- /dev/null +++ b/cuda/factorization/ic_kernels.cu @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/ic_kernels.hpp" + + +#include + + +#include "cuda/base/cusparse_bindings.hpp" +#include "cuda/base/device_guard.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The ic factorization namespace. + * + * @ingroup factor + */ +namespace ic_factorization { + + +template +void compute(std::shared_ptr exec, + matrix::Csr *m) +{ + const auto id = exec->get_device_id(); + auto handle = exec->get_cusparse_handle(); + gko::cuda::device_guard g{id}; + auto desc = cusparse::create_mat_descr(); + auto info = cusparse::create_ic0_info(); + + // get buffer size for IC + IndexType num_rows = m->get_size()[0]; + IndexType nnz = m->get_num_stored_elements(); + size_type buffer_size{}; + cusparse::ic0_buffer_size(handle, num_rows, nnz, desc, + m->get_const_values(), m->get_const_row_ptrs(), + m->get_const_col_idxs(), info, buffer_size); + + Array buffer{exec, buffer_size}; + + // set up IC(0) + cusparse::ic0_analysis(handle, num_rows, nnz, desc, m->get_const_values(), + m->get_const_row_ptrs(), m->get_const_col_idxs(), + info, CUSPARSE_SOLVE_POLICY_USE_LEVEL, + buffer.get_data()); + + cusparse::ic0(handle, num_rows, nnz, desc, m->get_values(), + m->get_const_row_ptrs(), m->get_const_col_idxs(), info, + CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer.get_data()); + + // CUDA 11.4 has a use-after-free bug on Turing +#if (CUDA_VERSION >= 11040) + 
exec->synchronize(); +#endif + + cusparse::destroy(info); + cusparse::destroy(desc); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); + + +} // namespace ic_factorization +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/factorization/ilu_kernels.cu b/cuda/factorization/ilu_kernels.cu index b7debb21bc3..954843ebf1e 100644 --- a/cuda/factorization/ilu_kernels.cu +++ b/cuda/factorization/ilu_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -81,6 +81,11 @@ void compute_lu(std::shared_ptr exec, m->get_const_row_ptrs(), m->get_const_col_idxs(), info, CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer.get_data()); + // CUDA 11.4 has a use-after-free bug on Turing +#if (CUDA_VERSION >= 11040) + exec->synchronize(); +#endif + cusparse::destroy(info); cusparse::destroy(desc); } diff --git a/cuda/factorization/par_ic_kernels.cu b/cuda/factorization/par_ic_kernels.cu new file mode 100644 index 00000000000..957eca09932 --- /dev/null +++ b/cuda/factorization/par_ic_kernels.cu @@ -0,0 +1,108 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/par_ic_kernels.hpp" + + +#include +#include +#include + + +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/thread_ids.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The parallel IC factorization namespace. 
+ * + * @ingroup factor + */ +namespace par_ic_factorization { + + +constexpr int default_block_size = 512; + + +// subwarp sizes for all warp-parallel kernels (sweep) +using compiled_kernels = + syn::value_list; + + +#include "common/cuda_hip/factorization/par_ic_kernels.hpp.inc" + + +template +void init_factor(std::shared_ptr exec, + matrix::Csr *l) +{ + auto num_rows = l->get_size()[0]; + auto num_blocks = ceildiv(num_rows, default_block_size); + auto l_row_ptrs = l->get_const_row_ptrs(); + auto l_vals = l->get_values(); + kernel::ic_init<<>>( + l_row_ptrs, as_cuda_type(l_vals), num_rows); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); + + +template +void compute_factor(std::shared_ptr exec, + size_type iterations, + const matrix::Coo *a_lower, + matrix::Csr *l) +{ + auto nnz = l->get_num_stored_elements(); + auto num_blocks = ceildiv(nnz, default_block_size); + for (size_type i = 0; i < iterations; ++i) { + kernel::ic_sweep<<>>( + a_lower->get_const_row_idxs(), a_lower->get_const_col_idxs(), + as_cuda_type(a_lower->get_const_values()), l->get_const_row_ptrs(), + l->get_const_col_idxs(), as_cuda_type(l->get_values()), + static_cast(l->get_num_stored_elements())); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); + + +} // namespace par_ic_factorization +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/factorization/par_ict_kernels.cu b/cuda/factorization/par_ict_kernels.cu index 98aa1c04831..fe44da01632 100644 --- a/cuda/factorization/par_ict_kernels.cu +++ b/cuda/factorization/par_ict_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -65,7 +65,7 @@ namespace cuda { namespace par_ict_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for all warp-parallel kernels (filter, add_candidates) @@ -73,8 +73,8 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ict_spgeam_kernels.hpp.inc" -#include "common/factorization/par_ict_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc" namespace { @@ -83,18 +83,18 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *llt, + const matrix::Csr *llh, const matrix::Csr *a, const matrix::Csr *l, matrix::Csr *l_new) { - auto num_rows = static_cast(llt->get_size()[0]); + auto num_rows = static_cast(llh->get_size()[0]); auto subwarps_per_block = default_block_size / subwarp_size; auto num_blocks = ceildiv(num_rows, subwarps_per_block); matrix::CsrBuilder l_new_builder(l_new); - auto llt_row_ptrs = llt->get_const_row_ptrs(); - auto llt_col_idxs = llt->get_const_col_idxs(); - auto llt_vals = llt->get_const_values(); + auto llh_row_ptrs = llh->get_const_row_ptrs(); + auto llh_col_idxs = llh->get_const_col_idxs(); + auto llh_vals = llh->get_const_values(); auto a_row_ptrs = a->get_const_row_ptrs(); auto a_col_idxs = a->get_const_col_idxs(); auto a_vals = a->get_const_values(); @@ -104,7 +104,7 @@ void add_candidates(syn::value_list, auto l_new_row_ptrs = l_new->get_row_ptrs(); // count non-zeros per row kernel::ict_tri_spgeam_nnz - <<>>(llt_row_ptrs, llt_col_idxs, + <<>>(llh_row_ptrs, llh_col_idxs, a_row_ptrs, a_col_idxs, l_new_row_ptrs, num_rows); @@ -122,7 +122,7 @@ void add_candidates(syn::value_list, // fill columns and values kernel::ict_tri_spgeam_init <<>>( - llt_row_ptrs, llt_col_idxs, as_cuda_type(llt_vals), a_row_ptrs, + llh_row_ptrs, 
llh_col_idxs, as_cuda_type(llh_vals), a_row_ptrs, a_col_idxs, as_cuda_type(a_vals), l_row_ptrs, l_col_idxs, as_cuda_type(l_vals), l_new_row_ptrs, l_new_col_idxs, as_cuda_type(l_new_vals), num_rows); @@ -159,14 +159,14 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_factor, compute_factor); template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llt, + const matrix::Csr *llh, const matrix::Csr *a, const matrix::Csr *l, matrix::Csr *l_new) { auto num_rows = a->get_size()[0]; auto total_nnz = - llt->get_num_stored_elements() + a->get_num_stored_elements(); + llh->get_num_stored_elements() + a->get_num_stored_elements(); auto total_nnz_per_row = total_nnz / num_rows; select_add_candidates( compiled_kernels(), @@ -174,7 +174,7 @@ void add_candidates(std::shared_ptr exec, return total_nnz_per_row <= compiled_subwarp_size || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, llt, a, l, l_new); + syn::value_list(), syn::type_list<>(), exec, llh, a, l, l_new); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/factorization/par_ilu_kernels.cu b/cuda/factorization/par_ilu_kernels.cu index 4c47f5e5438..80a0125bae2 100644 --- a/cuda/factorization/par_ilu_kernels.cu +++ b/cuda/factorization/par_ilu_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -55,7 +55,7 @@ namespace par_ilu_factorization { constexpr int default_block_size{512}; -#include "common/factorization/par_ilu_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilu_kernels.hpp.inc" template @@ -74,14 +74,12 @@ void compute_l_u_factors(std::shared_ptr exec, 1, 1}; for (size_type i = 0; i < iterations; ++i) { kernel::compute_l_u_factors<<>>( - num_elements, as_cuda_type(system_matrix->get_const_row_idxs()), - as_cuda_type(system_matrix->get_const_col_idxs()), + num_elements, system_matrix->get_const_row_idxs(), + system_matrix->get_const_col_idxs(), as_cuda_type(system_matrix->get_const_values()), - as_cuda_type(l_factor->get_const_row_ptrs()), - as_cuda_type(l_factor->get_const_col_idxs()), + l_factor->get_const_row_ptrs(), l_factor->get_const_col_idxs(), as_cuda_type(l_factor->get_values()), - as_cuda_type(u_factor->get_const_row_ptrs()), - as_cuda_type(u_factor->get_const_col_idxs()), + u_factor->get_const_row_ptrs(), u_factor->get_const_col_idxs(), as_cuda_type(u_factor->get_values())); } } diff --git a/cuda/factorization/par_ilut_approx_filter_kernel.cu b/cuda/factorization/par_ilut_approx_filter_kernel.cu index 1a17572d9bd..1a286c98447 100644 --- a/cuda/factorization/par_ilut_approx_filter_kernel.cu +++ b/cuda/factorization/par_ilut_approx_filter_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -44,7 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/prefix_sum.hpp" -#include "core/factorization/par_ilut_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/csr_kernels.hpp" @@ -77,8 +76,8 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_filter_kernels.hpp.inc" -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/cuda/factorization/par_ilut_filter_kernel.cu b/cuda/factorization/par_ilut_filter_kernel.cu index 1b2e6e921f8..db11cb6feb9 100644 --- a/cuda/factorization/par_ilut_filter_kernel.cu +++ b/cuda/factorization/par_ilut_filter_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -64,7 +64,7 @@ namespace cuda { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for filter kernels @@ -72,7 +72,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_filter_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc" namespace { @@ -159,4 +159,4 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( } // namespace par_ilut_factorization } // namespace cuda } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/cuda/factorization/par_ilut_select_common.cu b/cuda/factorization/par_ilut_select_common.cu index e4d9a4c1e93..1d2a94883c4 100644 --- a/cuda/factorization/par_ilut_select_common.cu +++ b/cuda/factorization/par_ilut_select_common.cu @@ -1,5 +1,5 @@ /************************************************************* 
-Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -55,7 +55,7 @@ namespace cuda { namespace par_ilut_factorization { -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/cuda/factorization/par_ilut_select_common.cuh b/cuda/factorization/par_ilut_select_common.cuh index 54b2a348c59..fe2ddef7401 100644 --- a/cuda/factorization/par_ilut_select_common.cuh +++ b/cuda/factorization/par_ilut_select_common.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,8 +45,8 @@ namespace cuda { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; -constexpr auto items_per_thread = 16; +constexpr int default_block_size = 512; +constexpr int items_per_thread = 16; template diff --git a/cuda/factorization/par_ilut_select_kernel.cu b/cuda/factorization/par_ilut_select_kernel.cu index 4fc5315e182..a471e55958a 100644 --- a/cuda/factorization/par_ilut_select_kernel.cu +++ b/cuda/factorization/par_ilut_select_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ namespace cuda { namespace par_ilut_factorization { -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/cuda/factorization/par_ilut_spgeam_kernel.cu b/cuda/factorization/par_ilut_spgeam_kernel.cu index 1efb704e272..074acc92084 100644 --- a/cuda/factorization/par_ilut_spgeam_kernel.cu +++ b/cuda/factorization/par_ilut_spgeam_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -65,7 +65,7 @@ namespace cuda { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for add_candidates kernels @@ -73,7 +73,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_spgeam_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc" namespace { diff --git a/cuda/factorization/par_ilut_sweep_kernel.cu b/cuda/factorization/par_ilut_sweep_kernel.cu index 91b68b723da..f5674942059 100644 --- a/cuda/factorization/par_ilut_sweep_kernel.cu +++ b/cuda/factorization/par_ilut_sweep_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -65,7 +65,7 @@ namespace cuda { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for all warp-parallel kernels (filter, add_candidates) @@ -73,7 +73,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc" namespace { diff --git a/cuda/get_info.cmake b/cuda/get_info.cmake index 4fbf19d5dca..3d91ea9f23a 100644 --- a/cuda/get_info.cmake +++ b/cuda/get_info.cmake @@ -2,6 +2,7 @@ ginkgo_print_module_header(${detailed_log} "CUDA") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCHITECTURES") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_COMPILER_FLAGS") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_DEFAULT_HOST_COMPILER") +ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCH_FLAGS") ginkgo_print_module_footer(${detailed_log} "CUDA variables:") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER_VERSION") diff --git a/cuda/matrix/coo_kernels.cu b/cuda/matrix/coo_kernels.cu index 056925c8bdd..f41a3551296 100644 --- a/cuda/matrix/coo_kernels.cu +++ b/cuda/matrix/coo_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/components/fill_array.hpp" #include "core/matrix/dense_kernels.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cusparse_bindings.hpp" @@ -74,7 +73,7 @@ constexpr int warps_in_block = 4; constexpr int spmv_block_size = warps_in_block * config::warp_size; -#include "common/matrix/coo_kernels.hpp.inc" +#include "common/cuda_hip/matrix/coo_kernels.hpp.inc" template @@ -82,9 +81,7 @@ void spmv(std::shared_ptr exec, const matrix::Coo *a, const matrix::Dense *b, matrix::Dense *c) { - components::fill_array(exec, c->get_values(), c->get_num_stored_elements(), - zero()); - + dense::fill(exec, c, zero()); spmv2(exec, a, b, c); } @@ -246,29 +243,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_COO_CONVERT_TO_DENSE_KERNEL); -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Coo *orig, - matrix::Diagonal *diag) -{ - const auto nnz = orig->get_num_stored_elements(); - const auto diag_size = diag->get_size()[0]; - const auto num_blocks = ceildiv(nnz, default_block_size); - - const auto orig_values = orig->get_const_values(); - const auto orig_row_idxs = orig->get_const_row_idxs(); - const auto orig_col_idxs = orig->get_const_col_idxs(); - auto diag_values = diag->get_values(); - - kernel::extract_diagonal<<>>( - nnz, as_cuda_type(orig_values), as_cuda_type(orig_row_idxs), - as_cuda_type(orig_col_idxs), as_cuda_type(diag_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL); - - } // namespace coo } // namespace cuda } // namespace kernels diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index 7316ecfa890..c6b41ebd03b 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -80,7 +80,6 @@ namespace csr { constexpr int default_block_size = 512; constexpr int warps_in_block = 4; constexpr int spmv_block_size = warps_in_block * config::warp_size; -constexpr int wsize = config::warp_size; constexpr int classical_overweight = 32; @@ -97,7 +96,7 @@ using spgeam_kernels = syn::value_list; -#include "common/matrix/csr_kernels.hpp.inc" +#include "common/cuda_hip/matrix/csr_kernels.hpp.inc" namespace host_kernel { @@ -1144,49 +1143,82 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum(exec, permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_symm_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_cuda_type(permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; +void row_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, 
default_block_size); + row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + row_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_cuda_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_row_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_cuda_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_max_nnz_per_row(std::shared_ptr exec, const matrix::Csr *source, diff --git 
a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index f0fe2cfae7e..477bdb89e54 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -64,10 +64,10 @@ namespace cuda { namespace dense { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/dense_kernels.hpp.inc" +#include "common/cuda_hip/matrix/dense_kernels.hpp.inc" template @@ -117,76 +117,6 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); -template -void scale(std::shared_ptr exec, - const matrix::Dense *alpha, matrix::Dense *x) -{ - if (cublas::is_supported::value && x->get_size()[1] == 1) { - cublas::scal(exec->get_cublas_handle(), x->get_size()[0], - alpha->get_const_values(), x->get_values(), - x->get_stride()); - } else { - // TODO: tune this parameter - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(x->get_size()[0] * x->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, - block_size / config::warp_size}; - kernel::scale<<>>( - x->get_size()[0], x->get_size()[1], alpha->get_size()[1], - as_cuda_type(alpha->get_const_values()), - as_cuda_type(x->get_values()), x->get_stride()); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); - - -template -void add_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) -{ - if (cublas::is_supported::value && x->get_size()[1] == 1) { - cublas::axpy(exec->get_cublas_handle(), x->get_size()[0], - alpha->get_const_values(), x->get_const_values(), - x->get_stride(), y->get_values(), y->get_stride()); - } else { - // TODO: tune this parameter - 
constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(x->get_size()[0] * x->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, - block_size / config::warp_size}; - kernel::add_scaled<<>>( - x->get_size()[0], x->get_size()[1], alpha->get_size()[1], - as_cuda_type(alpha->get_const_values()), - as_cuda_type(x->get_const_values()), x->get_stride(), - as_cuda_type(y->get_values()), y->get_stride()); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) -{ - const auto size = y->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - - kernel::add_scaled_diag<<>>( - size, as_cuda_type(alpha->get_const_values()), - as_cuda_type(x->get_const_values()), as_cuda_type(y->get_values()), - y->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); - - template void compute_dot(std::shared_ptr exec, const matrix::Dense *x, @@ -205,8 +135,8 @@ void compute_dot(std::shared_ptr exec, // TODO: these are tuning parameters obtained experimentally, once // we decide how to handle this uniformly, they should be modified // appropriately - constexpr auto work_per_thread = 32; - constexpr auto block_size = 1024; + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; constexpr auto work_per_block = work_per_thread * block_size; const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); @@ -219,9 +149,10 @@ void compute_dot(std::shared_ptr exec, x->get_size()[0], as_cuda_type(x->get_const_values() + col), x->get_stride(), as_cuda_type(y->get_const_values() + col), y->get_stride(), as_cuda_type(work.get_data())); - kernel::finalize_dot_computation<<<1, block_dim>>>( - grid_dim.x, as_cuda_type(work.get_const_data()), - as_cuda_type(result->get_values() + col)); + 
kernel::finalize_sum_reduce_computation + <<<1, block_dim>>>(grid_dim.x, + as_cuda_type(work.get_const_data()), + as_cuda_type(result->get_values() + col)); } } } @@ -229,6 +160,50 @@ void compute_dot(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +template +void compute_conj_dot(std::shared_ptr exec, + const matrix::Dense *x, + const matrix::Dense *y, + matrix::Dense *result) +{ + if (cublas::is_supported::value) { + // TODO: write a custom kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + cublas::conj_dot(exec->get_cublas_handle(), x->get_size()[0], + x->get_const_values() + col, x->get_stride(), + y->get_const_values() + col, y->get_stride(), + result->get_values() + col); + } + } else { + // TODO: these are tuning parameters obtained experimentally, once + // we decide how to handle this uniformly, they should be modified + // appropriately + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; + + constexpr auto work_per_block = work_per_thread * block_size; + const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); + const dim3 block_dim{config::warp_size, 1, + block_size / config::warp_size}; + Array work(exec, grid_dim.x); + // TODO: write a kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + kernel::compute_partial_conj_dot + <<>>( + x->get_size()[0], as_cuda_type(x->get_const_values() + col), + x->get_stride(), as_cuda_type(y->get_const_values() + col), + y->get_stride(), as_cuda_type(work.get_data())); + kernel::finalize_sum_reduce_computation + <<<1, block_dim>>>(grid_dim.x, + as_cuda_type(work.get_const_data()), + as_cuda_type(result->get_values() + col)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const matrix::Dense *x, @@ -245,8 +220,8 @@ void compute_norm2(std::shared_ptr 
exec, // TODO: these are tuning parameters obtained experimentally, once // we decide how to handle this uniformly, they should be modified // appropriately - constexpr auto work_per_thread = 32; - constexpr auto block_size = 1024; + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; constexpr auto work_per_block = work_per_thread * block_size; const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); @@ -258,9 +233,10 @@ void compute_norm2(std::shared_ptr exec, kernel::compute_partial_norm2<<>>( x->get_size()[0], as_cuda_type(x->get_const_values() + col), x->get_stride(), as_cuda_type(work.get_data())); - kernel::finalize_norm2_computation<<<1, block_dim>>>( - grid_dim.x, as_cuda_type(work.get_const_data()), - as_cuda_type(result->get_values() + col)); + kernel::finalize_sqrt_reduce_computation + <<<1, block_dim>>>(grid_dim.x, + as_cuda_type(work.get_const_data()), + as_cuda_type(result->get_values() + col)); } } } @@ -574,7 +550,7 @@ void transpose(std::shared_ptr exec, } }; -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template @@ -599,106 +575,7 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CONJ_TRANSPOSE_KERNEL); - - -template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::row_permute<<>>( - orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), - as_cuda_type(orig->get_const_values()), orig->get_stride(), - as_cuda_type(row_permuted->get_values()), row_permuted->get_stride()); -} - 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ROW_PERMUTE_KERNEL); - - -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::column_permute<<>>( - orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), - as_cuda_type(orig->get_const_values()), orig->get_stride(), - as_cuda_type(column_permuted->get_values()), - column_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_COLUMN_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::inverse_row_permute<<>>( - orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), - as_cuda_type(orig->get_const_values()), orig->get_stride(), - as_cuda_type(row_permuted->get_values()), row_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::inverse_column_permute<<>>( - 
orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), - as_cuda_type(orig->get_const_values()), orig->get_stride(), - as_cuda_type(column_permuted->get_values()), - column_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL); - - -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Diagonal *diag) -{ - const dim3 grid_dim = ceildiv(diag->get_size()[0], default_block_size); - kernel::extract_diagonal<<>>( - orig->get_size()[0], as_cuda_type(orig->get_const_values()), - orig->get_stride(), as_cuda_type(diag->get_values())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); } // namespace dense diff --git a/cuda/matrix/diagonal_kernels.cu b/cuda/matrix/diagonal_kernels.cu index 3b969fb8c92..bea495fdc2f 100644 --- a/cuda/matrix/diagonal_kernels.cu +++ b/cuda/matrix/diagonal_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -54,61 +54,10 @@ namespace cuda { namespace diagonal { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/diagonal_kernels.hpp.inc" - - -template -void apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto c_stride = c->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto diag_values = a->get_const_values(); - const auto b_values = b->get_const_values(); - auto c_values = c->get_values(); - - kernel::apply_to_dense<<>>( - num_rows, num_cols, as_cuda_type(diag_values), b_stride, - as_cuda_type(b_values), c_stride, as_cuda_type(c_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); - - -template -void right_apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto c_stride = c->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto diag_values = a->get_const_values(); - const auto b_values = b->get_const_values(); - auto c_values = c->get_values(); - - kernel::right_apply_to_dense<<>>( - num_rows, num_cols, as_cuda_type(diag_values), b_stride, - as_cuda_type(b_values), c_stride, as_cuda_type(c_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL); +#include "common/cuda_hip/matrix/diagonal_kernels.hpp.inc" template @@ -134,67 +83,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( 
GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL); -template -void right_apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) -{ - const auto num_nnz = b->get_num_stored_elements(); - const auto diag_values = a->get_const_values(); - c->copy_from(b); - auto csr_values = c->get_values(); - const auto csr_col_idxs = c->get_const_col_idxs(); - - const auto grid_dim = ceildiv(num_nnz, default_block_size); - kernel::right_apply_to_csr<<>>( - num_nnz, as_cuda_type(diag_values), as_cuda_type(csr_col_idxs), - as_cuda_type(csr_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL); - - -template -void convert_to_csr(std::shared_ptr exec, - const matrix::Diagonal *source, - matrix::Csr *result) -{ - const auto size = source->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - - const auto diag_values = source->get_const_values(); - auto row_ptrs = result->get_row_ptrs(); - auto col_idxs = result->get_col_idxs(); - auto csr_values = result->get_values(); - - kernel::convert_to_csr<<>>( - size, as_cuda_type(diag_values), as_cuda_type(row_ptrs), - as_cuda_type(col_idxs), as_cuda_type(csr_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL); - - -template -void conj_transpose(std::shared_ptr exec, - const matrix::Diagonal *orig, - matrix::Diagonal *trans) -{ - const auto size = orig->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - const auto orig_values = orig->get_const_values(); - auto trans_values = trans->get_values(); - - kernel::conj_transpose<<>>( - size, as_cuda_type(orig_values), as_cuda_type(trans_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL); - - } // namespace diagonal } // namespace cuda } // namespace kernels diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index 937c0d3b010..fe0cb2a2c44 
100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "accessor/reduced_row_major.hpp" +#include "core/base/mixed_precision_types.hpp" #include "core/components/fill_array.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/dense_kernels.hpp" @@ -102,21 +104,42 @@ constexpr int max_thread_per_worker = 32; using compiled_kernels = syn::value_list; -#include "common/matrix/ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/ell_kernels.hpp.inc" namespace { +template +GKO_INLINE auto as_cuda_accessor( + const acc::range> &acc) +{ + return acc::range< + acc::reduced_row_major, cuda_type>>( + acc.get_accessor().get_size(), + as_cuda_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_stride()); +} + -template +template void abstract_spmv(syn::value_list, int num_worker_per_row, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Ell *a, + const matrix::Dense *b, + matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) { + using a_accessor = + gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; + using b_accessor = + gko::acc::reduced_row_major<2, OutputValueType, const InputValueType>; + const auto nrows = a->get_size()[0]; + const auto stride = a->get_stride(); + const auto num_stored_elements_per_row = + a->get_num_stored_elements_per_row(); + constexpr int num_thread_per_worker = (info == 0) ? 
max_thread_per_worker : info; constexpr bool atomic = (info == 0); @@ -124,22 +147,29 @@ void abstract_spmv(syn::value_list, int num_worker_per_row, num_thread_per_worker, 1); const dim3 grid_size(ceildiv(nrows * num_worker_per_row, block_size.x), b->get_size()[1], 1); + + const auto a_vals = gko::acc::range( + std::array{{num_stored_elements_per_row * stride}}, + a->get_const_values()); + const auto b_vals = gko::acc::range( + std::array{{b->get_size()[0], b->get_size()[1]}}, + b->get_const_values(), std::array{{b->get_stride()}}); + if (alpha == nullptr && beta == nullptr) { kernel::spmv <<>>( - nrows, num_worker_per_row, as_cuda_type(a->get_const_values()), - a->get_const_col_idxs(), a->get_stride(), - a->get_num_stored_elements_per_row(), - as_cuda_type(b->get_const_values()), b->get_stride(), - as_cuda_type(c->get_values()), c->get_stride()); + nrows, num_worker_per_row, as_cuda_accessor(a_vals), + a->get_const_col_idxs(), stride, num_stored_elements_per_row, + as_cuda_accessor(b_vals), as_cuda_type(c->get_values()), + c->get_stride()); } else if (alpha != nullptr && beta != nullptr) { + const auto alpha_val = gko::acc::range( + std::array{1}, alpha->get_const_values()); kernel::spmv <<>>( - nrows, num_worker_per_row, - as_cuda_type(alpha->get_const_values()), - as_cuda_type(a->get_const_values()), a->get_const_col_idxs(), - a->get_stride(), a->get_num_stored_elements_per_row(), - as_cuda_type(b->get_const_values()), b->get_stride(), + nrows, num_worker_per_row, as_cuda_accessor(alpha_val), + as_cuda_accessor(a_vals), a->get_const_col_idxs(), stride, + num_stored_elements_per_row, as_cuda_accessor(b_vals), as_cuda_type(beta->get_const_values()), as_cuda_type(c->get_values()), c->get_stride()); } else { @@ -194,10 +224,12 @@ std::array compute_thread_worker_and_atomicity( } // namespace -template +template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Ell *a, + const matrix::Dense *b, + 
matrix::Dense *c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -212,7 +244,8 @@ void spmv(std::shared_ptr exec, const int info = (!atomic) * num_thread_per_worker; if (atomic) { components::fill_array(exec, c->get_values(), - c->get_num_stored_elements(), zero()); + c->get_num_stored_elements(), + zero()); } select_abstract_spmv( compiled_kernels(), @@ -221,16 +254,18 @@ void spmv(std::shared_ptr exec, c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ELL_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_SPMV_KERNEL); -template +template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense *alpha, + const matrix::Ell *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -253,7 +288,7 @@ void advanced_spmv(std::shared_ptr exec, alpha, beta); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL); diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu new file mode 100644 index 00000000000..6f7bc48cf92 --- /dev/null +++ b/cuda/matrix/fbcsr_kernels.cu @@ -0,0 +1,176 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "cuda/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + 
Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/hybrid_kernels.cu b/cuda/matrix/hybrid_kernels.cu index 7b731559672..b699aa52e32 100644 --- a/cuda/matrix/hybrid_kernels.cu +++ b/cuda/matrix/hybrid_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -66,7 +66,7 @@ constexpr int default_block_size = 512; constexpr int warps_in_block = 4; -#include "common/matrix/hybrid_kernels.hpp.inc" +#include "common/cuda_hip/matrix/hybrid_kernels.hpp.inc" template diff --git a/cuda/matrix/sellp_kernels.cu b/cuda/matrix/sellp_kernels.cu index 1f1a978734f..a53ae60be82 100644 --- a/cuda/matrix/sellp_kernels.cu +++ b/cuda/matrix/sellp_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -59,10 +59,10 @@ namespace cuda { namespace sellp { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/sellp_kernels.hpp.inc" +#include "common/cuda_hip/matrix/sellp_kernels.hpp.inc" template diff --git a/cuda/matrix/sparsity_csr_kernels.cu b/cuda/matrix/sparsity_csr_kernels.cu index 69d2e53fe37..b312e2a7872 100644 --- a/cuda/matrix/sparsity_csr_kernels.cu +++ b/cuda/matrix/sparsity_csr_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu new file mode 100644 index 00000000000..8c811a638a5 --- /dev/null +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -0,0 +1,178 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/multigrid/amgx_pgm_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" +#include "cuda/base/cusparse_bindings.hpp" +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/atomic.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The AMGX_PGM solver namespace. 
+ * + * @ingroup amgx_pgm + */ +namespace amgx_pgm { + + +constexpr int default_block_size = 512; + + +#include "common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc" + + +template +void match_edge(std::shared_ptr exec, + const Array &strongest_neighbor, + Array &agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::match_edge_kernel<<>>( + num, strongest_neighbor.get_const_data(), agg.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); + + +template +void count_unagg(std::shared_ptr exec, + const Array &agg, IndexType *num_unagg) +{ + Array active_agg(exec, agg.get_num_elems()); + const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); + kernel::activate_kernel<<>>( + active_agg.get_num_elems(), agg.get_const_data(), + active_agg.get_data()); + *num_unagg = reduce_add_array(exec, active_agg.get_num_elems(), + active_agg.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); + + +template +void renumber(std::shared_ptr exec, Array &agg, + IndexType *num_agg) +{ + const auto num = agg.get_num_elems(); + Array agg_map(exec, num + 1); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::fill_agg_kernel<<>>( + num, agg.get_const_data(), agg_map.get_data()); + components::prefix_sum(exec, agg_map.get_data(), agg_map.get_num_elems()); + kernel::renumber_kernel<<>>( + num, agg_map.get_const_data(), agg.get_data()); + *num_agg = exec->copy_val_to_host(agg_map.get_const_data() + num); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); + + +template +void find_strongest_neighbor( + std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, Array &agg, + Array &strongest_neighbor) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + kernel::find_strongest_neighbor_kernel<<>>( + num, 
weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + agg.get_data(), strongest_neighbor.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); + +template +void assign_to_exist_agg(std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, + Array &agg, + Array &intermediate_agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + if (intermediate_agg.get_num_elems() > 0) { + // determinstic kernel + kernel::assign_to_exist_agg_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_const_data(), + intermediate_agg.get_data()); + // Copy the intermediate_agg to agg + agg = intermediate_agg; + } else { + // undeterminstic kernel + kernel::assign_to_exist_agg_kernel<<>>( + num, weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_data()); + } +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); + + +} // namespace amgx_pgm +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/preconditioner/isai_kernels.cu b/cuda/preconditioner/isai_kernels.cu index 858c82584d5..87732ea6dad 100644 --- a/cuda/preconditioner/isai_kernels.cu +++ b/cuda/preconditioner/isai_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,7 +66,9 @@ constexpr int subwarps_per_block{2}; constexpr int default_block_size{subwarps_per_block * subwarp_size}; -#include "common/preconditioner/isai_kernels.hpp.inc" +#include "common/cuda_hip/components/atomic.hpp.inc" +#include "common/cuda_hip/components/warp_blas.hpp.inc" +#include "common/cuda_hip/preconditioner/isai_kernels.hpp.inc" template @@ -107,6 +109,32 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL); +template +void generate_general_inverse(std::shared_ptr exec, + const matrix::Csr *input, + matrix::Csr *inverse, + IndexType *excess_rhs_ptrs, + IndexType *excess_nz_ptrs, bool spd) +{ + const auto num_rows = input->get_size()[0]; + + const dim3 block(default_block_size, 1, 1); + const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + kernel::generate_general_inverse + <<>>( + static_cast(num_rows), input->get_const_row_ptrs(), + input->get_const_col_idxs(), + as_cuda_type(input->get_const_values()), inverse->get_row_ptrs(), + inverse->get_col_idxs(), as_cuda_type(inverse->get_values()), + excess_rhs_ptrs, excess_nz_ptrs, spd); + components::prefix_sum(exec, excess_rhs_ptrs, num_rows + 1); + components::prefix_sum(exec, excess_nz_ptrs, num_rows + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL); + + template void generate_excess_system(std::shared_ptr exec, const matrix::Csr *input, @@ -114,12 +142,13 @@ void generate_excess_system(std::shared_ptr exec, const IndexType *excess_rhs_ptrs, const IndexType *excess_nz_ptrs, matrix::Csr *excess_system, - matrix::Dense *excess_rhs) + matrix::Dense *excess_rhs, + size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; const dim3 block(default_block_size, 1, 1); - const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / 
subwarp_size), 1, 1); kernel::generate_excess_system<<>>( static_cast(num_rows), input->get_const_row_ptrs(), input->get_const_col_idxs(), as_cuda_type(input->get_const_values()), @@ -127,27 +156,45 @@ void generate_excess_system(std::shared_ptr exec, excess_rhs_ptrs, excess_nz_ptrs, excess_system->get_row_ptrs(), excess_system->get_col_idxs(), as_cuda_type(excess_system->get_values()), - as_cuda_type(excess_rhs->get_values())); + as_cuda_type(excess_rhs->get_values()), e_start, e_end); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL); +template +void scale_excess_solution(std::shared_ptr, + const IndexType *excess_block_ptrs, + matrix::Dense *excess_solution, + size_type e_start, size_type e_end) +{ + const dim3 block(default_block_size, 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / subwarp_size), 1, 1); + kernel::scale_excess_solution<<>>( + excess_block_ptrs, as_cuda_type(excess_solution->get_values()), e_start, + e_end); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL); + + template void scatter_excess_solution(std::shared_ptr exec, const IndexType *excess_rhs_ptrs, const matrix::Dense *excess_solution, - matrix::Csr *inverse) + matrix::Csr *inverse, + size_type e_start, size_type e_end) { const auto num_rows = inverse->get_size()[0]; const dim3 block(default_block_size, 1, 1); - const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / subwarp_size), 1, 1); kernel::copy_excess_solution<<>>( static_cast(num_rows), inverse->get_const_row_ptrs(), excess_rhs_ptrs, as_cuda_type(excess_solution->get_const_values()), - as_cuda_type(inverse->get_values())); + as_cuda_type(inverse->get_values()), e_start, e_end); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/preconditioner/jacobi_advanced_apply_kernel.cu b/cuda/preconditioner/jacobi_advanced_apply_kernel.cu index 
2dc9aeaf23b..7ed975868ef 100644 --- a/cuda/preconditioner/jacobi_advanced_apply_kernel.cu +++ b/cuda/preconditioner/jacobi_advanced_apply_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -60,7 +60,7 @@ namespace cuda { namespace jacobi { -#include "common/preconditioner/jacobi_advanced_apply_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc" namespace { diff --git a/cuda/preconditioner/jacobi_common.hpp b/cuda/preconditioner/jacobi_common.hpp index 3c76bb78388..21920f4fe63 100644 --- a/cuda/preconditioner/jacobi_common.hpp +++ b/cuda/preconditioner/jacobi_common.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/preconditioner/jacobi_generate_kernel.cu b/cuda/preconditioner/jacobi_generate_kernel.cu index 0f1c52e9621..b897bdc90d9 100644 --- a/cuda/preconditioner/jacobi_generate_kernel.cu +++ b/cuda/preconditioner/jacobi_generate_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ namespace cuda { namespace jacobi { -#include "common/preconditioner/jacobi_generate_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc" namespace { diff --git a/cuda/preconditioner/jacobi_kernels.cu b/cuda/preconditioner/jacobi_kernels.cu index e0662499762..77303eb16d7 100644 --- a/cuda/preconditioner/jacobi_kernels.cu +++ b/cuda/preconditioner/jacobi_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -66,11 +66,11 @@ constexpr int default_num_warps = 32; constexpr int default_grid_size = 32 * 32 * 128; -#include "common/preconditioner/jacobi_kernels.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc" template -size_type find_natural_blocks(std::shared_ptr exec, +size_type find_natural_blocks(std::shared_ptr exec, const matrix::Csr *mtx, int32 max_block_size, IndexType *__restrict__ block_ptrs) @@ -95,7 +95,7 @@ size_type find_natural_blocks(std::shared_ptr exec, template inline size_type agglomerate_supervariables( - std::shared_ptr exec, int32 max_block_size, + std::shared_ptr exec, int32 max_block_size, size_type num_natural_blocks, IndexType *block_ptrs) { Array nums(exec, 1); @@ -111,7 +111,7 @@ inline size_type agglomerate_supervariables( } // namespace -void initialize_precisions(std::shared_ptr exec, +void initialize_precisions(std::shared_ptr exec, const Array &source, Array &precisions) { @@ -126,7 +126,7 @@ void initialize_precisions(std::shared_ptr exec, template -void find_blocks(std::shared_ptr exec, +void find_blocks(std::shared_ptr exec, const matrix::Csr *system_matrix, uint32 max_block_size, size_type &num_blocks, Array &block_pointers) @@ -230,7 +230,7 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense( - std::shared_ptr exec, size_type num_blocks, + std::shared_ptr exec, size_type num_blocks, const Array &block_precisions, const Array &block_pointers, const Array &blocks, const preconditioner::block_interleaved_storage_scheme diff --git a/cuda/preconditioner/jacobi_simple_apply_kernel.cu b/cuda/preconditioner/jacobi_simple_apply_kernel.cu index fb6721bbdca..216af08cca7 100644 --- a/cuda/preconditioner/jacobi_simple_apply_kernel.cu +++ b/cuda/preconditioner/jacobi_simple_apply_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -60,7 +60,7 @@ namespace cuda { namespace jacobi { -#include "common/preconditioner/jacobi_simple_apply_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc" namespace { diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu new file mode 100644 index 00000000000..a970b6e86b2 --- /dev/null +++ b/cuda/reorder/rcm_kernels.cu @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/reorder/rcm_kernels.hpp" + + +#include +#include +#include +#include +#include +#include + + +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/prefix_sum.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The reordering namespace. 
+ * + * @ingroup reorder + */ +namespace rcm { + + +template +void get_degree_of_nodes(std::shared_ptr exec, + const IndexType num_vertices, + const IndexType *const row_ptrs, + IndexType *const degrees) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); + + +template +void get_permutation( + std::shared_ptr exec, const IndexType num_vertices, + const IndexType *const row_ptrs, const IndexType *const col_idxs, + const IndexType *const degrees, IndexType *const permutation, + IndexType *const inv_permutation, + const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); + + +} // namespace rcm +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/solver/bicg_kernels.cu b/cuda/solver/bicg_kernels.cu deleted file mode 100644 index 175198d26d9..00000000000 --- a/cuda/solver/bicg_kernels.cu +++ /dev/null @@ -1,144 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/bicg_kernels.hpp" - - -#include -#include - - -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/thread_ids.cuh" - - -namespace gko { -namespace kernels { -namespace cuda { -/** - * @brief The BICG solver namespace. 
- * - * @ingroup bicg - */ -namespace bicg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/bicg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *r2, - matrix::Dense *z2, matrix::Dense *p2, - matrix::Dense *q2, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - initialize_kernel<<>>( - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(z->get_values()), as_cuda_type(p->get_values()), - as_cuda_type(q->get_values()), as_cuda_type(r2->get_values()), - as_cuda_type(z2->get_values()), as_cuda_type(p2->get_values()), - as_cuda_type(q2->get_values()), as_cuda_type(prev_rho->get_values()), - as_cuda_type(rho->get_values()), as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - matrix::Dense *p2, const matrix::Dense *z2, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_1_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_cuda_type(p->get_values()), as_cuda_type(z->get_const_values()), - as_cuda_type(p2->get_values()), as_cuda_type(z2->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(prev_rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); - - -template -void 
step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *r2, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *q2, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_2_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_cuda_type(x->get_values()), as_cuda_type(r->get_values()), - as_cuda_type(r2->get_values()), as_cuda_type(p->get_const_values()), - as_cuda_type(q->get_const_values()), - as_cuda_type(q2->get_const_values()), - as_cuda_type(beta->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_2_KERNEL); - - -} // namespace bicg -} // namespace cuda -} // namespace kernels -} // namespace gko diff --git a/cuda/solver/bicgstab_kernels.cu b/cuda/solver/bicgstab_kernels.cu deleted file mode 100644 index a0e5376cf69..00000000000 --- a/cuda/solver/bicgstab_kernels.cu +++ /dev/null @@ -1,201 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. 
Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/bicgstab_kernels.hpp" - - -#include -#include - - -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/thread_ids.cuh" - - -namespace gko { -namespace kernels { -namespace cuda { -/** - * @brief The BICGSTAB solver namespace. 
- * - * @ingroup bicgstab - */ -namespace bicgstab { - - -constexpr int default_block_size = 512; - - -#include "common/solver/bicgstab_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *rr, matrix::Dense *y, - matrix::Dense *s, matrix::Dense *t, - matrix::Dense *z, matrix::Dense *v, - matrix::Dense *p, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *alpha, - matrix::Dense *beta, matrix::Dense *gamma, - matrix::Dense *omega, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - initialize_kernel<<>>( - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(rr->get_values()), as_cuda_type(y->get_values()), - as_cuda_type(s->get_values()), as_cuda_type(t->get_values()), - as_cuda_type(z->get_values()), as_cuda_type(v->get_values()), - as_cuda_type(p->get_values()), as_cuda_type(prev_rho->get_values()), - as_cuda_type(rho->get_values()), as_cuda_type(alpha->get_values()), - as_cuda_type(beta->get_values()), as_cuda_type(gamma->get_values()), - as_cuda_type(omega->get_values()), - as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *p, - const matrix::Dense *v, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const matrix::Dense *alpha, - const matrix::Dense *omega, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - step_1_kernel<<>>( - r->get_size()[0], r->get_size()[1], r->get_stride(), - as_cuda_type(r->get_const_values()), as_cuda_type(p->get_values()), - 
as_cuda_type(v->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(prev_rho->get_const_values()), - as_cuda_type(alpha->get_const_values()), - as_cuda_type(omega->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *s, - const matrix::Dense *v, - const matrix::Dense *rho, - matrix::Dense *alpha, - const matrix::Dense *beta, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - step_2_kernel<<>>( - r->get_size()[0], r->get_size()[1], r->get_stride(), - as_cuda_type(r->get_const_values()), as_cuda_type(s->get_values()), - as_cuda_type(v->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(alpha->get_values()), - as_cuda_type(beta->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); - - -template -void step_3( - std::shared_ptr exec, matrix::Dense *x, - matrix::Dense *r, const matrix::Dense *s, - const matrix::Dense *t, const matrix::Dense *y, - const matrix::Dense *z, const matrix::Dense *alpha, - const matrix::Dense *beta, const matrix::Dense *gamma, - matrix::Dense *omega, const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - step_3_kernel<<>>( - r->get_size()[0], r->get_size()[1], r->get_stride(), x->get_stride(), - as_cuda_type(x->get_values()), as_cuda_type(r->get_values()), - as_cuda_type(s->get_const_values()), - as_cuda_type(t->get_const_values()), - as_cuda_type(y->get_const_values()), - as_cuda_type(z->get_const_values()), - as_cuda_type(alpha->get_const_values()), - 
as_cuda_type(beta->get_const_values()), - as_cuda_type(gamma->get_const_values()), - as_cuda_type(omega->get_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); - - -template -void finalize(std::shared_ptr exec, - matrix::Dense *x, const matrix::Dense *y, - const matrix::Dense *alpha, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(y->get_size()[0] * y->get_stride(), block_size.x), 1, 1); - - finalize_kernel<<>>( - y->get_size()[0], y->get_size()[1], y->get_stride(), x->get_stride(), - as_cuda_type(x->get_values()), as_cuda_type(y->get_const_values()), - as_cuda_type(alpha->get_const_values()), - as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL); - - -} // namespace bicgstab -} // namespace cuda -} // namespace kernels -} // namespace gko diff --git a/cuda/solver/cb_gmres_kernels.cu b/cuda/solver/cb_gmres_kernels.cu new file mode 100644 index 00000000000..f0c4cb61aa6 --- /dev/null +++ b/cuda/solver/cb_gmres_kernels.cu @@ -0,0 +1,528 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/cb_gmres_kernels.hpp" + + +#include + + +#include +#include +#include +#include + + +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/scaled_reduced_row_major.hpp" +#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/solver/cb_gmres_accessor.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/atomic.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The CB_GMRES solver namespace. 
+ * + * @ingroup cb_gmres + */ +namespace cb_gmres { + + +constexpr int default_block_size = 512; +constexpr int default_dot_dim = 32; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "common/cuda_hip/solver/cb_gmres_kernels.hpp.inc" + + +// Specialization, so the Accessor can use the same function as regular pointers +template +GKO_INLINE auto as_cuda_accessor( + const acc::range> &acc) +{ + return acc::range< + acc::reduced_row_major, cuda_type>>( + acc.get_accessor().get_size(), + as_cuda_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_stride()); +} + +template +GKO_INLINE auto as_cuda_accessor( + const acc::range> + &acc) +{ + return acc::range, + cuda_type, mask>>( + acc.get_accessor().get_size(), + as_cuda_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_storage_stride(), + as_cuda_type(acc.get_accessor().get_scalar()), + acc.get_accessor().get_scalar_stride()); +} + + +template +void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + zero_matrix_kernel<<>>(m, n, stride, + as_cuda_type(array)); +} + + +template +void initialize_1(std::shared_ptr exec, + const matrix::Dense *b, + matrix::Dense *residual, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + Array *stop_status, size_type krylov_dim) +{ + const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), + krylov_dim * b->get_size()[1]); + const dim3 grid_dim(ceildiv(num_threads, default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + + initialize_1_kernel<<>>( + b->get_size()[0], b->get_size()[1], krylov_dim, + as_cuda_type(b->get_const_values()), b->get_stride(), + as_cuda_type(residual->get_values()), residual->get_stride(), + as_cuda_type(givens_sin->get_values()), givens_sin->get_stride(), + 
as_cuda_type(givens_cos->get_values()), givens_cos->get_stride(), + as_cuda_type(stop_status->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); + + +template +void initialize_2(std::shared_ptr exec, + const matrix::Dense *residual, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + matrix::Dense> *arnoldi_norm, + Accessor3d krylov_bases, + matrix::Dense *next_krylov_basis, + Array *final_iter_nums, size_type krylov_dim) +{ + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto num_rows = residual->get_size()[0]; + const auto num_rhs = residual->get_size()[1]; + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const dim3 grid_dim_1( + ceildiv((krylov_dim + 1) * krylov_stride[0], default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + const auto stride_arnoldi = arnoldi_norm->get_stride(); + + initialize_2_1_kernel<<>>( + residual->get_size()[0], residual->get_size()[1], krylov_dim, + as_cuda_accessor(krylov_bases), + as_cuda_type(residual_norm_collection->get_values()), + residual_norm_collection->get_stride()); + kernels::cuda::dense::compute_norm2(exec, residual, residual_norm); + + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + num_rhs, zero>()); + const dim3 grid_size_nrm(ceildiv(num_rhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_size_nrm(default_dot_dim, default_dot_dim); + multinorminf_without_stop_kernel<<>>( + num_rows, num_rhs, as_cuda_type(residual->get_const_values()), + residual->get_stride(), + as_cuda_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), 0); + } + + if (gko::cb_gmres::detail::has_3d_scaled_accessor::value) { + set_scalar_kernel + <<>>( + num_rhs, krylov_dim + 1, + 
as_cuda_type(residual_norm->get_const_values()), + residual_norm->get_stride(), + as_cuda_type(arnoldi_norm->get_const_values() + + 2 * stride_arnoldi), + stride_arnoldi, as_cuda_accessor(krylov_bases)); + } + + const dim3 grid_dim_2( + ceildiv(num_rows * krylov_stride[1], default_block_size), 1, 1); + initialize_2_2_kernel<<>>( + residual->get_size()[0], residual->get_size()[1], + as_cuda_type(residual->get_const_values()), residual->get_stride(), + as_cuda_type(residual_norm->get_const_values()), + as_cuda_type(residual_norm_collection->get_values()), + as_cuda_accessor(krylov_bases), + as_cuda_type(next_krylov_basis->get_values()), + next_krylov_basis->get_stride(), + as_cuda_type(final_iter_nums->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( + GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL); + + +template +void finish_arnoldi_CGS(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + Accessor3dim krylov_bases, + matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, const stopping_status *stop_status, + stopping_status *reorth_status, + Array *num_reorth) +{ + using non_complex = remove_complex; + // optimization parameter + constexpr int singledot_block_size = default_dot_dim; + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto stride_next_krylov = next_krylov_basis->get_stride(); + const auto stride_hessenberg = hessenberg_iter->get_stride(); + const auto stride_buffer = buffer_iter->get_stride(); + const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto dim_size = next_krylov_basis->get_size(); + const dim3 grid_size(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 grid_size_num_iters(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_multiprocessor() * 2, + iter + 1); + const dim3 block_size(default_dot_dim, default_dot_dim); + // Note: having iter first (instead of row_idx 
information) is likely + // beneficial for avoiding atomic_add conflicts, but that needs + // further investigation. + const dim3 grid_size_iters_single(exec->get_num_multiprocessor() * 2, + iter + 1); + const dim3 block_size_iters_single(singledot_block_size); + size_type num_reorth_host; + + components::fill_array(exec, arnoldi_norm->get_values(), dim_size[1], + zero()); + multinorm2_kernel<<>>( + dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_const_values()), stride_next_krylov, + as_cuda_type(arnoldi_norm->get_values()), as_cuda_type(stop_status)); + zero_matrix(iter + 1, dim_size[1], stride_hessenberg, + hessenberg_iter->get_values()); + if (dim_size[1] > 1) { + multidot_kernel<<>>( + dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_cuda_accessor(krylov_bases), + as_cuda_type(hessenberg_iter->get_values()), stride_hessenberg, + as_cuda_type(stop_status)); + } else { + singledot_kernel + <<>>( + dim_size[0], + as_cuda_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_cuda_accessor(krylov_bases), + as_cuda_type(hessenberg_iter->get_values()), stride_hessenberg, + as_cuda_type(stop_status)); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + update_next_krylov_kernel + <<>>( + iter + 1, dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_values()), stride_next_krylov, + as_cuda_accessor(krylov_bases), + as_cuda_type(hessenberg_iter->get_const_values()), + stride_hessenberg, as_cuda_type(stop_status)); + + // for i in 1:iter + // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + multinorm2_inf_kernel<<>>( + dim_size[0], dim_size[1], + 
as_cuda_type(next_krylov_basis->get_const_values()), stride_next_krylov, + as_cuda_type(arnoldi_norm->get_values() + stride_arnoldi), + as_cuda_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_cuda_type(stop_status)); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, zero()); + check_arnoldi_norms + <<>>( + dim_size[1], as_cuda_type(arnoldi_norm->get_values()), + stride_arnoldi, as_cuda_type(hessenberg_iter->get_values()), + stride_hessenberg, iter + 1, as_cuda_accessor(krylov_bases), + as_cuda_type(stop_status), as_cuda_type(reorth_status), + as_cuda_type(num_reorth->get_data())); + num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + // num_reorth_host := number of next_krylov vector to be reorthogonalization + for (size_type l = 1; (num_reorth_host > 0) && (l < 3); l++) { + zero_matrix(iter + 1, dim_size[1], stride_buffer, + buffer_iter->get_values()); + if (dim_size[1] > 1) { + multidot_kernel + <<>>( + dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_cuda_accessor(krylov_bases), + as_cuda_type(buffer_iter->get_values()), stride_buffer, + as_cuda_type(stop_status)); + } else { + singledot_kernel + <<>>( + dim_size[0], + as_cuda_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_cuda_accessor(krylov_bases), + as_cuda_type(buffer_iter->get_values()), stride_buffer, + as_cuda_type(stop_status)); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + update_next_krylov_and_add_kernel + <<>>( + iter + 1, dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_values()), + stride_next_krylov, as_cuda_accessor(krylov_bases), + as_cuda_type(hessenberg_iter->get_values()), stride_hessenberg, + as_cuda_type(buffer_iter->get_const_values()), stride_buffer, + as_cuda_type(stop_status), as_cuda_type(reorth_status)); + // for i in 1:iter + // next_krylov_basis -= 
hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, + arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array( + exec, arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + multinorm2_inf_kernel<<>>( + dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_const_values()), + stride_next_krylov, + as_cuda_type(arnoldi_norm->get_values() + stride_arnoldi), + as_cuda_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_cuda_type(stop_status)); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, + zero()); + check_arnoldi_norms + <<>>( + dim_size[1], as_cuda_type(arnoldi_norm->get_values()), + stride_arnoldi, as_cuda_type(hessenberg_iter->get_values()), + stride_hessenberg, iter + 1, as_cuda_accessor(krylov_bases), + as_cuda_type(stop_status), as_cuda_type(reorth_status), + as_cuda_type(num_reorth->get_data())); + num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + } + + update_krylov_next_krylov_kernel + <<>>( + iter, dim_size[0], dim_size[1], + as_cuda_type(next_krylov_basis->get_values()), stride_next_krylov, + as_cuda_accessor(krylov_bases), + as_cuda_type(hessenberg_iter->get_const_values()), + stride_hessenberg, as_cuda_type(stop_status)); + // next_krylov_basis /= hessenberg(iter, iter + 1) + // krylov_bases(:, iter + 1) = next_krylov_basis + // End of arnoldi +} + +template +void givens_rotation(std::shared_ptr exec, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense *hessenberg_iter, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + size_type iter, const Array *stop_status) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_cols = hessenberg_iter->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{ + 
static_cast(ceildiv(num_cols, block_size)), 1, 1}; + + givens_rotation_kernel<<>>( + hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], iter, + as_cuda_type(hessenberg_iter->get_values()), + hessenberg_iter->get_stride(), as_cuda_type(givens_sin->get_values()), + givens_sin->get_stride(), as_cuda_type(givens_cos->get_values()), + givens_cos->get_stride(), as_cuda_type(residual_norm->get_values()), + as_cuda_type(residual_norm_collection->get_values()), + residual_norm_collection->get_stride(), + as_cuda_type(stop_status->get_const_data())); +} + + +template +void step_1(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, Array *final_iter_nums, + const Array *stop_status, + Array *reorth_status, Array *num_reorth) +{ + increase_final_iteration_numbers_kernel<<< + static_cast( + ceildiv(final_iter_nums->get_num_elems(), default_block_size)), + default_block_size>>>(as_cuda_type(final_iter_nums->get_data()), + as_cuda_type(stop_status->get_const_data()), + final_iter_nums->get_num_elems()); + finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, + buffer_iter, arnoldi_norm, iter, + stop_status->get_const_data(), reorth_status->get_data(), + num_reorth); + givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, + residual_norm, residual_norm_collection, iter, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); + + +template +void solve_upper_triangular( + const matrix::Dense *residual_norm_collection, + const matrix::Dense *hessenberg, matrix::Dense *y, + const Array *final_iter_nums) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_rhs = 
residual_norm_collection->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{static_cast(ceildiv(num_rhs, block_size)), + 1, 1}; + + solve_upper_triangular_kernel<<>>( + hessenberg->get_size()[1], num_rhs, + as_cuda_type(residual_norm_collection->get_const_values()), + residual_norm_collection->get_stride(), + as_cuda_type(hessenberg->get_const_values()), hessenberg->get_stride(), + as_cuda_type(y->get_values()), y->get_stride(), + as_cuda_type(final_iter_nums->get_const_data())); +} + + +template +void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, + const matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + const auto num_rows = before_preconditioner->get_size()[0]; + const auto num_cols = before_preconditioner->get_size()[1]; + const auto stride_before_preconditioner = + before_preconditioner->get_stride(); + + constexpr auto block_size = default_block_size; + const dim3 grid_dim{ + static_cast( + ceildiv(num_rows * stride_before_preconditioner, block_size)), + 1, 1}; + const dim3 block_dim{block_size, 1, 1}; + + + calculate_Qy_kernel<<>>( + num_rows, num_cols, as_cuda_accessor(krylov_bases), + as_cuda_type(y->get_const_values()), y->get_stride(), + as_cuda_type(before_preconditioner->get_values()), + stride_before_preconditioner, + as_cuda_type(final_iter_nums->get_const_data())); + // Calculate qy + // before_preconditioner = krylov_bases * y +} + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + ConstAccessor3d krylov_bases, + const matrix::Dense *hessenberg, + matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + // since hessenberg has dims: iters x iters * num_rhs + // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs + const auto iters = + hessenberg->get_size()[1] / before_preconditioner->get_size()[1]; + const auto num_krylov_bases = iters + 1; + 
solve_upper_triangular(residual_norm_collection, hessenberg, y, + final_iter_nums); + calculate_qy(krylov_bases, num_krylov_bases, y, before_preconditioner, + final_iter_nums); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( + GKO_DECLARE_CB_GMRES_STEP_2_KERNEL); + + +} // namespace cb_gmres +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/solver/cg_kernels.cu b/cuda/solver/cg_kernels.cu deleted file mode 100644 index 9adb589a9ea..00000000000 --- a/cuda/solver/cg_kernels.cu +++ /dev/null @@ -1,136 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/cg_kernels.hpp" - - -#include -#include - - -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/thread_ids.cuh" - - -namespace gko { -namespace kernels { -namespace cuda { -/** - * @brief The CG solver namespace. - * - * @ingroup cg - */ -namespace cg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/cg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - initialize_kernel<<>>( - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(z->get_values()), as_cuda_type(p->get_values()), - as_cuda_type(q->get_values()), as_cuda_type(prev_rho->get_values()), - as_cuda_type(rho->get_values()), as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const 
dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_1_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_cuda_type(p->get_values()), as_cuda_type(z->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(prev_rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_2_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_cuda_type(x->get_values()), as_cuda_type(r->get_values()), - as_cuda_type(p->get_const_values()), - as_cuda_type(q->get_const_values()), - as_cuda_type(beta->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_2_KERNEL); - - -} // namespace cg -} // namespace cuda -} // namespace kernels -} // namespace gko diff --git a/cuda/solver/cgs_kernels.cu b/cuda/solver/cgs_kernels.cu deleted file mode 100644 index 1c1b1af6b48..00000000000 --- a/cuda/solver/cgs_kernels.cu +++ /dev/null @@ -1,169 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/cgs_kernels.hpp" - - -#include -#include - - -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/thread_ids.cuh" - - -namespace gko { -namespace kernels { -namespace cuda { -/** - * @brief The CGS solver namespace. 
- * - * @ingroup cgs - */ -namespace cgs { - - -constexpr int default_block_size = 512; - - -#include "common/solver/cgs_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *r_tld, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *u, - matrix::Dense *u_hat, - matrix::Dense *v_hat, matrix::Dense *t, - matrix::Dense *alpha, matrix::Dense *beta, - matrix::Dense *gamma, - matrix::Dense *rho_prev, - matrix::Dense *rho, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - initialize_kernel<<>>( - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(r_tld->get_values()), as_cuda_type(p->get_values()), - as_cuda_type(q->get_values()), as_cuda_type(u->get_values()), - as_cuda_type(u_hat->get_values()), as_cuda_type(v_hat->get_values()), - as_cuda_type(t->get_values()), as_cuda_type(alpha->get_values()), - as_cuda_type(beta->get_values()), as_cuda_type(gamma->get_values()), - as_cuda_type(rho_prev->get_values()), as_cuda_type(rho->get_values()), - as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *u, - matrix::Dense *p, const matrix::Dense *q, - matrix::Dense *beta, const matrix::Dense *rho, - const matrix::Dense *rho_prev, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_1_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_cuda_type(r->get_const_values()), as_cuda_type(u->get_values()), - as_cuda_type(p->get_values()), as_cuda_type(q->get_const_values()), - 
as_cuda_type(beta->get_values()), as_cuda_type(rho->get_const_values()), - as_cuda_type(rho_prev->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - const matrix::Dense *u, - const matrix::Dense *v_hat, matrix::Dense *q, - matrix::Dense *t, matrix::Dense *alpha, - const matrix::Dense *rho, - const matrix::Dense *gamma, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(u->get_size()[0] * u->get_stride(), block_size.x), 1, 1); - - step_2_kernel<<>>( - u->get_size()[0], u->get_size()[1], u->get_stride(), - as_cuda_type(u->get_const_values()), - as_cuda_type(v_hat->get_const_values()), as_cuda_type(q->get_values()), - as_cuda_type(t->get_values()), as_cuda_type(alpha->get_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(gamma->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); - - -template -void step_3(std::shared_ptr exec, - const matrix::Dense *t, - const matrix::Dense *u_hat, matrix::Dense *r, - matrix::Dense *x, const matrix::Dense *alpha, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(t->get_size()[0] * t->get_stride(), block_size.x), 1, 1); - - step_3_kernel<<>>( - t->get_size()[0], t->get_size()[1], t->get_stride(), x->get_stride(), - as_cuda_type(t->get_const_values()), - as_cuda_type(u_hat->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(x->get_values()), as_cuda_type(alpha->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_3_KERNEL); - - -} // namespace cgs -} // namespace cuda -} // namespace kernels -} // namespace gko diff --git a/cuda/solver/common_trs_kernels.cuh 
b/cuda/solver/common_trs_kernels.cuh index f16be5ee0e1..53d6661c45f 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/solver/fcg_kernels.cu b/cuda/solver/fcg_kernels.cu deleted file mode 100644 index ed92ca19120..00000000000 --- a/cuda/solver/fcg_kernels.cu +++ /dev/null @@ -1,139 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/fcg_kernels.hpp" - - -#include -#include - - -#include "cuda/base/math.hpp" -#include "cuda/base/types.hpp" -#include "cuda/components/thread_ids.cuh" - - -namespace gko { -namespace kernels { -namespace cuda { -/** - * @brief The FCG solver namespace. - * - * @ingroup fcg - */ -namespace fcg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/fcg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *t, - matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *rho_t, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - initialize_kernel<<>>( - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), - as_cuda_type(z->get_values()), as_cuda_type(p->get_values()), - as_cuda_type(q->get_values()), as_cuda_type(t->get_values()), - as_cuda_type(prev_rho->get_values()), as_cuda_type(rho->get_values()), - as_cuda_type(rho_t->get_values()), - as_cuda_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const 
matrix::Dense *z, - const matrix::Dense *rho_t, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_1_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_cuda_type(p->get_values()), as_cuda_type(z->get_const_values()), - as_cuda_type(rho_t->get_const_values()), - as_cuda_type(prev_rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *t, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - step_2_kernel<<>>( - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_cuda_type(x->get_values()), as_cuda_type(r->get_values()), - as_cuda_type(t->get_values()), as_cuda_type(p->get_const_values()), - as_cuda_type(q->get_const_values()), - as_cuda_type(beta->get_const_values()), - as_cuda_type(rho->get_const_values()), - as_cuda_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_2_KERNEL); - - -} // namespace fcg -} // namespace cuda -} // namespace kernels -} // namespace gko diff --git a/cuda/solver/gmres_kernels.cu b/cuda/solver/gmres_kernels.cu index 0ddddfc74f7..a401e8fd4d0 100644 --- a/cuda/solver/gmres_kernels.cu +++ b/cuda/solver/gmres_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/math.hpp" @@ -72,7 +73,7 @@ constexpr int default_dot_dim = 32; constexpr int default_dot_size = default_dot_dim * default_dot_dim; -#include "common/solver/gmres_kernels.hpp.inc" +#include "common/cuda_hip/solver/gmres_kernels.hpp.inc" template @@ -118,7 +119,7 @@ void initialize_2(std::shared_ptr exec, const dim3 block_dim(default_block_size, 1, 1); constexpr auto block_size = default_block_size; - residual->compute_norm2(residual_norm); + kernels::cuda::dense::compute_norm2(exec, residual, residual_norm); const dim3 grid_dim_2(ceildiv(num_rows * num_rhs, default_block_size), 1, 1); diff --git a/cuda/solver/idr_kernels.cu b/cuda/solver/idr_kernels.cu new file mode 100644 index 00000000000..e05ddfd5317 --- /dev/null +++ b/cuda/solver/idr_kernels.cu @@ -0,0 +1,356 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/idr_kernels.hpp" + + +#include +#include + + +#include +#include + + +#include "core/components/fill_array.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/curand_bindings.hpp" +#include "cuda/base/math.hpp" +#include "cuda/base/types.hpp" +#include "cuda/components/atomic.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The IDR solver namespace. 
+ * + * @ingroup idr + */ +namespace idr { + + +constexpr int default_block_size = 512; +constexpr int default_dot_dim = 32; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "common/cuda_hip/solver/idr_kernels.hpp.inc" + + +namespace { + + +template +void initialize_m(const size_type nrhs, matrix::Dense *m, + Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + const auto m_stride = m->get_stride(); + + const auto grid_dim = ceildiv(m_stride * subspace_dim, default_block_size); + initialize_m_kernel<<>>( + subspace_dim, nrhs, as_cuda_type(m->get_values()), m_stride, + as_cuda_type(stop_status->get_data())); +} + + +template +void initialize_subspace_vectors(matrix::Dense *subspace_vectors, + bool deterministic) +{ + if (deterministic) { + auto subspace_vectors_data = matrix_data( + subspace_vectors->get_size(), std::normal_distribution<>(0.0, 1.0), + std::ranlux48(15)); + subspace_vectors->read(subspace_vectors_data); + } else { + auto gen = + curand::rand_generator(time(NULL), CURAND_RNG_PSEUDO_DEFAULT); + curand::rand_vector( + gen, + subspace_vectors->get_size()[0] * subspace_vectors->get_stride(), + 0.0, 1.0, subspace_vectors->get_values()); + } +} + + +template +void orthonormalize_subspace_vectors(matrix::Dense *subspace_vectors) +{ + orthonormalize_subspace_vectors_kernel + <<<1, default_block_size>>>( + subspace_vectors->get_size()[0], subspace_vectors->get_size()[1], + as_cuda_type(subspace_vectors->get_values()), + subspace_vectors->get_stride()); +} + + +template +void solve_lower_triangular(const size_type nrhs, + const matrix::Dense *m, + const matrix::Dense *f, + matrix::Dense *c, + const Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(nrhs, default_block_size); + solve_lower_triangular_kernel<<>>( + subspace_dim, nrhs, as_cuda_type(m->get_const_values()), + m->get_stride(), as_cuda_type(f->get_const_values()), f->get_stride(), + 
as_cuda_type(c->get_values()), c->get_stride(), + as_cuda_type(stop_status->get_const_data())); +} + + +template +void update_g_and_u(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *p, + const matrix::Dense *m, + matrix::Dense *alpha, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, + const Array *stop_status) +{ + const auto size = g->get_size()[0]; + const auto p_stride = p->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = 0; i < k; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, alpha->get_values(), nrhs, + zero()); + multidot_kernel<<>>( + size, nrhs, as_cuda_type(p_i), as_cuda_type(g_k->get_values()), + g_k->get_stride(), as_cuda_type(alpha->get_values()), + as_cuda_type(stop_status->get_const_data())); + } else { + cublas::dot(exec->get_cublas_handle(), size, p_i, 1, + g_k->get_values(), g_k->get_stride(), + alpha->get_values()); + } + update_g_k_and_u_kernel + <<get_stride(), default_block_size), + default_block_size>>>( + k, i, size, nrhs, as_cuda_type(alpha->get_const_values()), + as_cuda_type(m->get_const_values()), m->get_stride(), + as_cuda_type(g->get_const_values()), g->get_stride(), + as_cuda_type(g_k->get_values()), g_k->get_stride(), + as_cuda_type(u->get_values()), u->get_stride(), + as_cuda_type(stop_status->get_const_data())); + } + update_g_kernel + <<get_stride(), default_block_size), + default_block_size>>>( + k, size, nrhs, as_cuda_type(g_k->get_const_values()), + g_k->get_stride(), as_cuda_type(g->get_values()), g->get_stride(), + as_cuda_type(stop_status->get_const_data())); +} + + +template +void update_m(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *p, + const matrix::Dense *g_k, matrix::Dense *m, + const Array *stop_status) +{ + 
const auto size = g_k->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + const auto p_stride = p->get_stride(); + const auto m_stride = m->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = k; i < subspace_dim; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + auto m_i = m->get_values() + i * m_stride + k * nrhs; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, m_i, nrhs, zero()); + multidot_kernel<<>>( + size, nrhs, as_cuda_type(p_i), + as_cuda_type(g_k->get_const_values()), g_k->get_stride(), + as_cuda_type(m_i), as_cuda_type(stop_status->get_const_data())); + } else { + cublas::dot(exec->get_cublas_handle(), size, p_i, 1, + g_k->get_const_values(), g_k->get_stride(), m_i); + } + } +} + + +template +void update_x_r_and_f(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *m, + const matrix::Dense *g, + const matrix::Dense *u, + matrix::Dense *f, matrix::Dense *r, + matrix::Dense *x, + const Array *stop_status) +{ + const auto size = x->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(size * x->get_stride(), default_block_size); + update_x_r_and_f_kernel<<>>( + k, size, subspace_dim, nrhs, as_cuda_type(m->get_const_values()), + m->get_stride(), as_cuda_type(g->get_const_values()), g->get_stride(), + as_cuda_type(u->get_const_values()), u->get_stride(), + as_cuda_type(f->get_values()), f->get_stride(), + as_cuda_type(r->get_values()), r->get_stride(), + as_cuda_type(x->get_values()), x->get_stride(), + as_cuda_type(stop_status->get_const_data())); + components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs, + zero()); +} + + +} // namespace + + +template +void initialize(std::shared_ptr exec, const size_type nrhs, + matrix::Dense *m, + matrix::Dense *subspace_vectors, bool deterministic, + 
Array *stop_status) +{ + initialize_m(nrhs, m, stop_status); + initialize_subspace_vectors(subspace_vectors, deterministic); + orthonormalize_subspace_vectors(subspace_vectors); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); + + +template +void step_1(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *m, + const matrix::Dense *f, + const matrix::Dense *residual, + const matrix::Dense *g, matrix::Dense *c, + matrix::Dense *v, + const Array *stop_status) +{ + solve_lower_triangular(nrhs, m, f, c, stop_status); + + const auto num_rows = v->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); + step_1_kernel<<>>( + k, num_rows, subspace_dim, nrhs, + as_cuda_type(residual->get_const_values()), residual->get_stride(), + as_cuda_type(c->get_const_values()), c->get_stride(), + as_cuda_type(g->get_const_values()), g->get_stride(), + as_cuda_type(v->get_values()), v->get_stride(), + as_cuda_type(stop_status->get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); + + +template +void step_2(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *omega, + const matrix::Dense *preconditioned_vector, + const matrix::Dense *c, matrix::Dense *u, + const Array *stop_status) +{ + const auto num_rows = preconditioned_vector->get_size()[0]; + const auto subspace_dim = u->get_size()[1] / nrhs; + + const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); + step_2_kernel<<>>( + k, num_rows, subspace_dim, nrhs, + as_cuda_type(omega->get_const_values()), + as_cuda_type(preconditioned_vector->get_const_values()), + preconditioned_vector->get_stride(), + as_cuda_type(c->get_const_values()), c->get_stride(), + as_cuda_type(u->get_values()), u->get_stride(), + as_cuda_type(stop_status->get_const_data())); +} + 
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); + + +template +void step_3(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *p, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, matrix::Dense *m, + matrix::Dense *f, matrix::Dense *alpha, + matrix::Dense *residual, matrix::Dense *x, + const Array *stop_status) +{ + update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); + update_m(exec, nrhs, k, p, g_k, m, stop_status); + update_x_r_and_f(exec, nrhs, k, m, g, u, f, residual, x, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); + + +template +void compute_omega( + std::shared_ptr exec, const size_type nrhs, + const remove_complex kappa, const matrix::Dense *tht, + const matrix::Dense> *residual_norm, + matrix::Dense *omega, const Array *stop_status) +{ + const auto grid_dim = ceildiv(nrhs, config::warp_size); + compute_omega_kernel<<>>( + nrhs, kappa, as_cuda_type(tht->get_const_values()), + as_cuda_type(residual_norm->get_const_values()), + as_cuda_type(omega->get_values()), + as_cuda_type(stop_status->get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); + + +} // namespace idr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/solver/lower_trs_kernels.cu b/cuda/solver/lower_trs_kernels.cu index 1cd2764d481..95da4d221a9 100644 --- a/cuda/solver/lower_trs_kernels.cu +++ b/cuda/solver/lower_trs_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/cuda/solver/upper_trs_kernels.cu b/cuda/solver/upper_trs_kernels.cu index 0518b11bed7..361a0738d0d 100644 --- a/cuda/solver/upper_trs_kernels.cu +++ b/cuda/solver/upper_trs_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/stop/criterion_kernels.cu b/cuda/stop/criterion_kernels.cu index 390f96cb2f2..2bc18045715 100644 --- a/cuda/stop/criterion_kernels.cu +++ b/cuda/stop/criterion_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/stop/residual_norm_kernels.cu b/cuda/stop/residual_norm_kernels.cu index 45f2c2336d5..6297df89352 100644 --- a/cuda/stop/residual_norm_kernels.cu +++ b/cuda/stop/residual_norm_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -121,6 +121,82 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( } // namespace residual_norm + + +/** + * @brief The Implicit Residual norm stopping criterion. 
+ * @ref implicit_resnorm + * @ingroup resnorm + */ +namespace implicit_residual_norm { + + +constexpr int default_block_size = 512; + + +template +__global__ + __launch_bounds__(default_block_size) void implicit_residual_norm_kernel( + size_type num_cols, remove_complex rel_residual_goal, + const ValueType *__restrict__ tau, + const remove_complex *__restrict__ orig_tau, + uint8 stoppingId, bool setFinalized, + stopping_status *__restrict__ stop_status, + bool *__restrict__ device_storage) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < num_cols) { + if (sqrt(abs(tau[tidx])) < rel_residual_goal * orig_tau[tidx]) { + stop_status[tidx].converge(stoppingId, setFinalized); + device_storage[1] = true; + } + // because only false is written to all_converged, write conflicts + // should not cause any problem + else if (!stop_status[tidx].has_stopped()) { + device_storage[0] = false; + } + } +} + + +__global__ __launch_bounds__(1) void init_kernel( + bool *__restrict__ device_storage) +{ + device_storage[0] = true; + device_storage[1] = false; +} + + +template +void implicit_residual_norm( + std::shared_ptr exec, + const matrix::Dense *tau, + const matrix::Dense> *orig_tau, + remove_complex rel_residual_goal, uint8 stoppingId, + bool setFinalized, Array *stop_status, + Array *device_storage, bool *all_converged, bool *one_changed) +{ + init_kernel<<<1, 1>>>(as_cuda_type(device_storage->get_data())); + + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(tau->get_size()[1], block_size.x), 1, 1); + + implicit_residual_norm_kernel<<>>( + tau->get_size()[1], rel_residual_goal, + as_cuda_type(tau->get_const_values()), + as_cuda_type(orig_tau->get_const_values()), stoppingId, setFinalized, + as_cuda_type(stop_status->get_data()), + as_cuda_type(device_storage->get_data())); + + /* Represents all_converged, one_changed */ + *all_converged = exec->copy_val_to_host(device_storage->get_const_data()); + *one_changed = 
exec->copy_val_to_host(device_storage->get_const_data() + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); + + +} // namespace implicit_residual_norm } // namespace cuda } // namespace kernels } // namespace gko diff --git a/cuda/test/CMakeLists.txt b/cuda/test/CMakeLists.txt index 5b180f32c11..fb66aaf270f 100644 --- a/cuda/test/CMakeLists.txt +++ b/cuda/test/CMakeLists.txt @@ -1,10 +1,12 @@ -include(${CMAKE_SOURCE_DIR}/cmake/create_test.cmake) +include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(preconditioner) +add_subdirectory(reorder) add_subdirectory(solver) add_subdirectory(stop) add_subdirectory(utils) diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index d9ba808fa21..c23efefa8ed 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,4 +1,13 @@ +ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) +ginkgo_create_thread_test(cuda_executor_reset) +if(GINKGO_HAVE_HWLOC) + find_package(NUMA REQUIRED) + ginkgo_create_cuda_test(cuda_executor_topology NUMA::NUMA) +endif() ginkgo_create_cuda_test(exception_helpers) +ginkgo_create_cuda_test(kernel_launch) +# set correct flags for kernel_launch.hpp +target_compile_definitions(cuda_test_base_kernel_launch PRIVATE GKO_COMPILING_CUDA) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cu new file mode 100644 index 00000000000..4d12cff3988 --- /dev/null +++ b/cuda/test/base/array.cu @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include "cuda/test/utils.hpp" + + +template +class Array : public ::testing::Test { +protected: + Array() : exec(gko::ReferenceExecutor::create()), x(exec, 2) + { + x.get_data()[0] = 5; + x.get_data()[1] = 2; + } + + static void assert_equal_to_original_x(gko::Array &a) + { + ASSERT_EQ(a.get_num_elems(), 2); + EXPECT_EQ(a.get_data()[0], T{5}); + EXPECT_EQ(a.get_data()[1], T{2}); + EXPECT_EQ(a.get_const_data()[0], T{5}); + EXPECT_EQ(a.get_const_data()[1], T{2}); + } + + std::shared_ptr exec; + gko::Array x; +}; + +TYPED_TEST_SUITE(Array, gko::test::ValueAndIndexTypes); + + +TYPED_TEST(Array, CanCreateTemporaryCloneOnDifferentExecutor) +{ + auto cuda = gko::CudaExecutor::create(0, this->exec); + + auto tmp_clone = make_temporary_clone(cuda, &this->x); + + ASSERT_NE(tmp_clone.get(), &this->x); + tmp_clone->set_executor(this->exec); + this->assert_equal_to_original_x(*tmp_clone.get()); +} + + +TYPED_TEST(Array, CanCopyBackTemporaryCloneOnDifferentExecutor) +{ + auto cuda = gko::CudaExecutor::create(0, this->exec); + + { + auto tmp_clone = make_temporary_clone(cuda, &this->x); + // change x, so it no longer matches the original x + // the copy-back will overwrite it again with the correct value + this->x.get_data()[0] = 0; + } + + this->assert_equal_to_original_x(this->x); +} diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 2bcf5961bbd..032054a1ca8 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include - +#include "common/cuda_hip/base/executor.hpp.inc" #include "cuda/test/utils.hpp" @@ -69,6 +69,11 @@ public: value = -3; } + void run(std::shared_ptr) const override + { + value = -4; + } + void run(std::shared_ptr) const override { cudaGetDevice(&value); @@ -81,7 +86,10 @@ public: class CudaExecutor : public ::testing::Test { protected: CudaExecutor() - : omp(gko::OmpExecutor::create()), cuda(nullptr), cuda2(nullptr) + : omp(gko::OmpExecutor::create()), + cuda(nullptr), + cuda2(nullptr), + cuda3(nullptr) {} void SetUp() @@ -90,6 +98,8 @@ protected: cuda = gko::CudaExecutor::create(0, omp); cuda2 = gko::CudaExecutor::create( gko::CudaExecutor::get_num_devices() - 1, omp); + cuda3 = gko::CudaExecutor::create(0, omp, false, + gko::allocation_mode::unified_global); } void TearDown() @@ -103,6 +113,7 @@ protected: std::shared_ptr omp; std::shared_ptr cuda; std::shared_ptr cuda2; + std::shared_ptr cuda3; }; @@ -158,6 +169,7 @@ __global__ void check_data(int *data) } } + TEST_F(CudaExecutor, CopiesDataToCuda) { int orig[] = {3, 8}; @@ -171,6 +183,29 @@ TEST_F(CudaExecutor, CopiesDataToCuda) } +__global__ void check_data2(int *data) +{ + if (data[0] != 4 || data[1] != 8) { + asm("trap;"); + } +} + + +TEST_F(CudaExecutor, CanAllocateOnUnifiedMemory) +{ + int orig[] = {3, 8}; + auto *copy = cuda3->alloc(2); + + cuda3->copy_from(omp.get(), 2, orig, copy); + + check_data<<<1, 1>>>(copy); + ASSERT_NO_THROW(cuda3->synchronize()); + copy[0] = 4; + check_data2<<<1, 1>>>(copy); + cuda3->free(copy); +} + + __global__ void init_data(int *data) { data[0] = 3; @@ -239,7 +274,7 @@ TEST_F(CudaExecutor, CopiesDataFromCudaToCuda) omp->copy_from(cuda2.get(), 2, copy_cuda2, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); - cuda->free(copy_cuda2); + cuda2->free(copy_cuda2); cuda->free(orig); } @@ -251,4 +286,33 @@ TEST_F(CudaExecutor, Synchronizes) } +TEST_F(CudaExecutor, ExecInfoSetsCorrectProperties) +{ + auto dev_id = cuda->get_device_id(); + auto num_sm = 0; + 
auto major = 0; + auto minor = 0; + auto max_threads_per_block = 0; + auto warp_size = 0; + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &num_sm, cudaDevAttrMultiProcessorCount, dev_id)); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &major, cudaDevAttrComputeCapabilityMajor, dev_id)); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &minor, cudaDevAttrComputeCapabilityMinor, dev_id)); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute( + &max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, dev_id)); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaDeviceGetAttribute(&warp_size, cudaDevAttrWarpSize, dev_id)); + auto num_cores = convert_sm_ver_to_cores(major, minor); + + ASSERT_EQ(cuda->get_major_version(), major); + ASSERT_EQ(cuda->get_minor_version(), minor); + ASSERT_EQ(cuda->get_num_multiprocessor(), num_sm); + ASSERT_EQ(cuda->get_warp_size(), warp_size); + ASSERT_EQ(cuda->get_num_warps(), num_sm * (num_cores / warp_size)); + ASSERT_EQ(cuda->get_num_warps_per_sm(), num_cores / warp_size); +} + + } // namespace diff --git a/cuda/test/base/cuda_executor_reset.cpp b/cuda/test/base/cuda_executor_reset.cpp new file mode 100644 index 00000000000..5d9169be808 --- /dev/null +++ b/cuda/test/base/cuda_executor_reset.cpp @@ -0,0 +1,87 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +namespace { + + +#define GTEST_ASSERT_NO_EXIT(statement) \ + ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + + +TEST(DeviceReset, HipCuda) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto hip = gko::HipExecutor::create(0, ref, true); + auto cuda = gko::CudaExecutor::create(0, ref, true); + }); +} + + +TEST(DeviceReset, CudaHip) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto cuda = gko::CudaExecutor::create(0, ref, true); + auto hip = gko::HipExecutor::create(0, ref, true); + }); +} + + +void func() +{ + auto ref = gko::ReferenceExecutor::create(); + auto exec = gko::CudaExecutor::create(0, ref, true); +} + + +TEST(DeviceReset, CudaCuda) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + +} // namespace diff --git a/cuda/test/base/cuda_executor_topology.cu 
b/cuda/test/base/cuda_executor_topology.cu new file mode 100644 index 00000000000..7e9eb4191da --- /dev/null +++ b/cuda/test/base/cuda_executor_topology.cu @@ -0,0 +1,163 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#if defined(__unix__) || defined(__APPLE__) +#include +#include +#endif + + +#include + + +#include +#include + + +#include "cuda/test/utils.hpp" + + +namespace { + + +class CudaExecutor : public ::testing::Test { +protected: + CudaExecutor() + : omp(gko::OmpExecutor::create()), cuda(nullptr), cuda2(nullptr) + {} + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + cuda = gko::CudaExecutor::create(0, omp); + cuda2 = gko::CudaExecutor::create( + gko::CudaExecutor::get_num_devices() - 1, omp); + } + + void TearDown() + { + if (cuda != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::shared_ptr omp; + std::shared_ptr cuda; + std::shared_ptr cuda2; +}; + + +#if GKO_HAVE_HWLOC + + +inline int get_cpu_os_id(int log_id) +{ + return gko::MachineTopology::get_instance()->get_pu(log_id)->os_id; +} + + +inline int get_core_os_id(int log_id) +{ + return gko::MachineTopology::get_instance()->get_core(log_id)->os_id; +} + + +TEST_F(CudaExecutor, CanBindToSinglePu) +{ + cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + + const int bind_pu = 1; + gko::MachineTopology::get_instance()->bind_to_pu(bind_pu); + + auto cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_cpu_os_id(1)); +} + + +TEST_F(CudaExecutor, CanBindToPus) +{ + cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + + std::vector bind_pus = {1, 3}; + gko::MachineTopology::get_instance()->bind_to_pus(bind_pus); + + auto cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_cpu_os_id(3) || cpu_sys == get_cpu_os_id(1)); +} + + +TEST_F(CudaExecutor, CanBindToCores) +{ + cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + + std::vector bind_cores = {1, 3}; + gko::MachineTopology::get_instance()->bind_to_cores(bind_cores); + + auto cpu_sys = 
sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_core_os_id(3) || cpu_sys == get_core_os_id(1)); +} + + +TEST_F(CudaExecutor, ClosestCpusIsPopulated) +{ + cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + auto close_cpus0 = cuda->get_closest_pus(); + + ASSERT_NE(close_cpus0[0], -1); +} + + +TEST_F(CudaExecutor, KnowsItsNuma) +{ + cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + auto numa0 = cuda->get_closest_numa(); + auto close_cpu0 = cuda->get_closest_pus()[0]; + + auto numa_sys0 = numa_node_of_cpu(get_cpu_os_id(close_cpu0)); + + ASSERT_TRUE(numa0 == numa_sys0); +} + + +#endif + + +} // namespace diff --git a/cuda/test/base/exception_helpers.cu b/cuda/test/base/exception_helpers.cu index 1652594803a..f49df30bcc5 100644 --- a/cuda/test/base/exception_helpers.cu +++ b/cuda/test/base/exception_helpers.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -68,6 +69,18 @@ TEST(AssertNoCublasErrors, DoesNotThrowOnSuccess) } +TEST(AssertNoCurandErrors, ThrowsOnError) +{ + ASSERT_THROW(GKO_ASSERT_NO_CURAND_ERRORS(1), gko::CurandError); +} + + +TEST(AssertNoCurandErrors, DoesNotThrowOnSuccess) +{ + ASSERT_NO_THROW(GKO_ASSERT_NO_CURAND_ERRORS(CURAND_STATUS_SUCCESS)); +} + + TEST(AssertNoCusparseErrors, ThrowsOnError) { ASSERT_THROW(GKO_ASSERT_NO_CUSPARSE_ERRORS(1), gko::CusparseError); diff --git a/cuda/test/base/kernel_launch.cu b/cuda/test/base/kernel_launch.cu new file mode 100644 index 00000000000..d983085d974 --- /dev/null +++ b/cuda/test/base/kernel_launch.cu @@ -0,0 +1,278 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "common/unified/base/kernel_launch.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "common/unified/base/kernel_launch_solver.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using gko::dim; +using gko::size_type; +using std::is_same; + + +class KernelLaunch : public ::testing::Test { +protected: + KernelLaunch() + : exec(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), + false, gko::allocation_mode::device)), + zero_array(exec->get_master(), 16), + iota_array(exec->get_master(), 16), + iota_transp_array(exec->get_master(), 16), + iota_dense(gko::matrix::Dense<>::create(exec, dim<2>{4, 4})), + zero_dense(gko::matrix::Dense<>::create(exec, dim<2>{4, 4}, 6)), + zero_dense2(gko::matrix::Dense<>::create(exec, dim<2>{4, 4}, 5)), + vec_dense(gko::matrix::Dense<>::create(exec, dim<2>{1, 4})) + { + auto ref_iota_dense = + gko::matrix::Dense<>::create(exec->get_master(), dim<2>{4, 4}); + for (int i = 0; i < 16; i++) { + zero_array.get_data()[i] = 0; + iota_array.get_data()[i] = i; + iota_transp_array.get_data()[i] = (i % 4 * 4) + i / 4; + ref_iota_dense->at(i / 4, i % 4) = i; + } + zero_dense->fill(0.0); + zero_dense2->fill(0.0); + iota_dense->copy_from(ref_iota_dense.get()); + zero_array.set_executor(exec); + iota_array.set_executor(exec); + iota_transp_array.set_executor(exec); + } + + std::shared_ptr exec; + gko::Array zero_array; + gko::Array iota_array; + gko::Array iota_transp_array; + std::unique_ptr> iota_dense; + std::unique_ptr> zero_dense; + std::unique_ptr> zero_dense2; + std::unique_ptr> vec_dense; +}; + + +// nvcc doesn't like device lambdas declared in complex classes, move it out +void run1d(std::shared_ptr exec, size_type dim, int *data) +{ + gko::kernels::cuda::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i] = i; 
+ }, + dim, data); +} + +TEST_F(KernelLaunch, Runs1D) +{ + run1d(exec, zero_array.get_num_elems(), zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +void run1d(std::shared_ptr exec, gko::Array &data) +{ + gko::kernels::cuda::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i] = i; + } else { + d[i] = 0; + } + }, + data.get_num_elems(), data, data.get_const_data()); +} + +TEST_F(KernelLaunch, Runs1DArray) +{ + run1d(exec, zero_array); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) +{ + gko::kernels::cuda::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool strides_correct = d.stride == 5 && d2.stride == 5; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d.stride; + bool accessors_1d_correct = &d[0] == d_ptr && &d2[0] == d_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i / 4, i % 4) = i; + } else { + d(i / 4, i % 4) = 0; + } + }, + 16, m, static_cast *>(m), + m->get_const_values()); +} + +TEST_F(KernelLaunch, Runs1DDense) +{ + run1d(exec, zero_dense2.get()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +void run2d(std::shared_ptr exec, int *data) +{ + gko::kernels::cuda::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i + 4 * j] = 4 * i + j; + }, + dim<2>{4, 
4}, data); +} + +TEST_F(KernelLaunch, Runs2D) +{ + run2d(exec, zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +void run2d(std::shared_ptr exec, gko::Array &data) +{ + gko::kernels::cuda::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i + 4 * j] = 4 * i + j; + } else { + d[i + 4 * j] = 0; + } + }, + dim<2>{4, 4}, data, data.get_const_data()); +} + +TEST_F(KernelLaunch, Runs2DArray) +{ + run2d(exec, zero_array); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, + gko::matrix::Dense<> *m2, gko::matrix::Dense<> *m3) +{ + gko::kernels::cuda::run_kernel_solver( + exec, + [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, + auto d4, auto d2_ptr, auto d3_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && + d3.data == d2_ptr && d4 == d3_ptr; + bool strides_correct = + d.stride == 5 && d2.stride == 5 && d3.stride == 6; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d2.stride && + &d3(0, 0) == d2_ptr && &d3(1, 0) == d2_ptr + d3.stride; + bool accessors_1d_correct = + &d[0] == d_ptr && &d2[0] == d_ptr && &d3[0] == d2_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i, j) = 4 * i + j; + } else { + d(i, j) = 0; + } + }, + dim<2>{4, 4}, m2->get_stride(), 
m1, + static_cast *>(m1), m1->get_const_values(), + gko::kernels::cuda::default_stride(m2), + gko::kernels::cuda::row_vector(m3), m2->get_values(), m3->get_values()); +} + +TEST_F(KernelLaunch, Runs2DDense) +{ + run2d(exec, zero_dense2.get(), zero_dense.get(), vec_dense.get()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +} // namespace diff --git a/cuda/test/base/lin_op.cu b/cuda/test/base/lin_op.cu index 82016f87816..cc4ca8099b7 100644 --- a/cuda/test/base/lin_op.cu +++ b/cuda/test/base/lin_op.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,6 +39,110 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +class DummyLinOp : public gko::EnableLinOp, + public gko::EnableCreateMethod { +public: + DummyLinOp(std::shared_ptr exec, + gko::dim<2> size = gko::dim<2>{}) + : EnableLinOp(exec, size) + {} + + void access() const { last_access = this->get_executor(); } + + mutable std::shared_ptr last_access; + mutable std::shared_ptr last_b_access; + mutable std::shared_ptr last_x_access; + mutable std::shared_ptr last_alpha_access; + mutable std::shared_ptr last_beta_access; + +protected: + void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + { + this->access(); + static_cast(b)->access(); + static_cast(x)->access(); + last_b_access = b->get_executor(); + last_x_access = x->get_executor(); + } + + void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, + const gko::LinOp *beta, gko::LinOp *x) const override + { + this->access(); + static_cast(alpha)->access(); + static_cast(b)->access(); + static_cast(beta)->access(); + static_cast(x)->access(); + last_alpha_access = alpha->get_executor(); + last_b_access = b->get_executor(); + last_beta_access = beta->get_executor(); + last_x_access = 
x->get_executor(); + } +}; + + +class EnableLinOp : public ::testing::Test { +protected: + EnableLinOp() + : ref{gko::ReferenceExecutor::create()}, + cuda{gko::CudaExecutor::create(0, ref)}, + op{DummyLinOp::create(cuda, gko::dim<2>{3, 5})}, + alpha{DummyLinOp::create(ref, gko::dim<2>{1})}, + beta{DummyLinOp::create(ref, gko::dim<2>{1})}, + b{DummyLinOp::create(ref, gko::dim<2>{5, 4})}, + x{DummyLinOp::create(ref, gko::dim<2>{3, 4})} + {} + + std::shared_ptr ref; + std::shared_ptr cuda; + std::unique_ptr op; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr b; + std::unique_ptr x; +}; + + +TEST_F(EnableLinOp, ApplyCopiesDataToCorrectExecutor) +{ + op->apply(gko::lend(b), gko::lend(x)); + + ASSERT_EQ(op->last_b_access, cuda); + ASSERT_EQ(op->last_x_access, cuda); +} + + +TEST_F(EnableLinOp, ApplyCopiesBackOnlyX) +{ + op->apply(gko::lend(b), gko::lend(x)); + + ASSERT_EQ(b->last_access, nullptr); + ASSERT_EQ(x->last_access, cuda); +} + + +TEST_F(EnableLinOp, ExtendedApplyCopiesDataToCorrectExecutor) +{ + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); + + ASSERT_EQ(op->last_alpha_access, cuda); + ASSERT_EQ(op->last_b_access, cuda); + ASSERT_EQ(op->last_beta_access, cuda); + ASSERT_EQ(op->last_x_access, cuda); +} + + +TEST_F(EnableLinOp, ExtendedApplyCopiesBackOnlyX) +{ + op->apply(gko::lend(alpha), gko::lend(b), gko::lend(beta), gko::lend(x)); + + ASSERT_EQ(alpha->last_access, nullptr); + ASSERT_EQ(b->last_access, nullptr); + ASSERT_EQ(beta->last_access, nullptr); + ASSERT_EQ(x->last_access, cuda); +} + + class FactoryParameter : public ::testing::Test { protected: FactoryParameter() {} diff --git a/cuda/test/base/math.cu b/cuda/test/base/math.cu index 08deb9a29d9..19ee73ea3eb 100644 --- a/cuda/test/base/math.cu +++ b/cuda/test/base/math.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights 
reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/CMakeLists.txt b/cuda/test/components/CMakeLists.txt index 154a39e963e..1d76a251948 100644 --- a/cuda/test/components/CMakeLists.txt +++ b/cuda/test/components/CMakeLists.txt @@ -2,6 +2,7 @@ ginkgo_create_cuda_test(cooperative_groups_kernels) ginkgo_create_cuda_test(merging_kernels) ginkgo_create_cuda_test(searching_kernels) ginkgo_create_cuda_test(sorting_kernels) +ginkgo_create_test(absolute_array) ginkgo_create_test(fill_array) ginkgo_create_test(precision_conversion) ginkgo_create_test(prefix_sum) diff --git a/cuda/test/components/absolute_array.cpp b/cuda/test/components/absolute_array.cpp new file mode 100644 index 00000000000..4cc92ec0b17 --- /dev/null +++ b/cuda/test/components/absolute_array.cpp @@ -0,0 +1,132 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/absolute_array.hpp" + + +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils/assertions.hpp" + + +namespace { + + +class AbsoluteArray : public ::testing::Test { +protected: + using value_type = double; + using complex_type = std::complex; + AbsoluteArray() + : ref(gko::ReferenceExecutor::create()), + exec(gko::CudaExecutor::create(0, ref)), + total_size(6344), + vals(ref, total_size), + dvals(exec, total_size), + complex_vals(ref, total_size), + dcomplex_vals(exec, total_size) + { + std::fill_n(vals.get_data(), total_size, -1234.0); + dvals = vals; + std::fill_n(complex_vals.get_data(), total_size, complex_type{3, 4}); + dcomplex_vals = complex_vals; + } + + std::shared_ptr ref; + std::shared_ptr exec; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; + gko::Array complex_vals; + gko::Array dcomplex_vals; +}; + + +TEST_F(AbsoluteArray, InplaceEqualsReference) +{ + gko::kernels::cuda::components::inplace_absolute_array( + exec, dvals.get_data(), total_size); + gko::kernels::reference::components::inplace_absolute_array( + ref, vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(vals, dvals); +} + + +TEST_F(AbsoluteArray, InplaceComplexEqualsReference) +{ + gko::kernels::cuda::components::inplace_absolute_array( + exec, dcomplex_vals.get_data(), total_size); + 
gko::kernels::reference::components::inplace_absolute_array( + ref, complex_vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(complex_vals, dcomplex_vals); +} + + +TEST_F(AbsoluteArray, OutplaceEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::cuda::components::outplace_absolute_array( + exec, dvals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +TEST_F(AbsoluteArray, OutplaceComplexEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::cuda::components::outplace_absolute_array( + exec, dcomplex_vals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, complex_vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +} // namespace diff --git a/cuda/test/components/cooperative_groups_kernels.cu b/cuda/test/components/cooperative_groups_kernels.cu index e565a6c9952..47f1e6446cd 100644 --- a/cuda/test/components/cooperative_groups_kernels.cu +++ b/cuda/test/components/cooperative_groups_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/fill_array.cpp b/cuda/test/components/fill_array.cpp index f5a1f8734f1..06706b174e8 100644 --- a/cuda/test/components/fill_array.cpp +++ b/cuda/test/components/fill_array.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,22 +45,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/test/utils/assertions.hpp" +#include "cuda/test/utils.hpp" namespace { +template class FillArray : public ::testing::Test { protected: - using value_type = double; + using value_type = T; FillArray() : ref(gko::ReferenceExecutor::create()), exec(gko::CudaExecutor::create(0, ref)), total_size(6344), vals(ref, total_size), - dvals(exec, total_size) + dvals(exec, total_size), + seqs(ref, total_size) { - std::fill_n(vals.get_data(), total_size, 1234.0); + std::fill_n(vals.get_data(), total_size, T(1234)); + std::iota(seqs.get_data(), seqs.get_data() + total_size, 0); } std::shared_ptr ref; @@ -68,14 +72,29 @@ class FillArray : public ::testing::Test { gko::size_type total_size; gko::Array vals; gko::Array dvals; + gko::Array seqs; }; +TYPED_TEST_SUITE(FillArray, gko::test::ValueAndIndexTypes); -TEST_F(FillArray, EqualsReference) + +TYPED_TEST(FillArray, EqualsReference) { - gko::kernels::cuda::components::fill_array(exec, dvals.get_data(), - total_size, 1234.0); - GKO_ASSERT_ARRAY_EQ(vals, dvals); + using T = typename TestFixture::value_type; + gko::kernels::cuda::components::fill_array( + this->exec, this->dvals.get_data(), this->total_size, T(1234)); + + GKO_ASSERT_ARRAY_EQ(this->vals, this->dvals); +} + + +TYPED_TEST(FillArray, FillSeqEqualsReference) +{ + using T = typename TestFixture::value_type; + 
gko::kernels::cuda::components::fill_seq_array( + this->exec, this->dvals.get_data(), this->total_size); + + GKO_ASSERT_ARRAY_EQ(this->seqs, this->dvals); } diff --git a/cuda/test/components/merging_kernels.cu b/cuda/test/components/merging_kernels.cu index abd135b4d65..60e1605da0c 100644 --- a/cuda/test/components/merging_kernels.cu +++ b/cuda/test/components/merging_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/precision_conversion.cpp b/cuda/test/components/precision_conversion.cpp index 73751dbc1d9..87c41a9b34e 100644 --- a/cuda/test/components/precision_conversion.cpp +++ b/cuda/test/components/precision_conversion.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/prefix_sum.cpp b/cuda/test/components/prefix_sum.cpp index 6c3ad82f21e..5d3fb835156 100644 --- a/cuda/test/components/prefix_sum.cpp +++ b/cuda/test/components/prefix_sum.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/searching_kernels.cu b/cuda/test/components/searching_kernels.cu index d4f92099f4a..faadf2cf377 100644 --- a/cuda/test/components/searching_kernels.cu +++ b/cuda/test/components/searching_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/components/sorting_kernels.cu b/cuda/test/components/sorting_kernels.cu index cc50281177b..6bcf98fdeca 100644 --- a/cuda/test/components/sorting_kernels.cu +++ b/cuda/test/components/sorting_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -54,8 +54,8 @@ using gko::kernels::cuda::bitonic_sort; using gko::kernels::cuda::config; -constexpr auto num_elements = 2048; -constexpr auto num_local = 4; +constexpr int num_elements = 2048; +constexpr int num_local = 4; constexpr auto num_threads = num_elements / num_local; @@ -99,7 +99,7 @@ protected: { // we want some duplicate elements std::uniform_int_distribution dist(0, num_elements / 2); - for (auto i = 0; i < num_elements; ++i) { + for (int i = 0; i < num_elements; ++i) { ref_shared.get_data()[i] = dist(rng); } ddata = gko::Array{cuda, ref_shared}; diff --git a/cuda/test/factorization/CMakeLists.txt b/cuda/test/factorization/CMakeLists.txt index 5b494bf99b9..6fb7f5fea43 100644 --- a/cuda/test/factorization/CMakeLists.txt +++ b/cuda/test/factorization/CMakeLists.txt @@ -1,4 +1,6 @@ +ginkgo_create_test(ic_kernels) ginkgo_create_test(ilu_kernels) +ginkgo_create_test(par_ic_kernels) ginkgo_create_test(par_ict_kernels) 
ginkgo_create_test(par_ilu_kernels) ginkgo_create_test(par_ilut_kernels) diff --git a/cuda/test/factorization/ic_kernels.cpp b/cuda/test/factorization/ic_kernels.cpp new file mode 100644 index 00000000000..d99121aa320 --- /dev/null +++ b/cuda/test/factorization/ic_kernels.cpp @@ -0,0 +1,147 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include + + +#include + + +#include +#include + + +#include "core/test/utils/unsort_matrix.hpp" +#include "cuda/test/utils.hpp" +#include "matrices/config.hpp" + + +namespace { + + +class Ic : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Csr = gko::matrix::Csr; + + std::shared_ptr ref; + std::shared_ptr cuda; + std::ranlux48 rand_engine; + std::shared_ptr csr_ref; + std::shared_ptr csr_cuda; + + Ic() + : ref(gko::ReferenceExecutor::create()), + cuda(gko::CudaExecutor::create(0, ref)), + rand_engine(6794) + {} + + void SetUp() override + { + std::string file_name(gko::matrices::location_ani4_mtx); + auto input_file = std::ifstream(file_name, std::ios::in); + if (!input_file) { + FAIL() << "Could not find the file \"" << file_name + << "\", which is required for this test.\n"; + } + csr_ref = gko::read(input_file, ref); + csr_cuda = Csr::create(cuda); + csr_cuda->copy_from(gko::lend(csr_ref)); + } +}; + + +TEST_F(Ic, ComputeICIsEquivalentToRefSorted) +{ + auto ref_fact = gko::factorization::ParIc<>::build() + .with_skip_sorting(true) + .on(ref) + ->generate(csr_ref); + auto cuda_fact = gko::factorization::Ic<>::build() + .with_skip_sorting(true) + .on(cuda) + ->generate(csr_cuda); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), cuda_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_lt_factor(), cuda_fact->get_lt_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + cuda_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_lt_factor(), + cuda_fact->get_lt_factor()); +} + + +TEST_F(Ic, ComputeICIsEquivalentToRefUnsorted) +{ + gko::test::unsort_matrix(gko::lend(csr_ref), rand_engine); + csr_cuda->copy_from(gko::lend(csr_ref)); + + auto ref_fact = + gko::factorization::ParIc<>::build().on(ref)->generate(csr_ref); + auto cuda_fact 
= + gko::factorization::Ic<>::build().on(cuda)->generate(csr_cuda); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), cuda_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_lt_factor(), cuda_fact->get_lt_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + cuda_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_lt_factor(), + cuda_fact->get_lt_factor()); +} + + +TEST_F(Ic, SetsCorrectStrategy) +{ + auto cuda_fact = gko::factorization::Ic<>::build() + .with_l_strategy(std::make_shared()) + .on(cuda) + ->generate(csr_cuda); + + ASSERT_EQ(cuda_fact->get_l_factor()->get_strategy()->get_name(), + "merge_path"); + ASSERT_EQ(cuda_fact->get_lt_factor()->get_strategy()->get_name(), + "merge_path"); +} + + +} // namespace diff --git a/cuda/test/factorization/ilu_kernels.cpp b/cuda/test/factorization/ilu_kernels.cpp index 4c1d356b0d0..85ed7ffc05f 100644 --- a/cuda/test/factorization/ilu_kernels.cpp +++ b/cuda/test/factorization/ilu_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/utils/unsort_matrix.hpp" #include "cuda/test/utils.hpp" #include "matrices/config.hpp" @@ -61,12 +62,14 @@ class Ilu : public ::testing::Test { std::shared_ptr ref; std::shared_ptr cuda; + std::ranlux48 rand_engine; std::shared_ptr csr_ref; std::shared_ptr csr_cuda; Ilu() : ref(gko::ReferenceExecutor::create()), - cuda(gko::CudaExecutor::create(0, ref)) + cuda(gko::CudaExecutor::create(0, ref)), + rand_engine(1337) {} void SetUp() override @@ -84,8 +87,33 @@ class Ilu : public ::testing::Test { }; -TEST_F(Ilu, ComputeILUIsEquivalentToRef) +TEST_F(Ilu, ComputeILUIsEquivalentToRefSorted) { + auto ref_fact = gko::factorization::ParIlu<>::build() + .with_skip_sorting(true) + .on(ref) + ->generate(csr_ref); + auto cuda_fact = gko::factorization::Ilu<>::build() + .with_skip_sorting(true) + .on(cuda) + ->generate(csr_cuda); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), cuda_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_u_factor(), cuda_fact->get_u_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + cuda_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_u_factor(), + cuda_fact->get_u_factor()); +} + + +TEST_F(Ilu, ComputeILUIsEquivalentToRefUnsorted) +{ + gko::test::unsort_matrix(gko::lend(csr_ref), rand_engine); + csr_cuda->copy_from(gko::lend(csr_ref)); + auto ref_fact = gko::factorization::ParIlu<>::build().on(ref)->generate(csr_ref); auto cuda_fact = diff --git a/cuda/test/factorization/par_ic_kernels.cpp b/cuda/test/factorization/par_ic_kernels.cpp new file mode 100644 index 00000000000..e957efbadf4 --- /dev/null +++ b/cuda/test/factorization/par_ic_kernels.cpp @@ -0,0 +1,167 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/factorization/par_ic_kernels.hpp" + + +#include +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/factorization/factorization_kernels.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" +#include "cuda/test/utils.hpp" +#include "matrices/config.hpp" + + +namespace { + + +class ParIc : public ::testing::Test { +protected: + using value_type = double; + using index_type = gko::int32; + using Coo = gko::matrix::Coo; + using Csr = gko::matrix::Csr; + + ParIc() + : mtx_size(624, 624), + rand_engine(43456), + ref(gko::ReferenceExecutor::create()), + cuda(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create())) + { + mtx_l = gko::test::generate_random_lower_triangular_matrix( + mtx_size[0], mtx_size[0], false, + std::uniform_int_distribution(10, mtx_size[0]), + std::normal_distribution>(0, 10.0), + rand_engine, ref); + + dmtx_ani = Csr::create(cuda); + dmtx_l_ani = Csr::create(cuda); + dmtx_l_ani_init = Csr::create(cuda); + dmtx_l = Csr::create(cuda); + dmtx_l->copy_from(lend(mtx_l)); + } + + void SetUp() + { + std::string file_name(gko::matrices::location_ani4_mtx); + auto input_file = std::ifstream(file_name, std::ios::in); + if (!input_file) { + FAIL() << "Could not find the file \"" << file_name + << "\", which is required for this test.\n"; + } + mtx_ani = gko::read(input_file, ref); + mtx_ani->sort_by_column_index(); + + { + mtx_l_ani = Csr::create(ref, mtx_ani->get_size()); + gko::matrix::CsrBuilder l_builder( + lend(mtx_l_ani)); + gko::kernels::reference::factorization::initialize_row_ptrs_l( + ref, lend(mtx_ani), mtx_l_ani->get_row_ptrs()); + auto l_nnz = + mtx_l_ani->get_const_row_ptrs()[mtx_ani->get_size()[0]]; + l_builder.get_col_idx_array().resize_and_reset(l_nnz); + l_builder.get_value_array().resize_and_reset(l_nnz); + 
gko::kernels::reference::factorization::initialize_l( + ref, lend(mtx_ani), lend(mtx_l_ani), false); + mtx_l_ani_init = Csr::create(ref); + mtx_l_ani_init->copy_from(lend(mtx_l_ani)); + gko::kernels::reference::par_ic_factorization::init_factor( + ref, lend(mtx_l_ani_init)); + } + dmtx_ani->copy_from(lend(mtx_ani)); + dmtx_l_ani->copy_from(lend(mtx_l_ani)); + dmtx_l_ani_init->copy_from(lend(mtx_l_ani_init)); + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + const gko::dim<2> mtx_size; + std::default_random_engine rand_engine; + + std::unique_ptr mtx_l; + std::unique_ptr mtx_ani; + std::unique_ptr mtx_l_ani; + std::unique_ptr mtx_l_ani_init; + + std::unique_ptr dmtx_l; + std::unique_ptr dmtx_ani; + std::unique_ptr dmtx_l_ani; + std::unique_ptr dmtx_l_ani_init; +}; + + +TEST_F(ParIc, KernelInitFactorIsEquivalentToRef) +{ + gko::kernels::reference::par_ic_factorization::init_factor(ref, + lend(mtx_l)); + gko::kernels::cuda::par_ic_factorization::init_factor(cuda, lend(dmtx_l)); + + GKO_ASSERT_MTX_NEAR(mtx_l, dmtx_l, r::value); +} + + +TEST_F(ParIc, KernelComputeFactorIsEquivalentToRef) +{ + auto square_size = mtx_ani->get_size(); + auto mtx_l_coo = Coo::create(ref, square_size); + mtx_l_ani->convert_to(lend(mtx_l_coo)); + auto dmtx_l_coo = Coo::create(cuda, square_size); + dmtx_l_coo->copy_from(lend(mtx_l_coo)); + + gko::kernels::reference::par_ic_factorization::compute_factor( + ref, 1, lend(mtx_l_coo), lend(mtx_l_ani_init)); + gko::kernels::cuda::par_ic_factorization::compute_factor( + cuda, 100, lend(dmtx_l_coo), lend(dmtx_l_ani_init)); + + GKO_ASSERT_MTX_NEAR(mtx_l_ani_init, dmtx_l_ani_init, 1e-4); +} + + +} // namespace diff --git a/cuda/test/factorization/par_ict_kernels.cpp b/cuda/test/factorization/par_ict_kernels.cpp index f052ac4bc85..60360fce033 100644 --- a/cuda/test/factorization/par_ict_kernels.cpp +++ b/cuda/test/factorization/par_ict_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 
2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -138,17 +138,17 @@ class ParIct : public ::testing::Test { TEST_F(ParIct, KernelAddCandidatesIsEquivalentToRef) { - auto mtx_llt = Csr::create(ref, mtx_size); - mtx_l->apply(lend(mtx_l->transpose()), lend(mtx_llt)); - auto dmtx_llt = Csr::create(cuda, mtx_size); - dmtx_llt->copy_from(lend(mtx_llt)); + auto mtx_llh = Csr::create(ref, mtx_size); + mtx_l->apply(lend(mtx_l->conj_transpose()), lend(mtx_llh)); + auto dmtx_llh = Csr::create(cuda, mtx_size); + dmtx_llh->copy_from(lend(mtx_llh)); auto res_mtx_l = Csr::create(ref, mtx_size); auto dres_mtx_l = Csr::create(cuda, mtx_size); gko::kernels::reference::par_ict_factorization::add_candidates( - ref, lend(mtx_llt), lend(mtx), lend(mtx_l), lend(res_mtx_l)); + ref, lend(mtx_llh), lend(mtx), lend(mtx_l), lend(res_mtx_l)); gko::kernels::cuda::par_ict_factorization::add_candidates( - cuda, lend(dmtx_llt), lend(dmtx), lend(dmtx_l), lend(dres_mtx_l)); + cuda, lend(dmtx_llh), lend(dmtx), lend(dmtx_l), lend(dres_mtx_l)); GKO_ASSERT_MTX_EQ_SPARSITY(res_mtx_l, dres_mtx_l); GKO_ASSERT_MTX_NEAR(res_mtx_l, dres_mtx_l, 1e-14); diff --git a/cuda/test/factorization/par_ilu_kernels.cpp b/cuda/test/factorization/par_ilu_kernels.cpp index f3ae4150924..47c95061834 100644 --- a/cuda/test/factorization/par_ilu_kernels.cpp +++ b/cuda/test/factorization/par_ilu_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/factorization/par_ilut_kernels.cpp b/cuda/test/factorization/par_ilut_kernels.cpp index 7a66ffe2ec0..b59c57d0be8 100644 --- a/cuda/test/factorization/par_ilut_kernels.cpp +++ b/cuda/test/factorization/par_ilut_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -71,7 +71,11 @@ class ParIlut : public ::testing::Test { using ComplexCsr = gko::matrix::Csr, index_type>; ParIlut() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else : mtx_size(532, 423), +#endif rand_engine(1337), ref(gko::ReferenceExecutor::create()), cuda(gko::CudaExecutor::create(0, ref)) diff --git a/cuda/test/matrix/CMakeLists.txt b/cuda/test/matrix/CMakeLists.txt index 65ce218ac71..a1d7ca4a7a0 100644 --- a/cuda/test/matrix/CMakeLists.txt +++ b/cuda/test/matrix/CMakeLists.txt @@ -1,5 +1,6 @@ ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) +ginkgo_create_test(fbcsr_kernels) ginkgo_create_test(dense_kernels) ginkgo_create_test(diagonal_kernels) ginkgo_create_test(ell_kernels) diff --git a/cuda/test/matrix/coo_kernels.cpp b/cuda/test/matrix/coo_kernels.cpp index 3522e918cc6..12b8b815d98 100644 --- a/cuda/test/matrix/coo_kernels.cpp +++ b/cuda/test/matrix/coo_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/coo_kernels.hpp" +#include "core/test/utils/unsort_matrix.hpp" #include "cuda/test/utils.hpp" @@ -58,6 +59,7 @@ class Coo : public ::testing::Test { protected: using Mtx = gko::matrix::Coo<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Coo() : rand_engine(42) {} @@ -75,9 +77,10 @@ class Coo : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } @@ -102,6 +105,11 @@ class Coo : public ::testing::Test { dbeta->copy_from(beta.get()); } + void unsort_mtx() + { + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); + } std::shared_ptr ref; std::shared_ptr cuda; @@ -133,6 +141,36 @@ TEST_F(Coo, SimpleApplyIsEquivalentToRef) } +TEST_F(Coo, SimpleApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(cuda, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + double padding_val{1234.0}; + cuda->copy_from(cuda->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, 1e-14); + ASSERT_EQ(cuda->copy_val_to_host(dresult_padded->get_values() + 1), 1234.0); +} + + +TEST_F(Coo, SimpleApplyIsEquivalentToRefUnsorted) +{ + set_up_apply_data(); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Coo, AdvancedApplyIsEquivalentToRef) { set_up_apply_data(); @@ -144,6 +182,24 @@ TEST_F(Coo, AdvancedApplyIsEquivalentToRef) } +TEST_F(Coo, 
AdvancedApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(cuda, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + double padding_val{1234.0}; + cuda->copy_from(cuda->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, 1e-14); + ASSERT_EQ(cuda->copy_val_to_host(dresult_padded->get_values() + 1), 1234.0); +} + + TEST_F(Coo, SimpleApplyAddIsEquivalentToRef) { set_up_apply_data(); @@ -232,6 +288,57 @@ TEST_F(Coo, AdvancedApplyAddToLargeDenseMatrixIsEquivalentToRef) } +TEST_F(Coo, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Coo, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Coo, ApplyAddToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto 
complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply2(alpha.get(), complex_b.get(), complex_x.get()); + dmtx->apply2(dalpha.get(), dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Coo, ConvertToDenseIsEquivalentToRef) { set_up_apply_data(); @@ -271,4 +378,26 @@ TEST_F(Coo, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Coo, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Coo, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 2f1bb544ac3..24beac46605 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -52,6 +52,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils/unsort_matrix.hpp" #include "cuda/test/utils.hpp" @@ -60,12 +61,20 @@ namespace { class Csr : public ::testing::Test { protected: - using Mtx = gko::matrix::Csr<>; + using Arr = gko::Array; using Vec = gko::matrix::Dense<>; + using Mtx = gko::matrix::Csr<>; using ComplexVec = gko::matrix::Dense>; using ComplexMtx = gko::matrix::Csr>; - Csr() : mtx_size(532, 231), rand_engine(42) {} + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} void SetUp() { @@ -114,6 +123,16 @@ class Csr : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Vec::create(cuda); dbeta->copy_from(beta.get()); + + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); } void set_up_apply_complex_data( @@ -126,36 +145,10 @@ class Csr : public ::testing::Test { complex_dmtx->copy_from(complex_mtx.get()); } - struct matrix_pair { - std::unique_ptr ref; - std::unique_ptr cuda; - }; - - matrix_pair gen_unsorted_mtx() + void unsort_mtx() { - constexpr int min_nnz_per_row = 2; // Must be at least 2 - auto local_mtx_ref = - gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); - for (size_t row = 0; row < mtx_size[0]; ++row) { - const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); - const auto start_row = row_ptrs[row]; - auto col_idx = local_mtx_ref->get_col_idxs() + start_row; - auto vals = local_mtx_ref->get_values() + start_row; - const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; - auto swap_idx_dist = - std::uniform_int_distribution<>(0, nnz_in_this_row - 1); - // 
shuffle `nnz_in_this_row / 2` times - for (size_t perm = 0; perm < nnz_in_this_row; perm += 2) { - const auto idx1 = swap_idx_dist(rand_engine); - const auto idx2 = swap_idx_dist(rand_engine); - std::swap(col_idx[idx1], col_idx[idx2]); - std::swap(vals[idx1], vals[idx2]); - } - } - auto local_mtx_cuda = Mtx::create(cuda); - local_mtx_cuda->copy_from(local_mtx_ref.get()); - - return {std::move(local_mtx_ref), std::move(local_mtx_cuda)}; + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); } std::shared_ptr ref; @@ -179,6 +172,8 @@ class Csr : public ::testing::Test { std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr cpermute_idxs; }; @@ -202,6 +197,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalance) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalanceUnsorted) +{ + set_up_apply_data(std::make_shared(cuda)); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithLoadBalance) { set_up_apply_data(std::make_shared(cuda)); @@ -224,6 +231,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithCusparse) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithCusparseUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithCusparse) { set_up_apply_data(std::make_shared()); @@ -246,6 +265,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePath) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePathUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, 
AdvancedApplyIsEquivalentToRefWithMergePath) { set_up_apply_data(std::make_shared()); @@ -268,6 +299,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassical) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassicalUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithClassical) { set_up_apply_data(std::make_shared()); @@ -408,16 +451,49 @@ TEST_F(Csr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) } +TEST_F(Csr, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Csr, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Csr, TransposeIsEquivalentToRef) { set_up_apply_data(std::make_shared(cuda)); - auto trans = mtx->transpose(); - auto d_trans = dmtx->transpose(); + auto trans = gko::as(mtx->transpose()); + auto d_trans = gko::as(dmtx->transpose()); - 
GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -425,13 +501,11 @@ TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) { set_up_apply_complex_data(std::make_shared(cuda)); - auto trans = complex_mtx->conj_transpose(); - auto d_trans = complex_dmtx->conj_transpose(); + auto trans = gko::as(complex_mtx->conj_transpose()); + auto d_trans = gko::as(complex_dmtx->conj_transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE( - static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -654,6 +728,86 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = + gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); +} + + +TEST_F(Csr, IsColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto c_permute = 
gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); +} + + +TEST_F(Csr, IsInverseRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_r_permute = + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + gko::as(dmtx->inverse_row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); +} + + +TEST_F(Csr, IsInverseColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_c_permute = + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); + auto d_inverse_c_permute = + gko::as(dmtx->inverse_column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); +} + + TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) { set_up_apply_data(std::make_shared()); @@ -669,12 +823,13 @@ TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) TEST_F(Csr, RecognizeUnsortedMatrixIsEquivalentToRef) { - auto uns_mtx = gen_unsorted_mtx(); + set_up_apply_data(std::make_shared()); + unsort_mtx(); bool is_sorted_cuda{}; bool is_sorted_ref{}; - is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); - is_sorted_cuda = uns_mtx.cuda->is_sorted_by_column_index(); + is_sorted_ref = mtx->is_sorted_by_column_index(); + is_sorted_cuda = dmtx->is_sorted_by_column_index(); ASSERT_EQ(is_sorted_ref, is_sorted_cuda); } @@ -694,13 +849,14 @@ TEST_F(Csr, SortSortedMatrixIsEquivalentToRef) TEST_F(Csr, SortUnsortedMatrixIsEquivalentToRef) { - auto uns_mtx = gen_unsorted_mtx(); + set_up_apply_data(std::make_shared()); + 
unsort_mtx(); - uns_mtx.ref->sort_by_column_index(); - uns_mtx.cuda->sort_by_column_index(); + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); // Values must be unchanged, therefore, tolerance is `0` - GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.cuda, 0); + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); } @@ -740,4 +896,48 @@ TEST_F(Csr, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Csr, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(cuda)); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Csr, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(cuda)); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + +TEST_F(Csr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(cuda)); + + complex_mtx->compute_absolute_inplace(); + complex_dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); +} + + +TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(cuda)); + + auto abs_mtx = complex_mtx->compute_absolute(); + auto dabs_mtx = complex_dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index c520dfcf2a3..de96d27d823 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -61,9 +61,13 @@ class Dense : public ::testing::Test { using itype = int; using vtype = double; using Mtx = gko::matrix::Dense; + using MixedMtx = gko::matrix::Dense>; using NormVector = gko::matrix::Dense>; using Arr = gko::Array; using ComplexMtx = gko::matrix::Dense>; + using Diagonal = gko::matrix::Diagonal; + using MixedComplexMtx = + gko::matrix::Dense>>; Dense() : rand_engine(15) {} @@ -118,6 +122,7 @@ class Dense : public ::testing::Test { expected = gen_mtx(65, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); dx = Mtx::create(cuda); dx->copy_from(x.get()); dc_x = ComplexMtx::create(cuda); @@ -130,6 +135,8 @@ class Dense : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Mtx::create(cuda); dbeta->copy_from(beta.get()); + dsquare = Mtx::create(cuda); + dsquare->copy_from(square.get()); std::vector tmp(x->get_size()[0], 0); auto rng = std::default_random_engine{}; @@ -138,14 +145,25 @@ class Dense : public ::testing::Test { std::vector tmp2(x->get_size()[1], 0); std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); + std::vector tmp3(x->get_size()[0] / 10); + std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); + for (auto &i : tmp3) { + i = row_dist(rng); + } rpermute_idxs = std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); - drpermute_idxs = - std::unique_ptr(new Arr{cuda, tmp.begin(), tmp.end()}); cpermute_idxs = std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); - dcpermute_idxs = - std::unique_ptr(new Arr{cuda, tmp2.begin(), tmp2.end()}); + rgather_idxs = + std::unique_ptr(new Arr{ref, tmp3.begin(), tmp3.end()}); + } + + template + std::unique_ptr convert(InputType &&input) + { + auto result = ConvertedType::create(input->get_executor()); + input->convert_to(result.get()); + return result; } std::shared_ptr ref; @@ -159,189 +177,229 
@@ class Dense : public ::testing::Test { std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; + std::unique_ptr square; std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dc_x; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; - std::unique_ptr drpermute_idxs; std::unique_ptr cpermute_idxs; - std::unique_ptr dcpermute_idxs; + std::unique_ptr rgather_idxs; }; -TEST_F(Dense, SingleVectorCudaScaleIsEquivalentToRef) +TEST_F(Dense, SingleVectorCudaComputeDotIsEquivalentToRef) { set_up_vector_data(1); - auto result = Mtx::create(ref); - x->scale(alpha.get()); - dx->scale(dalpha.get()); - result->copy_from(dx.get()); + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(result, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorCudaScaleIsEquivalentToRef) +TEST_F(Dense, MultipleVectorCudaComputeDotIsEquivalentToRef) { set_up_vector_data(20); - x->scale(alpha.get()); - dx->scale(dalpha.get()); + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorCudaScaleWithDifferentAlphaIsEquivalentToRef) +TEST_F(Dense, SingleVectorCudaComputeConjDotIsEquivalentToRef) { - set_up_vector_data(20, true); + set_up_vector_data(1); - x->scale(alpha.get()); - dx->scale(dalpha.get()); + x->compute_conj_dot(y.get(), expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, SingleVectorCudaAddScaledIsEquivalentToRef) +TEST_F(Dense, MultipleVectorCudaComputeConjDotIsEquivalentToRef) { - set_up_vector_data(1); + set_up_vector_data(20); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->compute_conj_dot(y.get(), 
expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorCudaAddScaledIsEquivalentToRef) +TEST_F(Dense, CudaComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->cuda, norm_size); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); } -TEST_F(Dense, MultipleVectorCudaAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(Dense, SimpleApplyIsEquivalentToRef) { - set_up_vector_data(20); + set_up_apply_data(); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, AddsScaledDiagIsEquivalentToRef) +TEST_F(Dense, SimpleApplyMixedIsEquivalentToRef) { - auto mat = gen_mtx(532, 532); - gko::Array diag_values(ref, 532); - gko::kernels::reference::components::fill_array(ref, diag_values.get_data(), - 532, Mtx::value_type{2.0}); - auto diag = - gko::matrix::Diagonal::create(ref, 532, diag_values); - alpha = gko::initialize({2.0}, ref); - auto dmat = Mtx::create(cuda); - dmat->copy_from(mat.get()); - auto ddiag = gko::matrix::Diagonal::create(cuda); - ddiag->copy_from(diag.get()); - dalpha = Mtx::create(cuda); - dalpha->copy_from(alpha.get()); + set_up_apply_data(); - mat->add_scaled(alpha.get(), diag.get()); - dmat->add_scaled(dalpha.get(), ddiag.get()); + x->apply(convert(y).get(), convert(expected).get()); + dx->apply(convert(dy).get(), convert(dresult).get()); - GKO_ASSERT_MTX_NEAR(mat, dmat, 1e-14); + 
GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); } -TEST_F(Dense, SingleVectorCudaComputeDotIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyIsEquivalentToRef) { - set_up_vector_data(1); + set_up_apply_data(); - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorCudaComputeDotIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyMixedIsEquivalentToRef) { - set_up_vector_data(20); + set_up_apply_data(); - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); + x->apply(convert(alpha).get(), convert(y).get(), + convert(beta).get(), convert(expected).get()); + dx->apply(convert(dalpha).get(), convert(dy).get(), + convert(dbeta).get(), convert(dresult).get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); } -TEST_F(Dense, CudaComputeNorm2IsEquivalentToRef) +TEST_F(Dense, ApplyToComplexIsEquivalentToRef) { - set_up_vector_data(20); - auto norm_size = gko::dim<2>{1, x->get_size()[1]}; - auto norm_expected = NormVector::create(this->ref, norm_size); - auto dnorm = NormVector::create(this->cuda, norm_size); + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); - x->compute_norm2(norm_expected.get()); - dx->compute_norm2(dnorm.get()); + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); } -TEST_F(Dense, SimpleApplyIsEquivalentToRef) +TEST_F(Dense, ApplyToMixedComplexIsEquivalentToRef) { set_up_apply_data(); + auto 
complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = MixedComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); } -TEST_F(Dense, AdvancedApplyIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyToComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + x->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); } -TEST_F(Dense, IsTransposable) +TEST_F(Dense, AdvancedApplyToMixedComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = MixedComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); - auto trans = x->transpose(); - auto dtrans = dx->transpose(); + x->apply(convert(alpha).get(), complex_b.get(), + convert(beta).get(), complex_x.get()); + dx->apply(convert(dalpha).get(), dcomplex_b.get(), + convert(dbeta).get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + 
GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); } -TEST_F(Dense, IsConjugateTransposable) +TEST_F(Dense, ComputeDotComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(cuda, gko::dim<2>{1, 2}); - auto trans = c_x->conj_transpose(); - auto dtrans = dc_x->conj_transpose(); + complex_b->compute_dot(complex_x.get(), result.get()); + dcomplex_b->compute_dot(dcomplex_x.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(result, dresult, 1e-14); +} + + +TEST_F(Dense, ComputeConjDotComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(cuda, gko::dim<2>{1, 2}); + + complex_b->compute_conj_dot(complex_x.get(), result.get()); + dcomplex_b->compute_conj_dot(dcomplex_x.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(result, dresult, 1e-14); } @@ -492,7 +550,7 @@ TEST_F(Dense, CalculateNNZPerRowIsEquivalentToRef) &dnnz_per_row); auto tmp = gko::Array(ref, dnnz_per_row); - for (auto i = 0; i < nnz_per_row.get_num_elems(); i++) { + for (gko::size_type i = 0; i < nnz_per_row.get_num_elems(); i++) { ASSERT_EQ(nnz_per_row.get_const_data()[i], tmp.get_const_data()[i]); } } @@ -528,62 +586,67 @@ TEST_F(Dense, CalculateTotalColsIsEquivalentToRef) } -TEST_F(Dense, IsRowPermutable) -{ - set_up_apply_data(); - - auto r_permute = 
x->row_permute(rpermute_idxs.get()); - auto dr_permute = dx->row_permute(drpermute_idxs.get()); - - GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), - static_cast(dr_permute.get()), 0); -} - - -TEST_F(Dense, IsColPermutable) +TEST_F(Dense, IsTransposable) { set_up_apply_data(); - auto c_permute = x->column_permute(cpermute_idxs.get()); - auto dc_permute = dx->column_permute(dcpermute_idxs.get()); + auto trans = x->transpose(); + auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), - static_cast(dc_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } -TEST_F(Dense, IsInverseRowPermutable) +TEST_F(Dense, IsTransposableIntoDenseCrossExecutor) { set_up_apply_data(); + auto row_span = gko::span{0, x->get_size()[0] - 2}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = Mtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = Mtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); - auto inverse_r_permute = x->inverse_row_permute(rpermute_idxs.get()); - auto d_inverse_r_permute = dx->inverse_row_permute(drpermute_idxs.get()); + sub_x->transpose(trans.get()); + sub_dx->transpose(dtrans.get()); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), - static_cast(d_inverse_r_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); } -TEST_F(Dense, IsInverseColPermutable) +TEST_F(Dense, IsConjugateTransposable) { set_up_apply_data(); - auto inverse_c_permute = x->inverse_column_permute(cpermute_idxs.get()); - auto d_inverse_c_permute = dx->inverse_column_permute(dcpermute_idxs.get()); + auto trans = c_x->conj_transpose(); + auto dtrans = dc_x->conj_transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), - 
static_cast(d_inverse_c_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } -TEST_F(Dense, ExtractDiagonalIsEquivalentToRef) +TEST_F(Dense, IsConjugateTransposableIntoDenseCrossExecutor) { set_up_apply_data(); - - auto diag = x->extract_diagonal(); - auto ddiag = dx->extract_diagonal(); - - GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); + auto row_span = gko::span{0, c_x->get_size()[0] - 2}; + auto col_span = gko::span{0, c_x->get_size()[1] - 2}; + auto sub_x = c_x->create_submatrix(row_span, col_span); + auto sub_dx = dc_x->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = ComplexMtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = ComplexMtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->conj_transpose(trans.get()); + sub_dx->conj_transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); } diff --git a/cuda/test/matrix/diagonal_kernels.cpp b/cuda/test/matrix/diagonal_kernels.cpp index 8c8bbe207e2..d3c6f8c5973 100644 --- a/cuda/test/matrix/diagonal_kernels.cpp +++ b/cuda/test/matrix/diagonal_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/diagonal_kernels.hpp" -#include "core/test/utils.hpp" +#include "cuda/test/utils.hpp" namespace { @@ -62,7 +62,14 @@ class Diagonal : public ::testing::Test { using Dense = gko::matrix::Dense; using ComplexDiag = gko::matrix::Diagonal; - Diagonal() : mtx_size(532, 231), rand_engine(42) {} + Diagonal() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} void SetUp() { @@ -118,10 +125,10 @@ class Diagonal : public ::testing::Test { diag = gen_diag(mtx_size[0]); ddiag = Diag::create(cuda); ddiag->copy_from(diag.get()); - dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); - denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); + dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); ddense1 = Dense::create(cuda); ddense1->copy_from(dense1.get()); ddense2 = Dense::create(cuda); @@ -249,4 +256,26 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) } +TEST_F(Diagonal, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->compute_absolute_inplace(); + ddiag->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(diag, ddiag, 1e-14); +} + + +TEST_F(Diagonal, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_diag = diag->compute_absolute(); + auto dabs_diag = ddiag->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_diag, dabs_diag, 1e-14); +} + + } // namespace diff --git a/cuda/test/matrix/ell_kernels.cpp b/cuda/test/matrix/ell_kernels.cpp index 960faaed20e..51c12fab531 100644 --- a/cuda/test/matrix/ell_kernels.cpp +++ b/cuda/test/matrix/ell_kernels.cpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/ell_kernels.hpp" -#include "core/test/utils.hpp" #include "cuda/test/utils.hpp" @@ -59,8 +58,12 @@ class Ell : public ::testing::Test { protected: using Mtx = gko::matrix::Ell<>; using Vec = gko::matrix::Dense<>; + using Vec2 = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; - Ell() : rand_engine(42) {} + Ell() + : rand_engine(42), size{532, 231}, num_els_rowwise{300}, ell_stride{600} + {} void SetUp() { @@ -76,9 +79,10 @@ class Ell : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } @@ -91,38 +95,62 @@ class Ell : public ::testing::Test { stride); mtx->copy_from(gen_mtx(num_rows, num_cols)); expected = gen_mtx(num_rows, num_vectors); + expected2 = Vec2::create(ref); + expected2->copy_from(expected.get()); y = gen_mtx(num_cols, num_vectors); + y2 = Vec2::create(ref); + y2->copy_from(y.get()); alpha = gko::initialize({2.0}, ref); + alpha2 = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + beta2 = gko::initialize({-1.0}, ref); dmtx = Mtx::create(cuda); dmtx->copy_from(mtx.get()); dresult = Vec::create(cuda); dresult->copy_from(expected.get()); + dresult2 = Vec2::create(cuda); + dresult2->copy_from(expected2.get()); dy = Vec::create(cuda); dy->copy_from(y.get()); + dy2 = Vec2::create(cuda); + dy2->copy_from(y2.get()); dalpha = Vec::create(cuda); dalpha->copy_from(alpha.get()); + 
dalpha2 = Vec2::create(cuda); + dalpha2->copy_from(alpha2.get()); dbeta = Vec::create(cuda); dbeta->copy_from(beta.get()); + dbeta2 = Vec2::create(cuda); + dbeta2->copy_from(beta2.get()); } - std::shared_ptr ref; std::shared_ptr cuda; std::ranlux48 rand_engine; + gko::dim<2> size; + gko::size_type num_els_rowwise; + gko::size_type ell_stride; std::unique_ptr mtx; std::unique_ptr expected; + std::unique_ptr expected2; std::unique_ptr y; + std::unique_ptr y2; std::unique_ptr alpha; + std::unique_ptr alpha2; std::unique_ptr beta; + std::unique_ptr beta2; std::unique_ptr dmtx; std::unique_ptr dresult; + std::unique_ptr dresult2; std::unique_ptr dy; + std::unique_ptr dy2; std::unique_ptr dalpha; + std::unique_ptr dalpha2; std::unique_ptr dbeta; + std::unique_ptr dbeta2; }; @@ -137,6 +165,39 @@ TEST_F(Ell, SimpleApplyIsEquivalentToRef) } +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef1) +{ + set_up_apply_data(); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef2) +{ + set_up_apply_data(); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef3) +{ + set_up_apply_data(); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyIsEquivalentToRef) { set_up_apply_data(); @@ -148,9 +209,42 @@ TEST_F(Ell, AdvancedApplyIsEquivalentToRef) } +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef1) +{ + set_up_apply_data(); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef2) +{ + set_up_apply_data(); + + 
mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef3) +{ + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 1, 300, 600); + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -159,9 +253,42 @@ TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) } +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 1, 300, 600); + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -169,9 +296,42 @@ TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) } +TEST_F(Ell, 
MixedAdvancedApplyWithStrideIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 3, 300, 600); + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -180,9 +340,42 @@ TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) } +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + 
set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 3, 300, 600); + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -191,6 +384,39 @@ TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) } +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyByAtomicIsEquivalentToRef) { set_up_apply_data(10, 10000); @@ -279,6 +505,40 @@ TEST_F(Ell, AdvancedApplyOnSmallMatrixIsEquivalentToRef) } +TEST_F(Ell, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(cuda); + 
dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Ell, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Ell, ConvertToDenseIsEquivalentToRef) { set_up_apply_data(); @@ -325,7 +585,7 @@ TEST_F(Ell, CalculateNNZPerRowIsEquivalentToRef) &dnnz_per_row); auto tmp = gko::Array(ref, dnnz_per_row); - for (auto i = 0; i < nnz_per_row.get_num_elems(); i++) { + for (gko::size_type i = 0; i < nnz_per_row.get_num_elems(); i++) { ASSERT_EQ(nnz_per_row.get_const_data()[i], tmp.get_const_data()[i]); } } @@ -356,4 +616,26 @@ TEST_F(Ell, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Ell, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Ell, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/cuda/test/matrix/fbcsr_kernels.cpp b/cuda/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 00000000000..6b273de668d --- /dev/null +++ b/cuda/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,93 @@ 
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/matrix/fbcsr_sample.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Fbcsr<>; + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::unique_ptr mtx; +}; + + +TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) +{ + using value_type = Mtx::value_type; + using index_type = Mtx::index_type; + using MatData = gko::matrix_data; + gko::testing::FbcsrSample sample(ref); + auto refmat = sample.generate_fbcsr(); + auto cudamat = Mtx::create(cuda); + cudamat->copy_from(gko::lend(refmat)); + MatData refdata; + MatData cudadata; + + refmat->write(refdata); + cudamat->write(cudadata); + + ASSERT_TRUE(refdata.nonzeros == cudadata.nonzeros); +} + + +} // namespace diff --git a/cuda/test/matrix/hybrid_kernels.cpp b/cuda/test/matrix/hybrid_kernels.cpp index 5e7048632bc..4970e149aa3 100644 --- a/cuda/test/matrix/hybrid_kernels.cpp +++ b/cuda/test/matrix/hybrid_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -57,6 +57,7 @@ class Hybrid : public ::testing::Test { protected: using Mtx = gko::matrix::Hybrid<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Hybrid() : rand_engine(42) {} @@ -74,9 +75,11 @@ class Hybrid : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols, int min_nnz_row) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(min_nnz_row, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -181,6 +184,40 @@ TEST_F(Hybrid, AdvancedApplyToDenseMatrixIsEquivalentToRef) } +TEST_F(Hybrid, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Hybrid, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Hybrid, CountNonzerosIsEquivalentToRef) { set_up_apply_data(); @@ -231,4 +268,33 @@ TEST_F(Hybrid, ExtractDiagonalIsEquivalentToRef) } 
+TEST_F(Hybrid, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Hybrid, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, std::make_shared(2)); + using AbsMtx = gko::remove_complex; + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + auto abs_strategy = gko::as(abs_mtx->get_strategy()); + auto dabs_strategy = + gko::as(dabs_mtx->get_strategy()); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), + dabs_strategy->get_num_columns()); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), 2); +} + + } // namespace diff --git a/cuda/test/matrix/sellp_kernels.cpp b/cuda/test/matrix/sellp_kernels.cpp index a3cdeed95ad..706fedccd3f 100644 --- a/cuda/test/matrix/sellp_kernels.cpp +++ b/cuda/test/matrix/sellp_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/sellp_kernels.hpp" -#include "core/test/utils.hpp" #include "cuda/test/utils.hpp" @@ -59,6 +58,7 @@ class Sellp : public ::testing::Test { protected: using Mtx = gko::matrix::Sellp<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Sellp() : rand_engine(42) {} @@ -76,46 +76,23 @@ class Sellp : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } - void set_up_apply_vector( - int slice_size = gko::matrix::default_slice_size, - int stride_factor = gko::matrix::default_stride_factor, - int total_cols = 0) - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(532, 231)); - expected = gen_mtx(532, 1); - y = gen_mtx(231, 1); - alpha = gko::initialize({2.0}, ref); - beta = gko::initialize({-1.0}, ref); - dmtx = Mtx::create(cuda); - dmtx->copy_from(mtx.get()); - dresult = Vec::create(cuda); - dresult->copy_from(expected.get()); - dy = Vec::create(cuda); - dy->copy_from(y.get()); - dalpha = Vec::create(cuda); - dalpha->copy_from(alpha.get()); - dbeta = Vec::create(cuda); - dbeta->copy_from(beta.get()); - } - void set_up_apply_matrix( - int slice_size = gko::matrix::default_slice_size, - int stride_factor = gko::matrix::default_stride_factor, - int total_cols = 0) + int total_cols = 1, int slice_size = gko::matrix::default_slice_size, + int stride_factor = gko::matrix::default_stride_factor) { mtx = Mtx::create(ref); mtx->copy_from(gen_mtx(532, 231)); empty = Mtx::create(ref); - expected = gen_mtx(532, 64); - y = gen_mtx(231, 64); + expected = gen_mtx(532, total_cols); + y = gen_mtx(231, total_cols); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); dmtx = Mtx::create(cuda); @@ -154,79 +131,67 @@ 
class Sellp : public ::testing::Test { TEST_F(Sellp, SimpleApplyIsEquivalentToRef) { - set_up_apply_vector(); + set_up_apply_matrix(); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyIsEquivalentToRef) { - set_up_apply_vector(); + set_up_apply_matrix(); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, SimpleApplyWithSliceSizeAndStrideFactorIsEquivalentToRef) { - set_up_apply_vector(32, 2); + set_up_apply_matrix(1, 32, 2); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyWithSliceSizeAndStrideFActorIsEquivalentToRef) { - set_up_apply_vector(32, 2); + set_up_apply_matrix(1, 32, 2); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, SimpleApplyMultipleRHSIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyMultipleRHSIsEquivalentToRef) 
{ - set_up_apply_matrix(); + set_up_apply_matrix(64); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } @@ -238,9 +203,7 @@ TEST_F(Sellp, mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } @@ -252,16 +215,47 @@ TEST_F(Sellp, mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) +TEST_F(Sellp, ApplyToComplexIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + +TEST_F(Sellp, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(cuda); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(cuda); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), 
dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_matrix(64); auto dense_mtx = gko::matrix::Dense<>::create(ref); auto ddense_mtx = gko::matrix::Dense<>::create(cuda); @@ -274,8 +268,7 @@ TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) TEST_F(Sellp, ConvertToCsrIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); auto csr_mtx = gko::matrix::Csr<>::create(ref); auto dcsr_mtx = gko::matrix::Csr<>::create(cuda); @@ -288,8 +281,7 @@ TEST_F(Sellp, ConvertToCsrIsEquivalentToRef) TEST_F(Sellp, ConvertEmptyToDenseIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); auto dense_mtx = gko::matrix::Dense<>::create(ref); auto ddense_mtx = gko::matrix::Dense<>::create(cuda); @@ -302,8 +294,7 @@ TEST_F(Sellp, ConvertEmptyToDenseIsEquivalentToRef) TEST_F(Sellp, ConvertEmptyToCsrIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); auto csr_mtx = gko::matrix::Csr<>::create(ref); auto dcsr_mtx = gko::matrix::Csr<>::create(cuda); @@ -316,8 +307,7 @@ TEST_F(Sellp, ConvertEmptyToCsrIsEquivalentToRef) TEST_F(Sellp, CountNonzerosIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); gko::size_type nnz; gko::size_type dnnz; @@ -330,7 +320,7 @@ TEST_F(Sellp, CountNonzerosIsEquivalentToRef) TEST_F(Sellp, ExtractDiagonalIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); auto diag = mtx->extract_diagonal(); auto ddiag = dmtx->extract_diagonal(); @@ -341,7 +331,7 @@ TEST_F(Sellp, ExtractDiagonalIsEquivalentToRef) TEST_F(Sellp, ExtractDiagonalWithSliceSizeAndStrideFactorIsEquivalentToRef) { - set_up_apply_matrix(32, 2); + set_up_apply_matrix(64, 32, 2); auto diag = mtx->extract_diagonal(); auto ddiag = dmtx->extract_diagonal(); @@ -350,4 +340,26 @@ TEST_F(Sellp, ExtractDiagonalWithSliceSizeAndStrideFactorIsEquivalentToRef) } +TEST_F(Sellp, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + 
set_up_apply_matrix(64, 32, 2); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Sellp, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/cuda/test/multigrid/CMakeLists.txt b/cuda/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..8fe8bbeba48 --- /dev/null +++ b/cuda/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(amgx_pgm_kernels) diff --git a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp new file mode 100644 index 00000000000..493192c8630 --- /dev/null +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -0,0 +1,316 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/multigrid/amgx_pgm_kernels.hpp" +#include "core/test/utils/matrix_generator.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class AmgxPgm : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Mtx = gko::matrix::Dense<>; + using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; + + AmgxPgm() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + gko::Array gen_array(gko::size_type num, index_type min_val, + index_type max_val) + { + return gko::test::generate_random_array( + num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, + ref); + } + + gko::Array gen_agg_array(gko::size_type num, + gko::size_type num_agg) + { + auto agg_array = gen_array(num, 0, num_agg - 
1); + auto agg_array_val = agg_array.get_data(); + std::vector select_agg(num); + std::iota(select_agg.begin(), select_agg.end(), 0); + // use the first num_agg item as the aggregated index. + std::shuffle(select_agg.begin(), select_agg.end(), rand_engine); + // the value of agg_array is the i-th of aggregate group + for (gko::size_type i = 0; i < num; i++) { + agg_array_val[i] = select_agg[agg_array_val[i]]; + } + // the aggregated group must contain the identifier-th element + // agg_val[i] == i holds in the aggregated group whose identifier is i + for (gko::size_type i = 0; i < num_agg; i++) { + auto agg_idx = select_agg[i]; + agg_array_val[agg_idx] = agg_idx; + } + return agg_array; + } + + void initialize_data() + { + m = 597; + n = 300; + int nrhs = 3; + + agg = gen_agg_array(m, n); + // only use 0 ~ n-2 and ensure the end isolated and not yet finished + unfinished_agg = gen_array(m, -1, n - 2); + unfinished_agg.get_data()[n - 1] = -1; + strongest_neighbor = gen_array(m, 0, n - 2); + strongest_neighbor.get_data()[n - 1] = n - 1; + coarse_vector = gen_mtx(n, nrhs); + fine_vector = gen_mtx(m, nrhs); + auto weight = gen_mtx(m, m); + make_weight(weight.get()); + weight_csr = Csr::create(ref); + weight->convert_to(weight_csr.get()); + weight_diag = weight_csr->extract_diagonal(); + auto system_dense = gen_mtx(m, m); + gko::test::make_hpd(system_dense.get()); + system_mtx = Csr::create(ref); + system_dense->convert_to(system_mtx.get()); + + d_agg.set_executor(cuda); + d_unfinished_agg.set_executor(cuda); + d_strongest_neighbor.set_executor(cuda); + d_coarse_vector = Mtx::create(cuda); + d_fine_vector = Mtx::create(cuda); + d_weight_csr = Csr::create(cuda); + d_weight_diag = Diag::create(cuda); + d_system_mtx = Csr::create(cuda); + d_agg = agg; + d_unfinished_agg = unfinished_agg; + d_strongest_neighbor = strongest_neighbor; + d_coarse_vector->copy_from(coarse_vector.get()); + d_fine_vector->copy_from(fine_vector.get()); + d_weight_csr->copy_from(weight_csr.get()); 
+ d_weight_diag->copy_from(weight_diag.get()); + d_system_mtx->copy_from(system_mtx.get()); + } + + void make_weight(Mtx *mtx) + { + gko::test::make_symmetric(mtx); + // only works for real value cases. + mtx->compute_absolute_inplace(); + gko::test::make_diag_dominant(mtx); + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::ranlux48 rand_engine; + + gko::Array agg; + gko::Array unfinished_agg; + gko::Array strongest_neighbor; + + gko::Array d_agg; + gko::Array d_unfinished_agg; + gko::Array d_strongest_neighbor; + + std::unique_ptr coarse_vector; + std::unique_ptr fine_vector; + std::unique_ptr weight_diag; + std::unique_ptr weight_csr; + std::shared_ptr system_mtx; + + std::unique_ptr d_coarse_vector; + std::unique_ptr d_fine_vector; + std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_csr; + std::shared_ptr d_system_mtx; + + gko::size_type n; + gko::size_type m; +}; + + +TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + + gko::kernels::reference::amgx_pgm::match_edge(ref, strongest_neighbor, x); + gko::kernels::cuda::amgx_pgm::match_edge(cuda, d_strongest_neighbor, d_x); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) +{ + initialize_data(); + index_type num_unagg; + index_type d_num_unagg; + + gko::kernels::reference::amgx_pgm::count_unagg(ref, unfinished_agg, + &num_unagg); + gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_unfinished_agg, + &d_num_unagg); + + ASSERT_EQ(d_num_unagg, num_unagg); +} + + +TEST_F(AmgxPgm, RenumberIsEquivalentToRef) +{ + initialize_data(); + index_type num_agg; + index_type d_num_agg; + + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + gko::kernels::cuda::amgx_pgm::renumber(cuda, d_agg, &d_num_agg); + + ASSERT_EQ(d_num_agg, num_agg); + GKO_ASSERT_ARRAY_EQ(d_agg, agg); + ASSERT_EQ(num_agg, n); +} + + +TEST_F(AmgxPgm, FindStrongestNeighborIsEquivalentToRef) +{ + initialize_data(); + 
auto snb = strongest_neighbor; + auto d_snb = d_strongest_neighbor; + + gko::kernels::reference::amgx_pgm::find_strongest_neighbor( + ref, weight_csr.get(), weight_diag.get(), agg, snb); + gko::kernels::cuda::amgx_pgm::find_strongest_neighbor( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_agg, d_snb); + + GKO_ASSERT_ARRAY_EQ(d_snb, snb); +} + + +TEST_F(AmgxPgm, AssignToExistAggIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + auto intermediate_agg = x; + auto d_intermediate_agg = d_x; + + gko::kernels::reference::amgx_pgm::assign_to_exist_agg( + ref, weight_csr.get(), weight_diag.get(), x, intermediate_agg); + gko::kernels::cuda::amgx_pgm::assign_to_exist_agg( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) +{ + initialize_data(); + auto d_x = d_unfinished_agg; + auto d_intermediate_agg = gko::Array(cuda, 0); + index_type d_num_unagg; + + gko::kernels::cuda::amgx_pgm::assign_to_exist_agg( + cuda, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + gko::kernels::cuda::amgx_pgm::count_unagg(cuda, d_x, &d_num_unagg); + + // only test whether all elements of d_x are aggregated. 
+ GKO_ASSERT_EQ(d_num_unagg, 0); +} + + +TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) +{ + initialize_data(); + auto mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(ref); + auto d_mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(cuda); + + auto mg_level = mg_level_factory->generate(system_mtx); + auto d_mg_level = d_mg_level_factory->generate(d_system_mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_restrict_op()), + gko::as(mg_level->get_restrict_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_coarse_op()), + gko::as(mg_level->get_coarse_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_prolong_op()), + gko::as(mg_level->get_prolong_op()), 1e-14); +} + + +} // namespace diff --git a/cuda/test/preconditioner/isai_kernels.cpp b/cuda/test/preconditioner/isai_kernels.cpp index fb8947e9ae6..9385c0f109d 100644 --- a/cuda/test/preconditioner/isai_kernels.cpp +++ b/cuda/test/preconditioner/isai_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -41,19 +41,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include #include +#include #include "core/preconditioner/isai_kernels.hpp" #include "cuda/base/config.hpp" #include "cuda/test/utils.hpp" +#include "matrices/config.hpp" namespace { -enum struct matrix_type { lower, upper }; +enum struct matrix_type { lower, upper, general, spd }; + + class Isai : public ::testing::Test { protected: using value_type = double; @@ -91,9 +96,24 @@ class Isai : public ::testing::Test { auto nz_dist = std::uniform_int_distribution(1, row_limit); auto val_dist = std::uniform_real_distribution(-1., 1.); mtx = Csr::create(ref); - mtx = gko::test::generate_random_triangular_matrix( - n, n, true, for_lower_tm, nz_dist, val_dist, rand_engine, ref, - gko::dim<2>{n, n}); + if (type == matrix_type::general) { + auto dense_mtx = gko::test::generate_random_matrix( + n, n, nz_dist, val_dist, rand_engine, ref, gko::dim<2>{n, n}); + ensure_diagonal(dense_mtx.get()); + mtx->copy_from(dense_mtx.get()); + } else if (type == matrix_type::spd) { + auto dense_mtx = gko::test::generate_random_band_matrix( + n, row_limit / 4, row_limit / 4, val_dist, rand_engine, ref, + gko::dim<2>{n, n}); + auto transp = gko::as(dense_mtx->transpose()); + auto spd_mtx = Dense::create(ref, gko::dim<2>{n, n}); + dense_mtx->apply(transp.get(), spd_mtx.get()); + mtx->copy_from(spd_mtx.get()); + } else { + mtx = gko::test::generate_random_triangular_matrix( + n, n, true, for_lower_tm, nz_dist, val_dist, rand_engine, ref, + gko::dim<2>{n, n}); + } inverse = clone_allocations(mtx.get()); d_mtx = Csr::create(cuda); @@ -102,6 +122,29 @@ class Isai : public ::testing::Test { d_inverse->copy_from(inverse.get()); } + template + std::unique_ptr read(const char *name) + { + std::ifstream mtxstream{std::string{gko::matrices::location_isai_mtxs} + + name}; + auto result = gko::read(mtxstream, ref); + // to avoid removing 0s, the matrices store 12345 instead + for (gko::size_type i = 0; i < result->get_num_stored_elements(); ++i) { + auto &val = 
result->get_values()[i]; + if (val == static_cast(12345.0)) { + val = 0; + } + } + return std::move(result); + } + + void ensure_diagonal(Dense *mtx) + { + for (gko::size_type i = 0; i < mtx->get_size()[0]; ++i) { + mtx->at(i, i) = gko::one(); + } + } + std::shared_ptr ref; std::shared_ptr cuda; @@ -162,6 +205,52 @@ TEST_F(Isai, CudaIsaiGenerateUinverseShortIsEquivalentToRef) } +TEST_F(Isai, CudaIsaiGenerateAinverseShortIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 615, 15); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(cuda, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::kernels::cuda::isai::generate_general_inverse( + cuda, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + false); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_EQ(a1.get_const_data()[num_rows], 0); +} + + +TEST_F(Isai, CudaIsaiGenerateSpdinverseShortIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 15); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(cuda, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::kernels::cuda::isai::generate_general_inverse( + cuda, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + true); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_EQ(a1.get_const_data()[num_rows], 0); +} + + TEST_F(Isai, CudaIsaiGenerateLinverseLongIsEquivalentToRef) { initialize_data(matrix_type::lower, 554, 64); @@ 
-208,6 +297,52 @@ TEST_F(Isai, CudaIsaiGenerateUinverseLongIsEquivalentToRef) } +TEST_F(Isai, CudaIsaiGenerateAinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 695, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(cuda, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::kernels::cuda::isai::generate_general_inverse( + cuda, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + false); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_GT(a1.get_const_data()[num_rows], 0); +} + + +TEST_F(Isai, CudaIsaiGenerateSpdinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(cuda, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::kernels::cuda::isai::generate_general_inverse( + cuda, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + false); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_GT(a1.get_const_data()[num_rows], 0); +} + + TEST_F(Isai, CudaIsaiGenerateExcessLinverseLongIsEquivalentToRef) { initialize_data(matrix_type::lower, 518, 40); @@ -227,10 +362,10 @@ TEST_F(Isai, CudaIsaiGenerateExcessLinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get()); + excess.get(), e_rhs.get(), 0, 
num_rows); gko::kernels::cuda::isai::generate_excess_system( cuda, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get()); + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -258,10 +393,10 @@ TEST_F(Isai, CudaIsaiGenerateExcessUinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get()); + excess.get(), e_rhs.get(), 0, num_rows); gko::kernels::cuda::isai::generate_excess_system( cuda, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get()); + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -270,6 +405,148 @@ TEST_F(Isai, CudaIsaiGenerateExcessUinverseLongIsEquivalentToRef) } +TEST_F(Isai, CudaIsaiGenerateExcessAinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 100, 51); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(cuda, a1); + gko::Array da2(cuda, a2); + auto e_dim = a1.get_data()[num_rows]; + auto e_nnz = a2.get_data()[num_rows]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(cuda, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(cuda, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 0, num_rows); + gko::kernels::cuda::isai::generate_excess_system( + cuda, 
d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiGenerateExcessSpdinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(cuda, a1); + gko::Array da2(cuda, a2); + auto e_dim = a1.get_data()[num_rows]; + auto e_nnz = a2.get_data()[num_rows]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(cuda, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(cuda, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 0, num_rows); + gko::kernels::cuda::isai::generate_excess_system( + cuda, d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiGeneratePartialExcessIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(cuda, a1); + gko::Array da2(cuda, a2); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_nnz 
= a2.get_data()[10] - a2.get_data()[5]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(cuda, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(cuda, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 5u, 10u); + gko::kernels::cuda::isai::generate_excess_system( + cuda, d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 5u, 10u); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiScaleExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(cuda, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(cuda); + de_rhs->copy_from(lend(e_rhs)); + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scale_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), 0, num_rows); + gko::kernels::cuda::isai::scale_excess_solution(cuda, da1.get_const_data(), + de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); +} + + +TEST_F(Isai, CudaIsaiScalePartialExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), 
inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(cuda, a1); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(cuda); + de_rhs->copy_from(lend(e_rhs)); + + gko::kernels::reference::isai::scale_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), 5u, 10u); + gko::kernels::cuda::isai::scale_excess_solution(cuda, da1.get_const_data(), + de_rhs.get(), 5u, 10u); + + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); +} + + TEST_F(Isai, CudaIsaiScatterExcessSolutionLIsEquivalentToRef) { initialize_data(matrix_type::lower, 572, 52); @@ -287,9 +564,9 @@ TEST_F(Isai, CudaIsaiScatterExcessSolutionLIsEquivalentToRef) d_inverse->copy_from(lend(inverse)); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get()); + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); gko::kernels::cuda::isai::scatter_excess_solution( - cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get()); + cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -314,9 +591,90 @@ TEST_F(Isai, CudaIsaiScatterExcessSolutionUIsEquivalentToRef) d_inverse->copy_from(lend(inverse)); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get()); + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::cuda::isai::scatter_excess_solution( + cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiScatterExcessSolutionAIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 702, 45); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + 
gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(cuda, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(cuda); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::cuda::isai::scatter_excess_solution( + cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiScatterExcessSolutionSpdIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(cuda, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(cuda); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::cuda::isai::scatter_excess_solution( + cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, CudaIsaiScatterPartialExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = 
a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(cuda, a1); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(cuda); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 5u, 10u); gko::kernels::cuda::isai::scatter_excess_solution( - cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get()); + cuda, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 5u, 10u); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); diff --git a/cuda/test/preconditioner/jacobi_kernels.cpp b/cuda/test/preconditioner/jacobi_kernels.cpp index 05ea7d766e8..87e0cbb2ab9 100644 --- a/cuda/test/preconditioner/jacobi_kernels.cpp +++ b/cuda/test/preconditioner/jacobi_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" +#include "cuda/test/utils.hpp" namespace { @@ -75,7 +76,7 @@ class Jacobi : public ::testing::Test { std::initializer_list block_precisions, std::initializer_list condition_numbers, gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, - double accuracy = 0.1) + double accuracy = 0.1, bool skip_sorting = true) { std::ranlux48 engine(42); const auto dim = *(end(block_pointers) - 1); @@ -101,10 +102,12 @@ class Jacobi : public ::testing::Test { bj_factory = Bj::build() .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) + .with_skip_sorting(skip_sorting) .on(ref); d_bj_factory = Bj::build() .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) + .with_skip_sorting(skip_sorting) .on(cuda); } else { bj_factory = Bj::build() @@ -112,12 +115,14 @@ class Jacobi : public ::testing::Test { .with_block_pointers(block_ptrs) .with_storage_optimization(block_prec) .with_accuracy(accuracy) + .with_skip_sorting(skip_sorting) .on(ref); d_bj_factory = Bj::build() .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) .with_storage_optimization(block_prec) .with_accuracy(accuracy) + .with_skip_sorting(skip_sorting) .on(cuda); } b = gko::test::generate_random_matrix( @@ -290,7 +295,7 @@ TEST_F(Jacobi, } -TEST_F(Jacobi, CudaPreconditionerEquivalentToRefWithBlockSize32) +TEST_F(Jacobi, CudaPreconditionerEquivalentToRefWithBlockSize32Sorted) { initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110); @@ -301,6 +306,19 @@ TEST_F(Jacobi, CudaPreconditionerEquivalentToRefWithBlockSize32) } +TEST_F(Jacobi, CudaPreconditionerEquivalentToRefWithBlockSize32Unsorted) +{ + std::ranlux48 engine(42); + initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110, 1, 0.1, false); + gko::test::unsort_matrix(mtx.get(), engine); + + auto bj = bj_factory->generate(mtx); + auto d_bj = d_bj_factory->generate(mtx); + + 
GKO_ASSERT_MTX_NEAR(gko::as(d_bj.get()), gko::as(bj.get()), 1e-13); +} + + TEST_F(Jacobi, CudaPreconditionerEquivalentToRefWithDifferentBlockSize) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, @@ -394,6 +412,37 @@ TEST_F(Jacobi, CudaApplyEquivalentToRef) } +TEST_F(Jacobi, CudaScalarApplyEquivalentToRef) +{ + gko::size_type dim = 313; + std::ranlux48 engine(42); + auto dense_smtx = gko::share(gko::test::generate_random_matrix( + dim, dim, std::uniform_int_distribution<>(1, dim), + std::normal_distribution<>(1.0, 2.0), engine, ref)); + gko::test::make_diag_dominant(dense_smtx.get()); + auto smtx = gko::share(Mtx::create(ref)); + smtx->copy_from(dense_smtx.get()); + auto sb = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref)); + auto sx = Vec::create(ref, sb->get_size()); + + auto d_smtx = gko::share(Mtx::create(cuda)); + auto d_sb = gko::share(Vec::create(cuda)); + auto d_sx = gko::share(Vec::create(cuda, sb->get_size())); + d_smtx->copy_from(smtx.get()); + d_sb->copy_from(sb.get()); + + auto sj = Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(cuda)->generate(d_smtx); + + sj->apply(sb.get(), sx.get()); + d_sj->apply(d_sb.get(), d_sx.get()); + + GKO_ASSERT_MTX_NEAR(sx.get(), d_sx.get(), 1e-12); +} + + TEST_F(Jacobi, CudaLinearCombinationApplyEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 13, @@ -412,6 +461,46 @@ TEST_F(Jacobi, CudaLinearCombinationApplyEquivalentToRef) } +TEST_F(Jacobi, CudaScalarLinearCombinationApplyEquivalentToRef) +{ + gko::size_type dim = 313; + std::ranlux48 engine(42); + auto dense_smtx = gko::share(gko::test::generate_random_matrix( + dim, dim, std::uniform_int_distribution<>(1, dim), + std::normal_distribution<>(1.0, 2.0), engine, ref)); + gko::test::make_diag_dominant(dense_smtx.get()); + auto smtx = 
gko::share(Mtx::create(ref)); + smtx->copy_from(dense_smtx.get()); + auto sb = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref, gko::dim<2>(dim, 3), + 4)); + auto sx = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref, gko::dim<2>(dim, 3), + 4)); + + auto d_smtx = gko::share(Mtx::create(cuda)); + auto d_sb = gko::share(Vec::create(cuda)); + auto d_sx = gko::share(Vec::create(cuda)); + d_smtx->copy_from(smtx.get()); + d_sb->copy_from(sb.get()); + d_sx->copy_from(sx.get()); + auto alpha = gko::initialize({2.0}, ref); + auto d_alpha = gko::initialize({2.0}, cuda); + auto beta = gko::initialize({-1.0}, ref); + auto d_beta = gko::initialize({-1.0}, cuda); + + auto sj = Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(cuda)->generate(d_smtx); + + sj->apply(alpha.get(), sb.get(), beta.get(), sx.get()); + d_sj->apply(d_alpha.get(), d_sb.get(), d_beta.get(), d_sx.get()); + + GKO_ASSERT_MTX_NEAR(sx.get(), d_sx.get(), 1e-12); +} + + TEST_F(Jacobi, CudaApplyToMultipleVectorsEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 13, diff --git a/cuda/test/reorder/CMakeLists.txt b/cuda/test/reorder/CMakeLists.txt new file mode 100644 index 00000000000..108e3b57dd5 --- /dev/null +++ b/cuda/test/reorder/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(rcm_kernels) \ No newline at end of file diff --git a/cuda/test/reorder/rcm_kernels.cpp b/cuda/test/reorder/rcm_kernels.cpp new file mode 100644 index 00000000000..afb3336134b --- /dev/null +++ b/cuda/test/reorder/rcm_kernels.cpp @@ -0,0 +1,85 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include "core/test/utils/assertions.hpp" + + +namespace { + + +class Rcm : public ::testing::Test { +protected: + using v_type = double; + using i_type = int; + using CsrMtx = gko::matrix::Csr; + using reorder_type = gko::reorder::Rcm; + using perm_type = gko::matrix::Permutation; + + + Rcm() + : exec(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), + true)), + rcm_factory(reorder_type::build().on(exec)), + // clang-format off + p_mtx(gko::initialize({{1.0, 2.0, 0.0, -1.3, 2.1}, + {2.0, 5.0, 1.5, 0.0, 0.0}, + {0.0, 1.5, 1.5, 1.1, 0.0}, + {-1.3, 0.0, 1.1, 2.0, 0.0}, + {2.1, 0.0, 0.0, 0.0, 1.0}}, + exec)), + // clang-format on + reorder_op(rcm_factory->generate(p_mtx)) + {} + + std::shared_ptr exec; + std::unique_ptr rcm_factory; + std::shared_ptr p_mtx; + std::unique_ptr reorder_op; +}; + + +TEST_F(Rcm, IsExecutedOnCpuExecutor) +{ + // This only executes successfully if computed on cpu executor. 
+ auto p = reorder_op->get_permutation(); + + ASSERT_TRUE(true); +} + + +} // namespace diff --git a/cuda/test/solver/CMakeLists.txt b/cuda/test/solver/CMakeLists.txt index 32dbb96fe61..ae4c6304c92 100644 --- a/cuda/test/solver/CMakeLists.txt +++ b/cuda/test/solver/CMakeLists.txt @@ -1,9 +1,5 @@ -ginkgo_create_test(bicg_kernels) -ginkgo_create_test(bicgstab_kernels) -ginkgo_create_test(cg_kernels) -ginkgo_create_test(cgs_kernels) -ginkgo_create_test(fcg_kernels) ginkgo_create_test(gmres_kernels) -ginkgo_create_test(ir_kernels) +ginkgo_create_test(cb_gmres_kernels) +ginkgo_create_test(idr_kernels) ginkgo_create_test_cpp_cuda_header(lower_trs_kernels) ginkgo_create_test_cpp_cuda_header(upper_trs_kernels) diff --git a/cuda/test/solver/cb_gmres_kernels.cpp b/cuda/test/solver/cb_gmres_kernels.cpp new file mode 100644 index 00000000000..ce28556ef24 --- /dev/null +++ b/cuda/test/solver/cb_gmres_kernels.cpp @@ -0,0 +1,364 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/solver/cb_gmres_accessor.hpp" +#include "core/solver/cb_gmres_kernels.hpp" +#include "cuda/test/utils.hpp" + + +namespace { + + +class CbGmres : public ::testing::Test { +protected: + using value_type = double; + using storage_type = float; + using index_type = int; + using size_type = gko::size_type; + using Range3dHelper = + gko::cb_gmres::Range3dHelper; + using Range3d = typename Range3dHelper::Range; + using Dense = gko::matrix::Dense; + using Mtx = Dense; + static constexpr unsigned int default_krylov_dim_mixed{100}; + + CbGmres() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + Range3dHelper generate_krylov_helper(gko::dim<3> size) + { + auto helper = Range3dHelper{ref, size}; + auto &bases = helper.get_bases(); + const auto num_rows = size[0] * size[1]; + const auto num_cols = size[2]; + auto 
temp_krylov_bases = gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, + ref); + std::copy_n(temp_krylov_bases->get_const_values(), + bases.get_num_elems(), bases.get_data()); + // Only useful when the Accessor actually has a scale + auto range = helper.get_range(); + auto dist = std::normal_distribution(-1, 1); + for (size_type k = 0; k < size[0]; ++k) { + for (size_type i = 0; i < size[2]; ++i) { + gko::cb_gmres::helper_functions_accessor::write_scalar( + range, k, i, dist(rand_engine)); + } + } + return helper; + } + + void initialize_data() + { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else + int m = 597; +#endif + int n = 43; + x = gen_mtx(m, n); + y = gen_mtx(default_krylov_dim_mixed, n); + before_preconditioner = Mtx::create_with_config_of(x.get()); + b = gen_mtx(m, n); + arnoldi_norm = gen_mtx(3, n); + gko::dim<3> krylov_bases_dim(default_krylov_dim_mixed + 1, m, n); + range_helper = generate_krylov_helper(krylov_bases_dim); + + next_krylov_basis = gen_mtx(m, n); + hessenberg = + gen_mtx(default_krylov_dim_mixed + 1, default_krylov_dim_mixed * n); + hessenberg_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + buffer_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + residual = gen_mtx(m, n); + residual_norm = gen_mtx(1, n); + residual_norm_collection = gen_mtx(default_krylov_dim_mixed + 1, n); + givens_sin = gen_mtx(default_krylov_dim_mixed, n); + givens_cos = gen_mtx(default_krylov_dim_mixed, n); + stop_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { + stop_status->get_data()[i].reset(); + } + reorth_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < reorth_status->get_num_elems(); ++i) { + reorth_status->get_data()[i].reset(); + } + final_iter_nums = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < 
final_iter_nums->get_num_elems(); ++i) { + final_iter_nums->get_data()[i] = 5; + } + num_reorth = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < num_reorth->get_num_elems(); ++i) { + num_reorth->get_data()[i] = 5; + } + + d_x = Mtx::create(cuda); + d_x->copy_from(x.get()); + d_before_preconditioner = Mtx::create_with_config_of(d_x.get()); + d_y = Mtx::create(cuda); + d_y->copy_from(y.get()); + d_b = Mtx::create(cuda); + d_b->copy_from(b.get()); + d_arnoldi_norm = Mtx::create(cuda); + d_arnoldi_norm->copy_from(arnoldi_norm.get()); + d_range_helper = Range3dHelper{cuda, {}}; + d_range_helper = range_helper; + d_next_krylov_basis = Mtx::create(cuda); + d_next_krylov_basis->copy_from(next_krylov_basis.get()); + d_hessenberg = Mtx::create(cuda); + d_hessenberg->copy_from(hessenberg.get()); + d_hessenberg_iter = Mtx::create(cuda); + d_hessenberg_iter->copy_from(hessenberg_iter.get()); + d_buffer_iter = Mtx::create(cuda); + d_buffer_iter->copy_from(buffer_iter.get()); + d_residual = Mtx::create(cuda); + d_residual->copy_from(residual.get()); + d_residual_norm = Mtx::create(cuda); + d_residual_norm->copy_from(residual_norm.get()); + d_residual_norm_collection = Mtx::create(cuda); + d_residual_norm_collection->copy_from(residual_norm_collection.get()); + d_givens_sin = Mtx::create(cuda); + d_givens_sin->copy_from(givens_sin.get()); + d_givens_cos = Mtx::create(cuda); + d_givens_cos->copy_from(givens_cos.get()); + d_stop_status = std::unique_ptr>( + new gko::Array(cuda, n)); + *d_stop_status = *stop_status; + d_reorth_status = std::unique_ptr>( + new gko::Array(cuda, n)); + *d_reorth_status = *reorth_status; + d_final_iter_nums = std::unique_ptr>( + new gko::Array(cuda, n)); + *d_final_iter_nums = *final_iter_nums; + d_num_reorth = std::unique_ptr>( + new gko::Array(cuda, n)); + *d_num_reorth = *num_reorth; + } + + void assert_krylov_bases_near() + { + gko::Array d_to_host{ref}; + auto &krylov_bases = range_helper.get_bases(); + d_to_host = 
d_range_helper.get_bases(); + const auto tolerance = r::value; + using std::abs; + for (gko::size_type i = 0; i < krylov_bases.get_num_elems(); ++i) { + const auto ref_value = krylov_bases.get_const_data()[i]; + const auto dev_value = d_to_host.get_const_data()[i]; + ASSERT_LE(abs(dev_value - ref_value), tolerance); + } + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::ranlux48 rand_engine; + + std::unique_ptr before_preconditioner; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr b; + std::unique_ptr arnoldi_norm; + Range3dHelper range_helper; + std::unique_ptr next_krylov_basis; + std::unique_ptr hessenberg; + std::unique_ptr hessenberg_iter; + std::unique_ptr buffer_iter; + std::unique_ptr residual; + std::unique_ptr residual_norm; + std::unique_ptr residual_norm_collection; + std::unique_ptr givens_sin; + std::unique_ptr givens_cos; + std::unique_ptr> stop_status; + std::unique_ptr> reorth_status; + std::unique_ptr> final_iter_nums; + std::unique_ptr> num_reorth; + + std::unique_ptr d_x; + std::unique_ptr d_before_preconditioner; + std::unique_ptr d_y; + std::unique_ptr d_b; + std::unique_ptr d_arnoldi_norm; + Range3dHelper d_range_helper; + std::unique_ptr d_next_krylov_basis; + std::unique_ptr d_hessenberg; + std::unique_ptr d_hessenberg_iter; + std::unique_ptr d_buffer_iter; + std::unique_ptr d_residual; + std::unique_ptr d_residual_norm; + std::unique_ptr d_residual_norm_collection; + std::unique_ptr d_givens_sin; + std::unique_ptr d_givens_cos; + std::unique_ptr> d_stop_status; + std::unique_ptr> d_reorth_status; + std::unique_ptr> d_final_iter_nums; + std::unique_ptr> d_num_reorth; +}; + + +TEST_F(CbGmres, CudaCbGmresInitialize1IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_1( + ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + stop_status.get(), default_krylov_dim_mixed); + gko::kernels::cuda::cb_gmres::initialize_1( + cuda, d_b.get(), d_residual.get(), 
d_givens_sin.get(), + d_givens_cos.get(), d_stop_status.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_residual, residual, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14); + GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); +} + + +TEST_F(CbGmres, CudaCbGmresInitialize2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_2( + ref, residual.get(), residual_norm.get(), + residual_norm_collection.get(), arnoldi_norm.get(), + range_helper.get_range(), next_krylov_basis.get(), + final_iter_nums.get(), default_krylov_dim_mixed); + gko::kernels::cuda::cb_gmres::initialize_2( + cuda, d_residual.get(), d_residual_norm.get(), + d_residual_norm_collection.get(), d_arnoldi_norm.get(), + d_range_helper.get_range(), d_next_krylov_basis.get(), + d_final_iter_nums.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + 1e-14); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, CudaCbGmresStep1IsEquivalentToRef) +{ + initialize_data(); + int iter = 5; + + gko::kernels::reference::cb_gmres::step_1( + ref, next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), + range_helper.get_range(), hessenberg_iter.get(), buffer_iter.get(), + arnoldi_norm.get(), iter, final_iter_nums.get(), stop_status.get(), + reorth_status.get(), num_reorth.get()); + gko::kernels::cuda::cb_gmres::step_1( + cuda, d_next_krylov_basis.get(), d_givens_sin.get(), d_givens_cos.get(), + d_residual_norm.get(), d_residual_norm_collection.get(), + d_range_helper.get_range(), d_hessenberg_iter.get(), + d_buffer_iter.get(), d_arnoldi_norm.get(), iter, + d_final_iter_nums.get(), d_stop_status.get(), 
d_reorth_status.get(), + d_num_reorth.get()); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_next_krylov_basis, next_krylov_basis, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + 1e-14); + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, 1e-14); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, CudaCbGmresStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::step_2( + ref, residual_norm_collection.get(), + range_helper.get_range().get_accessor().to_const(), hessenberg.get(), + y.get(), before_preconditioner.get(), final_iter_nums.get()); + gko::kernels::cuda::cb_gmres::step_2( + cuda, d_residual_norm_collection.get(), + d_range_helper.get_range().get_accessor().to_const(), + d_hessenberg.get(), d_y.get(), d_before_preconditioner.get(), + d_final_iter_nums.get()); + + GKO_ASSERT_MTX_NEAR(d_y, y, 1e-14); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +} // namespace diff --git a/cuda/test/solver/cg_kernels.cpp b/cuda/test/solver/cg_kernels.cpp deleted file mode 100644 index 65f8d78781f..00000000000 --- a/cuda/test/solver/cg_kernels.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/cg_kernels.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Cg : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - Cg() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - x = gen_mtx(m, n); - beta = gen_mtx(1, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(cuda); - d_b->copy_from(b.get()); - d_r = Mtx::create(cuda); - d_r->copy_from(r.get()); - d_z = Mtx::create(cuda); - d_z->copy_from(z.get()); - d_p = Mtx::create(cuda); - d_p->copy_from(p.get()); - d_q = Mtx::create(cuda); - d_q->copy_from(q.get()); - d_x = Mtx::create(cuda); - d_x->copy_from(x.get()); - d_beta = Mtx::create(cuda); - d_beta->copy_from(beta.get()); - d_prev_rho = Mtx::create(cuda); - d_prev_rho->copy_from(prev_rho.get()); - d_rho = Mtx::create(cuda); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(cuda, n)); - *d_stop_status = *stop_status; - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < 
mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - std::ranlux48 rand_engine; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr x; - std::unique_ptr beta; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr> stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_x; - std::unique_ptr d_beta; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Cg, CudaCgInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cg::initialize(ref, b.get(), r.get(), z.get(), - p.get(), q.get(), prev_rho.get(), - rho.get(), stop_status.get()); - gko::kernels::cuda::cg::initialize(cuda, d_b.get(), d_r.get(), d_z.get(), - d_p.get(), d_q.get(), d_prev_rho.get(), - d_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Cg, CudaCgStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cg::step_1(ref, p.get(), z.get(), rho.get(), - prev_rho.get(), stop_status.get()); - gko::kernels::cuda::cg::step_1(cuda, d_p.get(), d_z.get(), d_rho.get(), - d_prev_rho.get(), 
d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); -} - - -TEST_F(Cg, CudaCgStep2IsEquivalentToRef) -{ - initialize_data(); - gko::kernels::reference::cg::step_2(ref, x.get(), r.get(), p.get(), q.get(), - beta.get(), rho.get(), - stop_status.get()); - gko::kernels::cuda::cg::step_2(cuda, d_x.get(), d_r.get(), d_p.get(), - d_q.get(), d_beta.get(), d_rho.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); -} - - -TEST_F(Cg, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = Mtx::create(cuda); - d_mtx->copy_from(mtx.get()); - auto d_x = Mtx::create(cuda); - d_x->copy_from(x.get()); - auto d_b = Mtx::create(cuda); - d_b->copy_from(b.get()); - auto cg_factory = - gko::solver::Cg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_cg_factory = - gko::solver::Cg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(cuda), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(cuda)) - .on(cuda); - auto solver = cg_factory->generate(std::move(mtx)); - auto d_solver = d_cg_factory->generate(std::move(d_mtx)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -} // namespace diff --git a/cuda/test/solver/cgs_kernels.cpp b/cuda/test/solver/cgs_kernels.cpp deleted file mode 100644 index 3e49804ddab..00000000000 --- a/cuda/test/solver/cgs_kernels.cpp +++ /dev/null @@ -1,349 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/cgs_kernels.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Cgs : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - using Solver = gko::solver::Cgs<>; - - Cgs() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - - mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); - d_mtx = Mtx::create(cuda); - d_mtx->copy_from(mtx.get()); - cuda_cgs_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(cuda), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(cuda)) - .on(cuda); - ref_cgs_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(ref)) - .on(ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(0.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - r_tld = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - u = gen_mtx(m, n); - u_hat = gen_mtx(m, n); - v_hat = gen_mtx(m, n); - t = gen_mtx(m, n); - x = gen_mtx(m, n); - alpha = gen_mtx(1, n); - beta = gen_mtx(1, n); - gamma = gen_mtx(1, n); - rho = gen_mtx(1, n); - rho_prev = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < 
stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(cuda); - d_b->copy_from(b.get()); - d_r = Mtx::create(cuda); - d_r->copy_from(r.get()); - d_r_tld = Mtx::create(cuda); - d_r_tld->copy_from(r_tld.get()); - d_p = Mtx::create(cuda); - d_p->copy_from(p.get()); - d_q = Mtx::create(cuda); - d_q->copy_from(q.get()); - d_u = Mtx::create(cuda); - d_u->copy_from(u.get()); - d_u_hat = Mtx::create(cuda); - d_u_hat->copy_from(u_hat.get()); - d_v_hat = Mtx::create(cuda); - d_v_hat->copy_from(v_hat.get()); - d_t = Mtx::create(cuda); - d_t->copy_from(t.get()); - d_x = Mtx::create(cuda); - d_x->copy_from(x.get()); - d_alpha = Mtx::create(cuda); - d_alpha->copy_from(alpha.get()); - d_beta = Mtx::create(cuda); - d_beta->copy_from(beta.get()); - d_gamma = Mtx::create(cuda); - d_gamma->copy_from(gamma.get()); - d_rho_prev = Mtx::create(cuda); - d_rho_prev->copy_from(rho_prev.get()); - d_rho = Mtx::create(cuda); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(cuda, n)); - // because there is no public function copy_from, use overloaded = - // operator - *d_stop_status = *stop_status; - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - std::ranlux48 rand_engine; - - std::shared_ptr mtx; - std::shared_ptr d_mtx; - std::unique_ptr cuda_cgs_factory; - std::unique_ptr ref_cgs_factory; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr r_tld; - std::unique_ptr t; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr u; - std::unique_ptr u_hat; - std::unique_ptr v_hat; - std::unique_ptr x; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr gamma; - std::unique_ptr rho; - std::unique_ptr rho_prev; - std::unique_ptr> 
stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_r_tld; - std::unique_ptr d_t; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_u; - std::unique_ptr d_u_hat; - std::unique_ptr d_v_hat; - std::unique_ptr d_x; - std::unique_ptr d_alpha; - std::unique_ptr d_beta; - std::unique_ptr d_gamma; - std::unique_ptr d_rho; - std::unique_ptr d_rho_prev; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Cgs, CudaCgsInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::initialize( - ref, b.get(), r.get(), r_tld.get(), p.get(), q.get(), u.get(), - u_hat.get(), v_hat.get(), t.get(), alpha.get(), beta.get(), gamma.get(), - rho_prev.get(), rho.get(), stop_status.get()); - gko::kernels::cuda::cgs::initialize( - cuda, d_b.get(), d_r.get(), d_r_tld.get(), d_p.get(), d_q.get(), - d_u.get(), d_u_hat.get(), d_v_hat.get(), d_t.get(), d_alpha.get(), - d_beta.get(), d_gamma.get(), d_rho_prev.get(), d_rho.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r_tld, r_tld, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u, u, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u_hat, u_hat, 1e-14); - GKO_ASSERT_MTX_NEAR(d_v_hat, v_hat, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho_prev, rho_prev, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_ASSERT_MTX_NEAR(d_beta, beta, 1e-14); - GKO_ASSERT_MTX_NEAR(d_gamma, gamma, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Cgs, CudaCgsStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_1(ref, r.get(), u.get(), p.get(), - q.get(), beta.get(), rho.get(), - rho_prev.get(), stop_status.get()); - gko::kernels::cuda::cgs::step_1(cuda, d_r.get(), d_u.get(), d_p.get(), - d_q.get(), d_beta.get(), d_rho.get(), - d_rho_prev.get(), d_stop_status.get()); - - 
GKO_ASSERT_MTX_NEAR(d_beta, beta, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u, u, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); -} - - -TEST_F(Cgs, CudaCgsStep2IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_2(ref, u.get(), v_hat.get(), q.get(), - t.get(), alpha.get(), rho.get(), - gamma.get(), stop_status.get()); - gko::kernels::cuda::cgs::step_2(cuda, d_u.get(), d_v_hat.get(), d_q.get(), - d_t.get(), d_alpha.get(), d_rho.get(), - d_gamma.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); -} - - -TEST_F(Cgs, CudaCgsStep3IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_3(ref, t.get(), u_hat.get(), r.get(), - x.get(), alpha.get(), - stop_status.get()); - gko::kernels::cuda::cgs::step_3(cuda, d_t.get(), d_u_hat.get(), d_r.get(), - d_x.get(), d_alpha.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); -} - - -TEST_F(Cgs, CudaCgsApplyOneRHSIsEquivalentToRef) -{ - int m = 123; - int n = 1; - auto ref_solver = ref_cgs_factory->generate(mtx); - auto cuda_solver = cuda_cgs_factory->generate(d_mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(cuda); - auto d_x = Mtx::create(cuda); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - cuda_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - - -TEST_F(Cgs, CudaCgsApplyMultipleRHSIsEquivalentToRef) -{ - int m = 123; - int n = 16; - auto cuda_solver = cuda_cgs_factory->generate(d_mtx); - auto ref_solver = ref_cgs_factory->generate(mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(cuda); - auto d_x = Mtx::create(cuda); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - cuda_solver->apply(d_b.get(), 
d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - -} // namespace diff --git a/cuda/test/solver/fcg_kernels.cpp b/cuda/test/solver/fcg_kernels.cpp deleted file mode 100644 index 2b5f3ac5441..00000000000 --- a/cuda/test/solver/fcg_kernels.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/fcg_kernels.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Fcg : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - using Solver = gko::solver::Fcg<>; - - Fcg() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(0.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - t = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - x = gen_mtx(m, n); - beta = gen_mtx(1, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - rho_t = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(cuda); - d_b->copy_from(b.get()); - d_r = Mtx::create(cuda); - d_r->copy_from(r.get()); - d_t = Mtx::create(cuda); - d_t->copy_from(t.get()); - d_z = Mtx::create(cuda); - d_z->copy_from(z.get()); - d_p = Mtx::create(cuda); - d_p->copy_from(p.get()); - d_q = Mtx::create(cuda); - d_q->copy_from(q.get()); - d_x = Mtx::create(cuda); - d_x->copy_from(x.get()); - d_beta = Mtx::create(cuda); - d_beta->copy_from(beta.get()); - d_prev_rho = Mtx::create(cuda); - d_prev_rho->copy_from(prev_rho.get()); - d_rho_t = Mtx::create(cuda); - d_rho_t->copy_from(rho_t.get()); - d_rho = 
Mtx::create(cuda); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(cuda, n)); - *d_stop_status = *stop_status; - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - std::ranlux48 rand_engine; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr t; - std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr x; - std::unique_ptr beta; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr rho_t; - std::unique_ptr> stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_t; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_x; - std::unique_ptr d_beta; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr d_rho_t; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Fcg, CudaFcgInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::fcg::initialize( - ref, b.get(), r.get(), z.get(), p.get(), q.get(), t.get(), - prev_rho.get(), rho.get(), rho_t.get(), stop_status.get()); - gko::kernels::cuda::fcg::initialize( - cuda, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), d_t.get(), - d_prev_rho.get(), d_rho.get(), d_rho_t.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_prev_rho, 
prev_rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho_t, rho_t, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Fcg, CudaFcgStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::fcg::step_1(ref, p.get(), z.get(), rho_t.get(), - prev_rho.get(), stop_status.get()); - gko::kernels::cuda::fcg::step_1(cuda, d_p.get(), d_z.get(), d_rho_t.get(), - d_prev_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); -} - - -TEST_F(Fcg, CudaFcgStep2IsEquivalentToRef) -{ - initialize_data(); - gko::kernels::reference::fcg::step_2(ref, x.get(), r.get(), t.get(), - p.get(), q.get(), beta.get(), - rho.get(), stop_status.get()); - gko::kernels::cuda::fcg::step_2(cuda, d_x.get(), d_r.get(), d_t.get(), - d_p.get(), d_q.get(), d_beta.get(), - d_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); -} - - -TEST_F(Fcg, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = Mtx::create(cuda); - d_mtx->copy_from(mtx.get()); - auto d_x = Mtx::create(cuda); - d_x->copy_from(x.get()); - auto d_b = Mtx::create(cuda); - d_b->copy_from(b.get()); - auto fcg_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_fcg_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(cuda), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(cuda)) - .on(cuda); - auto solver = fcg_factory->generate(std::move(mtx)); - auto d_solver = d_fcg_factory->generate(std::move(d_mtx)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - 
- GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -} // namespace diff --git a/cuda/test/solver/gmres_kernels.cpp b/cuda/test/solver/gmres_kernels.cpp index 2dcd4d2653c..7571bf848e5 100644 --- a/cuda/test/solver/gmres_kernels.cpp +++ b/cuda/test/solver/gmres_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -92,7 +92,11 @@ class Gmres : public ::testing::Test { void initialize_data(int nrhs = 43) { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else int m = 597; +#endif x = gen_mtx(m, nrhs); y = gen_mtx(gko::solver::default_krylov_dim, nrhs); before_preconditioner = Mtx::create_with_config_of(x.get()); diff --git a/cuda/test/solver/idr_kernels.cpp b/cuda/test/solver/idr_kernels.cpp new file mode 100644 index 00000000000..cc075f9191d --- /dev/null +++ b/cuda/test/solver/idr_kernels.cpp @@ -0,0 +1,375 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/solver/idr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Idr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using Solver = gko::solver::Idr<>; + + Idr() : rand_engine(30) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + cuda = gko::CudaExecutor::create(0, ref); + + cuda_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(cuda)) + .on(cuda); + + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + } + + void TearDown() + { + if (cuda != nullptr) { + ASSERT_NO_THROW(cuda->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(0.0, 1.0), rand_engine, ref); + } + + void initialize_data(int 
size = 597, int input_nrhs = 17) + { + nrhs = input_nrhs; + int s = 4; + mtx = gen_mtx(size, size); + x = gen_mtx(size, nrhs); + b = gen_mtx(size, nrhs); + r = gen_mtx(size, nrhs); + m = gen_mtx(s, nrhs * s); + f = gen_mtx(s, nrhs); + g = gen_mtx(size, nrhs * s); + u = gen_mtx(size, nrhs * s); + c = gen_mtx(s, nrhs); + v = gen_mtx(size, nrhs); + p = gen_mtx(s, size); + alpha = gen_mtx(1, nrhs); + omega = gen_mtx(1, nrhs); + tht = gen_mtx(1, nrhs); + residual_norm = gen_mtx(1, nrhs); + stop_status = std::unique_ptr>( + new gko::Array(ref, nrhs)); + for (size_t i = 0; i < nrhs; ++i) { + stop_status->get_data()[i].reset(); + } + + d_mtx = Mtx::create(cuda); + d_x = Mtx::create(cuda); + d_b = Mtx::create(cuda); + d_r = Mtx::create(cuda); + d_m = Mtx::create(cuda); + d_f = Mtx::create(cuda); + d_g = Mtx::create(cuda); + d_u = Mtx::create(cuda); + d_c = Mtx::create(cuda); + d_v = Mtx::create(cuda); + d_p = Mtx::create(cuda); + d_alpha = Mtx::create(cuda); + d_omega = Mtx::create(cuda); + d_tht = Mtx::create(cuda); + d_residual_norm = Mtx::create(cuda); + d_stop_status = std::unique_ptr>( + new gko::Array(cuda)); + + d_mtx->copy_from(mtx.get()); + d_x->copy_from(x.get()); + d_b->copy_from(b.get()); + d_r->copy_from(r.get()); + d_m->copy_from(m.get()); + d_f->copy_from(f.get()); + d_g->copy_from(g.get()); + d_u->copy_from(u.get()); + d_c->copy_from(c.get()); + d_v->copy_from(v.get()); + d_p->copy_from(p.get()); + d_alpha->copy_from(alpha.get()); + d_omega->copy_from(omega.get()); + d_tht->copy_from(tht.get()); + d_residual_norm->copy_from(residual_norm.get()); + *d_stop_status = + *stop_status; // copy_from is not a public member function of Array + } + + std::shared_ptr ref; + std::shared_ptr cuda; + + std::ranlux48 rand_engine; + + std::shared_ptr mtx; + std::shared_ptr d_mtx; + std::unique_ptr cuda_idr_factory; + std::unique_ptr ref_idr_factory; + + gko::size_type nrhs; + + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr r; + std::unique_ptr m; + 
std::unique_ptr f; + std::unique_ptr g; + std::unique_ptr u; + std::unique_ptr c; + std::unique_ptr v; + std::unique_ptr p; + std::unique_ptr alpha; + std::unique_ptr omega; + std::unique_ptr tht; + std::unique_ptr residual_norm; + std::unique_ptr> stop_status; + + std::unique_ptr d_x; + std::unique_ptr d_b; + std::unique_ptr d_r; + std::unique_ptr d_m; + std::unique_ptr d_f; + std::unique_ptr d_g; + std::unique_ptr d_u; + std::unique_ptr d_c; + std::unique_ptr d_v; + std::unique_ptr d_p; + std::unique_ptr d_alpha; + std::unique_ptr d_omega; + std::unique_ptr d_tht; + std::unique_ptr d_residual_norm; + std::unique_ptr> d_stop_status; +}; + + +TEST_F(Idr, IdrInitializeIsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::idr::initialize(ref, nrhs, m.get(), p.get(), true, + stop_status.get()); + gko::kernels::cuda::idr::initialize(cuda, nrhs, d_m.get(), d_p.get(), true, + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(m, d_m, 1e-14); + GKO_ASSERT_MTX_NEAR(p, d_p, 1e-14); +} + + +TEST_F(Idr, IdrStep1IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_1(ref, nrhs, k, m.get(), f.get(), + r.get(), g.get(), c.get(), v.get(), + stop_status.get()); + gko::kernels::cuda::idr::step_1(cuda, nrhs, k, d_m.get(), d_f.get(), + d_r.get(), d_g.get(), d_c.get(), d_v.get(), + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(c, d_c, 1e-14); + GKO_ASSERT_MTX_NEAR(v, d_v, 1e-14); +} + + +TEST_F(Idr, IdrStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_2(ref, nrhs, k, omega.get(), v.get(), + c.get(), u.get(), stop_status.get()); + gko::kernels::cuda::idr::step_2(cuda, nrhs, k, d_omega.get(), d_v.get(), + d_c.get(), d_u.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(u, d_u, 1e-14); +} + + +TEST_F(Idr, IdrStep3IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_3( + ref, nrhs, k, p.get(), g.get(), 
v.get(), u.get(), m.get(), f.get(), + alpha.get(), r.get(), x.get(), stop_status.get()); + gko::kernels::cuda::idr::step_3( + cuda, nrhs, k, d_p.get(), d_g.get(), d_v.get(), d_u.get(), d_m.get(), + d_f.get(), d_alpha.get(), d_r.get(), d_x.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(g, d_g, 1e-14); + GKO_ASSERT_MTX_NEAR(v, d_v, 1e-14); + GKO_ASSERT_MTX_NEAR(u, d_u, 1e-14); + GKO_ASSERT_MTX_NEAR(m, d_m, 1e-14); + GKO_ASSERT_MTX_NEAR(f, d_f, 1e-14); + GKO_ASSERT_MTX_NEAR(r, d_r, 1e-14); + GKO_ASSERT_MTX_NEAR(x, d_x, 1e-14); +} + + +TEST_F(Idr, IdrComputeOmegaIsEquivalentToRef) +{ + initialize_data(); + + double kappa = 0.7; + gko::kernels::reference::idr::compute_omega(ref, nrhs, kappa, tht.get(), + residual_norm.get(), + omega.get(), stop_status.get()); + gko::kernels::cuda::idr::compute_omega(cuda, nrhs, kappa, d_tht.get(), + d_residual_norm.get(), d_omega.get(), + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(omega, d_omega, 1e-14); +} + + +TEST_F(Idr, IdrIterationOneRHSIsEquivalentToRef) +{ + initialize_data(123, 1); + auto ref_solver = ref_idr_factory->generate(mtx); + auto cuda_solver = cuda_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + cuda_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceOneRHSIsEquivalentToRef) +{ + initialize_data(123, 1); + cuda_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(cuda)) + .on(cuda); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto ref_solver = ref_idr_factory->generate(mtx); + auto cuda_solver = cuda_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + cuda_solver->apply(d_b.get(), d_x.get()); + + 
GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +TEST_F(Idr, IdrIterationMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 16); + auto cuda_solver = cuda_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + cuda_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-12); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 16); + cuda_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(cuda)) + .on(cuda); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto cuda_solver = cuda_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + cuda_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +} // namespace diff --git a/cuda/test/solver/ir_kernels.cpp b/cuda/test/solver/ir_kernels.cpp deleted file mode 100644 index 35b844274b9..00000000000 --- a/cuda/test/solver/ir_kernels.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/ir_kernels.hpp" -#include "cuda/test/utils.hpp" - - -namespace { - - -class Ir : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - Ir() : rand_engine(30) {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - cuda = gko::CudaExecutor::create(0, ref); - } - - void TearDown() - { - if (cuda != nullptr) { - ASSERT_NO_THROW(cuda->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - std::shared_ptr ref; - std::shared_ptr cuda; - - std::ranlux48 rand_engine; -}; - - -TEST_F(Ir, InitializeIsEquivalentToRef) -{ - auto stop_status = gko::Array(ref, 43); - for (size_t i = 0; i < stop_status.get_num_elems(); ++i) { - stop_status.get_data()[i].reset(); - } - auto d_stop_status = gko::Array(cuda, stop_status); - - gko::kernels::reference::ir::initialize(ref, &stop_status); - gko::kernels::cuda::ir::initialize(cuda, &d_stop_status); - - auto tmp = gko::Array(ref, d_stop_status); - for (int i = 0; i < stop_status.get_num_elems(); ++i) { - ASSERT_EQ(stop_status.get_const_data()[i], tmp.get_const_data()[i]); - } -} - - -TEST_F(Ir, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(cuda, mtx); - auto d_x = clone(cuda, x); - auto d_b = clone(cuda, b); - // Forget about accuracy - Richardson is not going to converge for a random - // matrix, just check that a couple of iterations gives the same result on - // both executors - auto ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .on(ref); 
- auto d_ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(cuda)) - .on(cuda); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(Ir, ApplyWithIterativeInnerSolverIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(cuda, mtx); - auto d_x = clone(cuda, x); - auto d_b = clone(cuda, b); - - auto ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - cuda)) - .on(cuda)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(cuda)) - .on(cuda); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - // Note: 1e-12 instead of 1e-14, as the difference in the inner gmres - // iteration gets amplified by the difference in IR. 
- GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); -} - - -TEST_F(Ir, RichardsonApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(cuda, mtx); - auto d_x = clone(cuda, x); - auto d_b = clone(cuda, b); - // Forget about accuracy - Richardson is not going to converge for a random - // matrix, just check that a couple of iterations gives the same result on - // both executors - auto ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .with_relaxation_factor(0.9) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(cuda)) - .with_relaxation_factor(0.9) - .on(cuda); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(Ir, RichardsonApplyWithIterativeInnerSolverIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(cuda, mtx); - auto d_x = clone(cuda, x); - auto d_b = clone(cuda, b); - auto ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .with_relaxation_factor(0.9) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - cuda)) - .on(cuda)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(cuda)) - .with_relaxation_factor(0.9) - .on(cuda); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = 
d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - // Note: 1e-12 instead of 1e-14, as the difference in the inner gmres - // iteration gets amplified by the difference in IR. - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); -} - - -} // namespace diff --git a/cuda/test/solver/lower_trs_kernels.cpp b/cuda/test/solver/lower_trs_kernels.cpp index b677b9eb10d..ea63bbf0f44 100644 --- a/cuda/test/solver/lower_trs_kernels.cpp +++ b/cuda/test/solver/lower_trs_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/solver/upper_trs_kernels.cpp b/cuda/test/solver/upper_trs_kernels.cpp index 9da9d33930c..fbafd9f3d23 100644 --- a/cuda/test/solver/upper_trs_kernels.cpp +++ b/cuda/test/solver/upper_trs_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/stop/criterion_kernels.cpp b/cuda/test/stop/criterion_kernels.cpp index 8265ffea284..882c6730424 100644 --- a/cuda/test/stop/criterion_kernels.cpp +++ b/cuda/test/stop/criterion_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/cuda/test/stop/residual_norm_kernels.cpp b/cuda/test/stop/residual_norm_kernels.cpp index ec5dc3bf511..6af9e73c056 100644 --- a/cuda/test/stop/residual_norm_kernels.cpp +++ b/cuda/test/stop/residual_norm_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,9 +45,324 @@ namespace { constexpr double tol = 1.0e-14; +class ResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; + + ResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + cuda_ = gko::CudaExecutor::create(0, ref_); + factory_ = + gko::stop::ResidualNorm<>::build().with_reduction_factor(tol).on( + cuda_); + rel_factory_ = gko::stop::ResidualNorm<>::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::initial_resnorm) + .on(cuda_); + abs_factory_ = gko::stop::ResidualNorm<>::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::absolute) + .on(cuda_); + } + + std::unique_ptr::Factory> factory_; + std::unique_ptr::Factory> rel_factory_; + std::unique_ptr::Factory> abs_factory_; + std::shared_ptr cuda_; + std::shared_ptr ref_; +}; + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRhsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, 
d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForRhsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + 
res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRelResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + 
.residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForRelResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * res_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForAbsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + 
auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = + abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForAbsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = + abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(cuda_); + + 
ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + class ResidualNormReduction : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; ResidualNormReduction() { @@ -67,6 +382,8 @@ class ResidualNormReduction : public ::testing::Test { TEST_F(ResidualNormReduction, WaitsTillResidualGoal) { auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); auto d_res = Mtx::create(cuda_); d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({10.0}, ref_); @@ -84,7 +401,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0) = tol * 1.1e+2; + res->at(0) = tol * 1.1 * res_norm->at(0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -95,7 +412,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) stop_status.set_executor(cuda_); ASSERT_FALSE(one_changed); - res->at(0) = tol * 0.9e+2; + res->at(0) = tol * 0.9 * res_norm->at(0); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -110,6 +427,8 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) 
TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) { auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); auto d_res = Mtx::create(cuda_); d_res->copy_from(res.get()); std::shared_ptr rhs = @@ -129,7 +448,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0, 0) = tol * 0.9e+2; + res->at(0, 0) = tol * 0.9 * res_norm->at(0, 0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -140,7 +459,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) stop_status.set_executor(cuda_); ASSERT_TRUE(one_changed); - res->at(0, 1) = tol * 0.9e+2; + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -155,6 +474,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) class RelativeResidualNorm : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; RelativeResidualNorm() { @@ -177,6 +497,8 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) auto d_res = Mtx::create(cuda_); d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); std::shared_ptr d_rhs = Mtx::create(cuda_); d_rhs->copy_from(rhs.get()); auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); @@ -191,7 +513,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0) = tol * 1.1e+1; + res->at(0) = tol * 1.1 * rhs_norm->at(0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -202,7 +524,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) stop_status.set_executor(cuda_); 
ASSERT_FALSE(one_changed); - res->at(0) = tol * 0.9e+1; + res->at(0) = tol * 0.9 * rhs_norm->at(0); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -221,6 +543,8 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); std::shared_ptr d_rhs = Mtx::create(cuda_); d_rhs->copy_from(rhs.get()); auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); @@ -236,7 +560,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0, 0) = tol * 0.9e+1; + res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -247,7 +571,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) stop_status.set_executor(cuda_); ASSERT_TRUE(one_changed); - res->at(0, 1) = tol * 0.9e+1; + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -259,9 +583,122 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) } +class ImplicitResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; + + ImplicitResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + cuda_ = gko::CudaExecutor::create(0, ref_); + factory_ = gko::stop::ImplicitResidualNorm<>::build() + .with_reduction_factor(tol) + .on(cuda_); + } + + std::unique_ptr::Factory> factory_; + std::shared_ptr cuda_; + std::shared_ptr ref_; +}; + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = 
gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = std::pow(tol * 1.1 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_FALSE(one_changed); + + res->at(0) = std::pow(tol * 0.9 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto d_res = Mtx::create(cuda_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(cuda_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + 
stop_status.get_data()[1].reset(); + stop_status.set_executor(cuda_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = std::pow(tol * 0.9 * rhs_norm->at(0, 0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(cuda_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = std::pow(tol * 0.9 * rhs_norm->at(0, 1), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + class AbsoluteResidualNorm : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; AbsoluteResidualNorm() { diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 903ed6a77c3..7667c7beb9f 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,9 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { +// Visual Studio does not define the constructor of std::mutex as constexpr, +// causing it to not be initialized when creating this executor (which uses +// the mutex) +#if !defined(_MSC_VER) // prevent device reset after each test auto no_reset_exec = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), true); +#endif } // namespace diff --git a/cuda/test/utils/assertions_test.cpp b/cuda/test/utils/assertions_test.cpp index 71a2fb0109b..6dccae4b02a 100644 --- a/cuda/test/utils/assertions_test.cpp +++ b/cuda/test/utils/assertions_test.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/dev_tools/oneapi/add_host_function.sh b/dev_tools/oneapi/add_host_function.sh new file mode 100755 index 00000000000..845684537d4 --- /dev/null +++ b/dev_tools/oneapi/add_host_function.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +# add_host_function adds a host function to wrap the cuda kernel call with template and parameter configuration. +# +# For example +# ```` +# template +# __global__ kernel(ValueType a) {...} +# ``` +# add_host_function will add another host function with the same template and calling the cuda call +# ``` +# template +# void kernel_AUTOHOSTFUNC(dim3 grid, dim3 block, size_type dynamic_shared_memory, cudaStream_t stream, ValueType a) { +# /*KEEP*/kernel<<>>(a); +# } +# ``` +# _AUTOHOSTFUNC and /*KEEP*/ is internal step and they are removed in the end. +# It will use the same template as original cuda call and pust the kernel args into input args. +# Note. This script does not translate original cuda kernel call to corresponding call. +# convert_source.sh will handle it later. 
+ + +SCRIPT_DIR="$( dirname "${BASH_SOURCE[0]}" )" +source "${SCRIPT_DIR}/shared.sh" + +extract_varname() { + local str="$1" + # local GET_PARAM=" *([^ ]*) *$" + # Need to remove the = .... + # note. it only remove the simple one + local GET_PARAM=" *([^ =\*]*) *(= *.*)* *$" + local parameter="" + local temp="" + IFS=',' read -ra par <<< "$str" + for var in "${par[@]}"; do + if [ -n "${temp}" ]; then + temp="${temp}," + fi + temp="${temp}${var}" + # only handle one pair <> currently + if [[ "${temp}" =~ "<" ]] && [[ ! "${temp}" =~ ">" ]]; then + continue + fi + # If the string contains typename, do not extract it. + # It should automatically be decided from argument + # Also need to ignore = ... + if [[ "${temp}" =~ "typename" ]]; then + : + elif [[ "${temp}" =~ $GET_PARAM ]]; then + if [ -n "${parameter}" ]; then + parameter="${parameter}, " + fi + parameter="${parameter}${BASH_REMATCH[1]}" + fi + temp="" + done + echo "$parameter" +} + + +GLOBAL_KEYWORD="__global__" +TEMPLATE_REGEX="^ *template <*" +FUNCTION_START="^ *(template *<|${GLOBAL_KEYWORD}|void)" +FUNCTION_NAME_END=".*\{.*" +SCOPE_START="${FUNCTION_NAME_END}" +SCOPE_END=".*\}.*" +CHECK_GLOBAL_KEYWORD=".*${GLOBAL_KEYWORD}.*" +FUNCTION_HANDLE="" +DURING_FUNCNAME="false" +ANAYSIS_FUNC=" *(template *<(.*)>)?.* (.*)\((.*)\)" +START_BLOCK_REX="^( *\/\*| *\/\/)" +END_BLOCK_REX="\*\/$| *\/\/" +IN_BLOCK=0 +IN_FUNC=0 +STORE_LINE="" +STORE_REGEX="__ *$" +EXTRACT_KERNEL="false" +DURING_LICENSE="false" +SKIP="false" + +rm "${MAP_FILE}" +while IFS='' read -r line || [ -n "$line" ]; do + if [ "${EXTRACT_KERNEL}" = "false" ] && ([ "${line}" = "/*${GINKGO_LICENSE_BEACON}" ] || [ "${DURING_LICENSE}" = "true" ]); then + DURING_LICENSE="true" + if [ "${line}" = "${GINKGO_LICENSE_BEACON}*/" ]; then + DURING_LICENSE="false" + SKIP="true" + fi + continue + fi + # When do not need the license, do not need the space between license and other codes, neither. 
+ if [ ${SKIP} = "true" ] && [ -z "${line}" ]; then + continue + fi + SKIP="false" + # It prints the original text into new file. + if [[ "$line" =~ ${STORE_REGEX} ]]; then + STORE_LINE="${STORE_LINE} ${line}" + elif [[ -n "${STORE_LINE}" ]]; then + echo "${STORE_LINE} ${line}" + STORE_LINE="" + else + echo "${line}" + fi + + # handle comments + if [[ "$line" =~ ${START_BLOCK_REX} ]] || [[ "${IN_BLOCK}" -gt 0 ]]; then + if [[ "$line" =~ ${START_BLOCK_REX} ]]; then + IN_BLOCK=$((IN_BLOCK+1)) + fi + if [[ "$line" =~ ${END_BLOCK_REX} ]]; then + IN_BLOCK=$((IN_BLOCK-1)) + fi + # output to new file + continue + fi + # handle functions + if [[ "${line}" =~ $FUNCTION_START ]] || [[ $DURING_FUNCNAME = "true" ]]; then + DURING_FUNCNAME="true" + FUNCTION_HANDLE="${FUNCTION_HANDLE} $line" + if [[ "${line}" =~ ${FUNCTION_NAME_END} ]]; then + DURING_FUNCNAME="false" + fi + if [[ "${line}" =~ ${SCOPE_START} ]]; then + IN_FUNC=$((IN_FUNC+1)) + fi + if [[ "${line}" =~ ${SCOPE_END} ]]; then + IN_FUNC=$((IN_FUNC-1)) + fi + # output to new file + continue + fi + + if [ -n "${FUNCTION_HANDLE}" ] && [[ ${DURING_FUNCNAME} = "false" ]]; then + if [[ "${line}" =~ ${SCOPE_START} ]]; then + IN_FUNC=$((IN_FUNC+1)) + fi + if [[ "${line}" =~ ${SCOPE_END} ]]; then + IN_FUNC=$((IN_FUNC-1)) + fi + + # make sure the function is end + if [[ "${IN_FUNC}" -eq 0 ]]; then + + if [[ "${FUNCTION_HANDLE}" =~ $CHECK_GLOBAL_KEYWORD ]]; then + echo "" + # remove additional space + FUNCTION_HANDLE=$(echo "${FUNCTION_HANDLE}" | sed -E 's/ +/ /g;') + + if [[ "${FUNCTION_HANDLE}" =~ $ANAYSIS_FUNC ]]; then + TEMPLATE="${BASH_REMATCH[1]}" + TEMPLATE_CONTENT="${BASH_REMATCH[2]}" + NAME="${BASH_REMATCH[3]}" + VARIABLE="${BASH_REMATCH[4]}" + VARIABLE=$(echo ${VARIABLE} | sed 's/__restrict__ //g') + VAR_INPUT=$(extract_varname "${VARIABLE}") + TEMPLATE_INPUT=$(extract_varname "${TEMPLATE_CONTENT}") + if [ -n "${TEMPLATE_INPUT}" ]; then + TEMPLATE_INPUT="<${TEMPLATE_INPUT}>" + fi + echo "${TEMPLATE} void 
${NAME}${HOST_SUFFIX} (dim3 grid, dim3 block, size_type dynamic_shared_memory, cudaStream_t queue, ${VARIABLE}) { + /*KEEP*/${NAME}${TEMPLATE_INPUT}<<>>(${VAR_INPUT}); + }" + echo "${NAME} -> ${NAME}${HOST_SUFFIX}" >> ${MAP_FILE} + fi + fi + FUNCTION_HANDLE="" + fi + fi + + +done < "$1" + +# Maybe it only works in Linux +sort "${MAP_FILE}" | uniq > "${MAP_FILE}_temp" +mv "${MAP_FILE}_temp" "${MAP_FILE}" diff --git a/dev_tools/oneapi/convert_source.sh b/dev_tools/oneapi/convert_source.sh new file mode 100755 index 00000000000..b0ed62252f7 --- /dev/null +++ b/dev_tools/oneapi/convert_source.sh @@ -0,0 +1,377 @@ +#!/bin/bash + +# convert_source.sh converts cuda (and c++ code) to dpcpp code with ginkgo design. + +# Usage: +# EnvironementSet ./dev_tools/oneapi/convert_source.sh +# can be .hpp/.cpp/.cu/.cuh + +# the following are parameters set by environment variables +# REQUIRED: +# CUDA_HEADER_DIR: contains the cuda headers +# OPTIONAL: +# ROOT_DIR: the ginkgo folder. The default is current path +# BUILD_DIR: the ginkgo build folder, which needs cmake to generate config.hpp and gtest include. The default is "build" +# Note. It requires GINKGO_BUILD_TESTS=ON to download gtest but it is not required to compile ginkgo. +# If GTEST_HEADER_DIR is available elsewhere GINKGO_BUILD_TESTS is not required. +# CMake's step is not required if copying the ginkgo config.hpp from another ginkgo build into "${ROOT_DIR}/include/ginkgo/". +# ROOT_BUILD_DIR: the complete path for build folder. The default is "${ROOT_DIR}/${BUILD_DIR}" +# GTEST_HEADER_DIR: the gtest header folder. The default is "${ROOT_BUILD_DIR}/third_party_gtest/src/googletest/include" +# CLANG_FORMAT: the clang-format exec. 
The default is "clang-format" +CURRENT_DIR="$( pwd )" +cd "$( dirname "${BASH_SOURCE[0]}" )" +SCRIPT_DIR="$( pwd )" + +source "${SCRIPT_DIR}/shared.sh" + +ROOT_DIR="${ROOT_DIR:="${CURRENT_DIR}"}" +BUILD_DIR="${BUILD_DIR:="build"}" +ROOT_BUILD_DIR="${ROOT_BUILD_DIR:="${ROOT_DIR}/${BUILD_DIR}"}" +CUDA_HEADER_DIR="${CUDA_HEADER_DIR}" +GTEST_HEADER_DIR="${GTEST_HEADER_DIR:="${ROOT_BUILD_DIR}/_deps/googletest-src/googletest/include"}" +CLANG_FORMAT=${CLANG_FORMAT:="clang-format"} +if [[ "${VERBOSE}" == 1 ]]; then + echo "#####################" + echo "# Enviroment Setting:" + echo "CURRENT_DIR ${CURRENT_DIR}" + echo "SCRIPT_DIR ${SCRIPT_DIR}" + echo "ROOT_DIR ${ROOT_DIR}" + echo "ROOT_BUILD_DIR ${ROOT_BUILD_DIR}" + echo "GTEST_HEADER_DIR ${GTEST_HEADER_DIR}" + echo "CUDA_HEADER_DIR ${CUDA_HEADER_DIR}" + echo "CLANG_FORMAT ${CLANG_FORMAT}" + echo "#####################" +fi +if [[ "${CUDA_HEADER_DIR}" == "" ]]; then + echo "Please set the environment variable CUDA_HEADER_DIR" + exit 1 +fi +# move to working_directory +cd working_directory + + +KERNEL_SYNTAX_START="<<<" +KERNEL_SYNTAX_END=">>>" +DEVICE_CODE_SYNTAX="#include \"(common.*)\"" +FUNCTION_END=");" +EXTRACT_KERNEL="false" +GLOBAL_FILE="global_kernel" + +# Converts a CUDA kernel call to the DPC++ equivalent. Also takes care of DPC++ queue manipulation if needed. +convert_cuda_kernel_call() { + # the syntax_regex turn the string into the following + # [1]: kernel name + # [2]: template parameters if it exists. if template contain template struct, it will fail. + # [3]: CUDA parameters + # [4]: the rest of the function call (parameters, ...) 
+ local syntax_regex="([^<>]*)(<[^<>]*>)?<<<(.*)>>>(.*)" + local str="$1" + str=$(echo "${str}" | sed -E 's/ +/ /g') + local temp="" + local num=0 + local var="" + if [[ "${str}" =~ ${syntax_regex} ]]; then + local kernel_name="${BASH_REMATCH[1]}" + local template_param="${BASH_REMATCH[2]}" + local cuda_arg="${BASH_REMATCH[3]}" + local arg="${BASH_REMATCH[4]}" + temp=${cuda_arg//\[*\]/} + temp=${temp//\(*\)/} + temp=${temp//<*>/} + temp=${temp//[^,]} + num=$((${#temp} + 1)) + if [[ ${num} -eq 4 ]]; then + local split_regex="(.*),([^,]*)$" + # avoid overwriting the BASH_REMATCH + if [[ "${cuda_arg}" =~ ${split_regex} ]]; then + var="${BASH_REMATCH[1]}" + if [[ "${BASH_REMATCH[2]}" -eq 0 ]]; then + var="${var}, GET_QUEUE" + else + var="${var}, Error" + fi + else + var="Error" + fi + elif [[ "${num}" -lt 2 ]]; then + var="Error" + elif [[ ${num} -eq 2 ]]; then + var="${cuda_arg}, 0, GET_QUEUE" + elif [[ ${num} -eq 3 ]]; then + var="${cuda_arg}, GET_QUEUE" + fi + local suffix="" + local function_name=$(echo "${kernel_name}" | sed -E 's/(.*::)| //g') + suffix=$(cat "${MAP_FILE}" | sed -nE "s/${function_name} -> ${function_name}(.*)/\1/p") + local suffix_matches=$(echo "${suffix}" | wc -l) + if [[ "${suffix_matches}" -gt 1 ]]; then + echo "static_assert(false, \"Has ${suffix_matches} matches for ${function_name}\");" + suffix=$(echo "${suffix}" | head -1) + fi + + local result="${kernel_name}${suffix}${template_param}(${var})${arg}" + result=$(echo "${result}" | sed -E 's/\) *\(+/,/g;s/, *\)/\)/g') + if [[ "${suffix}" = "" ]]; then + # if the function does not exist in the file, comment it. 
+ echo "//remove//${result}" + else + echo "${result}" + fi + fi +} + + +convert_regex_allowed() { + local str="$1" + str=$(echo "${str}" | sed -E 's~([/().])~\\\1~g') + echo "$str" +} + +# Transfer header file to the correct location +# cuda -> dpcpp +# add sycl header + +input="$1" +filename="${ROOT_DIR}/$input" +if [[ "${VERBOSE}" == 1 ]]; then + echo "Porting file ${filename}" +fi + +# check file exists +if [ ! -f "${filename}" ]; then + echo "${filename} does not exist" + exit 1 +fi + + +temp="" +IN_SYNTAX="false" +DEVICE_FILE="" +UNFORMAT_FILE="unformat.cpp" +FORMAT_FILE="format.cpp" +cp "${filename}" "${UNFORMAT_FILE}" +OUTPUT_FILE="source.cpp" +EMBED_FILE="embed.cu" +EMBED_HOST_FILE="embed_host.cu" +if [[ "${VERBOSE}" == 1 ]]; then + echo "###################################" + echo "# Some generated file for debugging" + echo "the original file ${UNFORMAT_FILE}" + echo "the formatted file ${FORMAT_FILE}" + echo "collect common/*.inc in file ${EMBED_FILE}" + echo "add autohost func in file ${EMBED_HOST_FILE}" + echo "convert original CUDA call in file ${OUTPUT_FILE}, which is the file for dpct" + echo "###################################" +fi +rm "${OUTPUT_FILE}" +echo "#define GET_QUEUE 0" >> "${OUTPUT_FILE}" +# add empty ginkgo license such that format_header recognize some header before header def macro +echo "/*${GINKGO_LICENSE_BEACON}" >> "${OUTPUT_FILE}" +echo "${GINKGO_LICENSE_BEACON}*/" >> "${OUTPUT_FILE}" +rm "${GLOBAL_FILE}" +rm "${EMBED_FILE}" + +# Embed shared kernels into ${EMBED_FILE} +while IFS='' read -r line; do + if [[ "${line}" =~ $DEVICE_CODE_SYNTAX ]]; then + # hold the command to easy replace + device_file="${BASH_REMATCH[1]}" + [ "${EXTRACT_KERNEL}" == "true" ] && echo "/**** ${device_file} - start ****/" >> "${EMBED_FILE}" + cat "${ROOT_DIR}/${device_file}" >> "${EMBED_FILE}" + [ "${EXTRACT_KERNEL}" == "true" ] && echo "/**** ${device_file} - end ****/" >> "${EMBED_FILE}" + if [ -n "${DEVICE_FILE}" ]; then + 
DEVICE_FILE="${DEVICE_FILE};" + fi + DEVICE_FILE="${DEVICE_FILE}${device_file}" + else + echo "${line}" >> "${EMBED_FILE}" + fi +done < "${UNFORMAT_FILE}" + +# Call clang-format for better formatting. +${CLANG_FORMAT} -style=file "${EMBED_FILE}" > "${FORMAT_FILE}" + +# Add an extra host function so that the converted DPC++ code will look like CUDA. +${SCRIPT_DIR}/add_host_function.sh "${FORMAT_FILE}" > "${EMBED_HOST_FILE}" + +while IFS='' read -r line; do + if [[ "${line}" =~ ${KERNEL_SYNTAX_START} ]] || [[ "${IN_SYNTAX}" = "true" ]]; then + temp="${temp} ${line}" + IN_SYNTAX="true" + if [[ "${line}" =~ ${FUNCTION_END} ]]; then + IN_SYNTAX="false" + modified="" + if [[ "${temp}" = *"/*KEEP*/"* ]]; then + modified="${temp/\/\*KEEP\*\//}" + else + # change <<<>>> to (grid, block, dynamic, queue) + modified=$(convert_cuda_kernel_call "$temp") + fi + echo "${modified}" >> "${OUTPUT_FILE}" + temp="" + fi + else + echo "${line}" >> "${OUTPUT_FILE}" + fi +done < "${EMBED_HOST_FILE}" + +# Other fix on OUTPUT_FILE +# dim3 -> dim3_t (for easy replace) +# this_thread_block -> this_thread_block_t +# tiled_partition -> tiled_partition_t +# thread_id.cuh -> use local +# cooperative_group.cuh -> use local +replace_regex="s/dim3/dim3_t/g" +replace_regex="${replace_regex};s/this_thread_block/this_thread_block_t/g" +replace_regex="${replace_regex};s/this_grid/this_grid_t/g" +replace_regex="${replace_regex};s/tiled_partition/tiled_partition_t/g" +replace_regex="${replace_regex};s/thread::/thread_t::/g" +replace_regex="${replace_regex};s/bitonic_sort/bitonic_sort_t/g" +replace_regex="${replace_regex};s/reduction_array/reduction_array_t/g" +replace_regex="${replace_regex};s|cuda/components/thread_ids\.cuh|trick/thread_ids.hpp|g" +replace_regex="${replace_regex};s|cuda/components/cooperative_groups\.cuh|trick/cooperative_groups.hpp|g" +replace_regex="${replace_regex};s|cuda/components/sorting\.cuh|trick/sorting.hpp|g" 
+replace_regex="${replace_regex};s|cuda/components/reduction\.cuh|trick/reduction.hpp|g" +replace_regex="${replace_regex};s|CUH_|DP_HPP_|g" +# keep using original xxx.sync(); ->xxx;//.sync(); +replace_regex="${replace_regex};s|(\.sync\(\);)|;//.sync()|g" +# template macro(); lead std::length_error +replace_regex="${replace_regex};s|(template GKO.*;)|// \1|g" +sed -i -E "${replace_regex}" "${OUTPUT_FILE}" + +# Add the trick dim3_t header if dim3 is used. +if grep -Eq "dim3" ${OUTPUT_FILE}; then + # Found + sed -i '1 i#include "trick/dim3_t.hpp"' ${OUTPUT_FILE} +fi +# add the cooperative group header according to group:: because some sources forget to add it +if grep -Eq "group::" ${OUTPUT_FILE}; then + # Found + sed -i '1 i#include "trick/cooperative_groups.hpp"' ${OUTPUT_FILE} +fi + +OUTPUT_FOLDER="output" +if [[ "${VERBOSE}" == 1 ]]; then + echo "The dpct calling:" + echo "dpct --extra-arg=\"-std=c++14\" --extra-arg=\"-I ${ROOT_DIR}\" --extra-arg=\"-I ${ROOT_DIR}/include\" --extra-arg=\"-I ${ROOT_BUILD_DIR}/include\" --extra-arg=\"-I ${ROOT_DIR}/dev_tools/oneapi\" --extra-arg=\"-I ${GTEST_HEADER_DIR}\" --cuda-include-path=\"${CUDA_HEADER_DIR}\" --format-range=none ${OUTPUT_FILE} --suppress-warnings=1049 --out-root=${OUTPUT_FOLDER}" +fi + +# Delete output/source.cpp +rm "${OUTPUT_FOLDER}/${OUTPUT_FILE}" +rm "${OUTPUT_FOLDER}/${OUTPUT_FILE}.dp.cpp" + +# Call DPCT +echo "# Call DPCT on the previosly generated file." 
+echo "############################################" +dpct --extra-arg="-std=c++14" --extra-arg="-I ${ROOT_DIR}" --extra-arg="-I ${ROOT_DIR}/include" --extra-arg="-I ${ROOT_BUILD_DIR}/include" --extra-arg="-I ${ROOT_DIR}/dev_tools/oneapi" --extra-arg="-I ${GTEST_HEADER_DIR}" --cuda-include-path="${CUDA_HEADER_DIR}" --format-range=none ${OUTPUT_FILE} --suppress-warnings=1049 --out-root=${OUTPUT_FOLDER} +echo "############################################" +echo "# The DPCT process is end" + +dpct_file="" +if [ -f "${OUTPUT_FOLDER}/${OUTPUT_FILE}.dp.cpp" ]; then + dpct_file="${OUTPUT_FOLDER}/${OUTPUT_FILE}.dp.cpp" +elif [ -f "${OUTPUT_FOLDER}/${OUTPUT_FILE}" ]; then + dpct_file="${OUTPUT_FOLDER}/${OUTPUT_FILE}" +else + echo "No file" + exit 1 +fi + +cp "${dpct_file}" "${dpct_file}_bkp" +if [[ "${VERBOSE}" == 1 ]]; then + echo "the dpct result ${dpct_file}_bkp" + echo "recover the temporary change in the file ${dpct_file}" +fi + +# global reverse fix +replace_regex="s/dim3_t/dim3/g" +replace_regex="${replace_regex};s|trick/dim3\.hpp|dpcpp/base/dim3.dp.hpp|g" +replace_regex="${replace_regex};s/this_thread_block_t/this_thread_block/g" +replace_regex="${replace_regex};s/thread_t::/thread::/g" +replace_regex="${replace_regex};s/this_grid_t/this_grid/g" +replace_regex="${replace_regex};s/bitonic_sort_t/bitonic_sort/g" +replace_regex="${replace_regex};s/reduce_array_t/reduce_array/g" +replace_regex="${replace_regex};s/auto dpct_local_range = block;//g" +replace_regex="${replace_regex};s/sycl::nd_range<3>.*, *$/sycl_nd_range(grid, block), /g" +# do not use c-style casting. use .get() to get the pointer. 
# Final part of the post-processing applied to the dpct output file.
#
# The lines below append further rules to the sed program held in
# ${replace_regex} (each rule is joined with ';'), run that program in place on
# ${dpct_file}, optionally split shared device code back out into its own
# files, and finally copy the result into the source tree under ${ROOT_DIR}.
# Reads: replace_regex, HOST_SUFFIX, dpct_file, EXTRACT_KERNEL, DEVICE_FILE,
#        ROOT_DIR, input (all expected to be set before this point).

# Map the helper names and headers used only during conversion back to the
# real DPC++ ones.
replace_regex="${replace_regex};s/\(UninitializedArray<.*> \*\)(.*\.get_pointer\(\))/\1.get()/g"
replace_regex="${replace_regex};s/tiled_partition_t/tiled_partition/g"
replace_regex="${replace_regex};s|trick/thread_ids.hpp|dpcpp/components/thread_ids.dp.hpp|g"
replace_regex="${replace_regex};s|trick/cooperative_groups\.hpp|dpcpp/components/cooperative_groups.dp.hpp|g"
replace_regex="${replace_regex};s|trick/sorting\.hpp|dpcpp/components/sorting.dp.hpp|g"
replace_regex="${replace_regex};s|trick/reduction\.hpp|dpcpp/components/reduction.dp.hpp|g"
replace_regex="${replace_regex};s/#define GET_QUEUE 0//g"
replace_regex="${replace_regex};s/GET_QUEUE/exec->get_queue()/g"
# Rename cuda -> dpcpp in all three capitalisations, then bindings and file
# suffixes. The .cuh rule must stay before the .cu rule.
replace_regex="${replace_regex};s/cuda/dpcpp/g"
replace_regex="${replace_regex};s/Cuda/Dpcpp/g"
replace_regex="${replace_regex};s/CUDA/DPCPP/g"
replace_regex="${replace_regex};s/(cusparse|cublas)_bindings/onemkl_bindings/g"
replace_regex="${replace_regex};s/\.cuh/.dp.hpp/g"
replace_regex="${replace_regex};s/\.cu/.dp.cpp/g"
replace_regex="${replace_regex};s/${HOST_SUFFIX}//g"
# NOTE(review): as written, the next rule removes the literal text '#include '
# wherever it occurs, which would keep the two '#include \"dpcpp/...\"' rules
# after it from ever matching. Presumably a specific header name belongs in
# this pattern -- confirm against the intended rule.
replace_regex="${replace_regex};s|#include ||g"
replace_regex="${replace_regex};s|#include \"dpcpp/base/types\.hpp\"||g"
replace_regex="${replace_regex};s|#include \"dpcpp/test/utils\.hpp\"|#include \"core/test/utils.hpp\"|g"
# Remove as_dpcpp_type(content) -> content.
# If the content itself uses brackets, only one level of nesting is supported.
replace_regex="${replace_regex};s/as_dpcpp_type\((([^()]*(\([^()]*\))[^()]*)*)\)/\1/g"
replace_regex="${replace_regex};s/as_dpcpp_type\(([^()]*)\)/\1/g"
# dpct can not convert idx in static_cast of nested template.
# It should be fixed already, but let's keep it.
replace_regex="${replace_regex};s/threadIdx\.x/item_ct1.get_local_id(2)/g"
replace_regex="${replace_regex};s/threadIdx\.y/item_ct1.get_local_id(1)/g"
replace_regex="${replace_regex};s/threadIdx\.z/item_ct1.get_local_id(0)/g"

replace_regex="${replace_regex};s/blockIdx\.x/item_ct1.get_group(2)/g"
replace_regex="${replace_regex};s/blockIdx\.y/item_ct1.get_group(1)/g"
replace_regex="${replace_regex};s/blockIdx\.z/item_ct1.get_group(0)/g"

replace_regex="${replace_regex};s/blockDim\.x/item_ct1.get_local_range().get(2)/g"
replace_regex="${replace_regex};s/blockDim\.y/item_ct1.get_local_range().get(1)/g"
replace_regex="${replace_regex};s/blockDim\.z/item_ct1.get_local_range().get(0)/g"

replace_regex="${replace_regex};s/gridDim\.x/item_ct1.get_group_range(2)/g"
replace_regex="${replace_regex};s/gridDim\.y/item_ct1.get_group_range(1)/g"
replace_regex="${replace_regex};s/gridDim\.z/item_ct1.get_group_range(0)/g"
# Workaround for abs
replace_regex="${replace_regex};s/sycl::fabs/std::abs/g"
# Remove the unneeded warning DPCT1049 - check the block size.
# It should be properly handled by dpct, but keeping the rule does no harm.
# The rule joins a line ending in '/*' with the next two lines and deletes the
# three of them when they form a DPCT1049 comment.
replace_regex="${replace_regex};/\/\*$/{N;N;/ *\/\*\n *DPCT1049.*\n *\*\//d}"
# Recover // template GKO_...
replace_regex="${replace_regex};s|// (template GKO.*;)|\1|g"
# Recover // xxx;//.sync();->xxx.sync();
replace_regex="${replace_regex};s|;//\.sync\(\)|.sync();|g"
replace_regex="${replace_regex};s|//remove//||g"

sed -i -E "${replace_regex}" "${dpct_file}"

# Include dpct.hpp if __dpct_inline__ or __dpct_align__ are found.
# grep -q is used directly as the condition: it prints nothing, so capturing
# its output in a variable is useless, and an unguarded non-matching grep
# would stop the script when errexit is active.
if grep -Eq "__(dpct_align|dpct_inline)__" "${dpct_file}"; then
    # Found: insert the include as the new first line.
    sed -i '1 i#include "dpcpp/base/dpct.hpp"' "${dpct_file}"
fi

# Extract device code: every entry of the ';'-separated DEVICE_FILE list is cut
# out of the converted file (the text between its '- start' and '- end'
# markers) and written to its own file; the commented-out include of the
# original file is turned into an include of the extracted one.
if [ "${EXTRACT_KERNEL}" = "true" ]; then
    IFS=';' read -ra individual_device <<< "${DEVICE_FILE}"
    for variable in "${individual_device[@]}"; do
        device_regex=$(convert_regex_allowed "${variable}")
        # common/... -> dpcpp_code/...
        dpct_device_path="${variable//common/dpcpp_code}"
        # Flatten the path into a single file name inside output/.
        dpct_device_file="output/${dpct_device_path//\//@}"
        # Print the marked region, then drop the two marker lines themselves.
        sed -n "/${device_regex} - start/,/${device_regex} - end/p" "${dpct_file}" \
            | sed "1d;\$d" > "${dpct_device_file}"
        sed -i "/${device_regex} - start/,/${device_regex} - end/d;s~// *#include \"${device_regex}\"~#include \"${dpct_device_path}\"~g" "${dpct_file}"
        dpct_dir=$(dirname "${dpct_device_path}")
        mkdir -p "${ROOT_DIR}/${dpct_dir}"
        cp "${dpct_device_file}" "${ROOT_DIR}/${dpct_device_path}"
    done
fi

# Integrate the new DPC++ file into the correct place:
# cuda/x.cuh -> dpcpp/x.dp.hpp, cuda/x.cu -> dpcpp/x.dp.cpp
target_file=$(printf '%s\n' "${input}" | sed 's|cuda/|dpcpp/|g;s/\.cuh/\.dp\.hpp/g;s/\.cu/.dp.cpp/g')
target_dir=$(dirname "${target_file}")
mkdir -p "${ROOT_DIR}/${target_dir}"
echo "cp ${dpct_file} ${ROOT_DIR}/${target_file}"
cp "${dpct_file}" "${ROOT_DIR}/${target_file}"
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#!/bin/bash

#######################################
# Checks if $1 is self contained code, that is it does not have an open and
# unclosed code portion (<>()[]), e.g. `my_struct->my_func(xxx,` should fail.
#
# Only the NUMBER of opening and closing characters is compared; their kind
# and nesting order are not checked, so `(]` is reported as closed.
# Arguments: $1 - the code fragment to inspect
# Outputs:   writes "true" or "false" to stdout
#######################################
check_closed() {
    local str="$1"
    # Both helper strings are local so that sourcing scripts do not get
    # unexpected str_start/str_end globals.
    local str_start
    local str_end
    # remove -> to avoid the confusion (its '>' is not a closing bracket)
    str="${str//->}"
    # Replace everything except begin or end characters, resp. (<[ and )>]
    str_start="${str//[^(<\[]}"
    str_end="${str//[^>)\]]}"
    # Check that there are as many begin as end characters
    if [[ "${#str_start}" -eq "${#str_end}" ]]; then
        echo "true"
    else
        echo "false"
    fi
}

# Constants read by the scripts that source this file.
GINKGO_LICENSE_BEACON="************************************************************"
HOST_SUFFIX="_AUTOHOSTFUNC"
MAP_FILE="map_list"
pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... 
+ diff --git a/cuda/solver/ir_kernels.cu b/dev_tools/oneapi/working_directory/trick/cooperative_groups.hpp similarity index 70% rename from cuda/solver/ir_kernels.cu rename to dev_tools/oneapi/working_directory/trick/cooperative_groups.hpp index 7b26ab3527f..74f2a6d1656 100644 --- a/cuda/solver/ir_kernels.cu +++ b/dev_tools/oneapi/working_directory/trick/cooperative_groups.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,45 +30,44 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/solver/ir_kernels.hpp" +#ifndef TRICK_COOPERATIVE_GROUPS_HPP_ +#define TRICK_COOPERATIVE_GROUPS_HPP_ -#include +#include -#include "cuda/components/thread_ids.cuh" +#include namespace gko { namespace kernels { namespace cuda { -/** - * @brief The IR solver namespace. 
- * - * @ingroup ir - */ -namespace ir { - - -constexpr int default_block_size = 512; - - -#include "common/solver/ir_kernels.hpp.inc" +namespace group { +template +__device__ __forceinline__ auto tiled_partition_t(const Group &g) +{ + return tiled_partition_i(g); +} -void initialize(std::shared_ptr exec, - Array *stop_status) +__device__ inline grid_group this_grid_t() { - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size(ceildiv(stop_status->get_num_elems(), block_size.x), 1, - 1); + auto tidx = threadIdx.x; + return this_grid_i(); +} - initialize_kernel<<>>( - stop_status->get_num_elems(), stop_status->get_data()); +__device__ auto this_thread_block_t() +{ + auto tidx = threadIdx.x; + return this_thread_block_i(); } -} // namespace ir +} // namespace group } // namespace cuda } // namespace kernels } // namespace gko + + +#endif // TRICK_COOPERATIVE_GROUPS_HPP_ diff --git a/dev_tools/oneapi/working_directory/trick/dim3_t.hpp b/dev_tools/oneapi/working_directory/trick/dim3_t.hpp new file mode 100644 index 00000000000..b77d597d768 --- /dev/null +++ b/dev_tools/oneapi/working_directory/trick/dim3_t.hpp @@ -0,0 +1,46 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef TRICK_DIM3_T_HPP_ +#define TRICK_DIM3_T_HPP_ + +struct dim3_t { + unsigned int x; + unsigned int y; + unsigned int z; + + dim3_t(unsigned int xval, unsigned int yval = 1, unsigned int zval = 1) : x(xval), y(yval), z(zval) {} + + operator dim3() { return dim3{x, y, z}; } +}; + +#endif // TRICK_DIM3_T_HPP_ diff --git a/dev_tools/oneapi/working_directory/trick/reduction.hpp b/dev_tools/oneapi/working_directory/trick/reduction.hpp new file mode 100644 index 00000000000..deaf0bb73b9 --- /dev/null +++ b/dev_tools/oneapi/working_directory/trick/reduction.hpp @@ -0,0 +1,57 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef TRICK_REDUCTION_HPP_ +#define TRICK_REDUCTION_HPP_ + + +#include + + +namespace gko { +namespace kernels { +namespace cuda { + + +template +void __device__ reduce_array_t(size_type size, const ValueType *__restrict__ source, ValueType *__restrict__ result, Operator reduce_op = Operator{}) +{ + auto tid = threadIdx.x; + reduce_array(size, source, result, reduce_op); +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + +#endif // TRICK_REDUCTION_HPP_ diff --git a/dev_tools/oneapi/working_directory/trick/sorting.hpp b/dev_tools/oneapi/working_directory/trick/sorting.hpp new file mode 100644 index 00000000000..2d5103a4d9d --- /dev/null +++ b/dev_tools/oneapi/working_directory/trick/sorting.hpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef TRICK_SORTING_HPP_ +#define TRICK_SORTING_HPP_ + + +#include + + +#include "cuda/components/sorting.cuh" + + +namespace gko { +namespace kernels { +namespace cuda { + + +template +__forceinline__ __device__ void bitonic_sort_t(ValueType *local_elements, ValueType *shared_elements) +{ + auto tidx = threadIdx.x; + bitonic_sort(local_elements, shared_elements); +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#endif // TRICK_SORTING_HPP_ diff --git a/dev_tools/oneapi/working_directory/trick/thread_ids.hpp b/dev_tools/oneapi/working_directory/trick/thread_ids.hpp new file mode 100644 index 00000000000..9d946c04eff --- /dev/null +++ b/dev_tools/oneapi/working_directory/trick/thread_ids.hpp @@ -0,0 +1,286 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef TRICK_THREAD_IDS_HPP_ +#define TRICK_THREAD_IDS_HPP_ + + +#include "cuda/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The CUDA thread namespace. + * + * @ingroup cuda_thread + */ +namespace thread_t { + + +/** + * @internal + * + * Returns the ID of the block group this thread belongs to. + * + * @return the ID of the block group this thread belongs to + * + * @note Assumes that grid dimensions are in standard format: + * `(block_group_size, first_grid_dimension, second grid_dimension)` + */ +__device__ __forceinline__ size_type get_block_group_id() { return static_cast(blockIdx.z) * gridDim.y + blockIdx.y; } + +/** + * @internal + * + * Returns the ID of the block this thread belongs to. 
+ * + * @return the ID of the block this thread belongs to + * + * @note Assumes that grid dimensions are in standard format: + * `(block_group_size, first_grid_dimension, second grid_dimension)` + */ +__device__ __forceinline__ size_type get_block_id() { return get_block_group_id() * gridDim.x + blockIdx.x; } + + +/** + * @internal + * + * Returns the local ID of the warp (relative to the block) this thread belongs + * to. + * + * @return the local ID of the warp (relative to the block) this thread belongs + * to + * + * @note Assumes that block dimensions are in standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +__device__ __forceinline__ size_type get_local_warp_id() { return static_cast(threadIdx.z); } + + +/** + * @internal + * + * Returns the local ID of the sub-warp (relative to the block) this thread + * belongs to. + * + * @tparam subwarp_size size of the subwarp + * + * @return the local ID of the sub-warp (relative to the block) this thread + * belongs to + * + * @note Assumes that block dimensions are in standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +template +__device__ __forceinline__ size_type get_local_subwarp_id() +{ + constexpr auto subwarps_per_warp = config::warp_size / subwarp_size; + return get_local_warp_id() * subwarps_per_warp + threadIdx.y; +} + + +/** + * @internal + * + * Returns the local ID of the thread (relative to the block). + * to. 
+ * + * @tparam subwarp_size size of the subwarp + * + * @return the local ID of the thread (relative to the block) + * + * @note Assumes that block dimensions are in standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +template +__device__ __forceinline__ size_type get_local_thread_id() +{ + return get_local_subwarp_id() * subwarp_size + threadIdx.x; +} + + +/** + * @internal + * + * Returns the global ID of the warp this thread belongs to. + * + * @tparam warps_per_block number of warps within each block + * + * @return the global ID of the warp this thread belongs to. + * + * @note Assumes that block dimensions and grid dimensions are in standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__device__ __forceinline__ size_type get_warp_id() +{ + return get_block_id() * warps_per_block + get_local_warp_id(); +} + + +/** + * @internal + * + * Returns the global ID of the sub-warp this thread belongs to. + * + * @tparam subwarp_size size of the subwarp + * + * @return the global ID of the sub-warp this thread belongs to. + * + * @note Assumes that block dimensions and grid dimensions are in standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__device__ __forceinline__ size_type get_subwarp_id() +{ + constexpr auto subwarps_per_warp = config::warp_size / subwarp_size; + return get_warp_id() * subwarps_per_warp + threadIdx.y; +} + + +/** + * @internal + * + * Returns the global ID of the thread. + * + * @return the global ID of the thread. 
+ * + * @tparam subwarp_size size of the subwarp + * + * @note Assumes that block dimensions and grid dimensions are in standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__device__ __forceinline__ size_type get_thread_id() +{ + return get_subwarp_id() * subwarp_size + threadIdx.x; +} + + +/** + * @internal + * + * Returns the global ID of the thread in the given index type. + * This function assumes one-dimensional thread and block indexing. + * + * @return the global ID of the thread in the given index type. + * + * @tparam IndexType the index type + */ +template +__device__ __forceinline__ IndexType get_thread_id_flat() +{ + return threadIdx.x + static_cast(blockDim.x) * blockIdx.x; +} + + +/** + * @internal + * + * Returns the total number of threads in the given index type. + * This function assumes one-dimensional thread and block indexing. + * + * @return the total number of threads in the given index type. + * + * @tparam IndexType the index type + */ +template +__device__ __forceinline__ IndexType get_thread_num_flat() +{ + return blockDim.x * static_cast(gridDim.x); +} + + +/** + * @internal + * + * Returns the global ID of the subwarp in the given index type. + * This function assumes one-dimensional thread and block indexing + * with a power of two block size of at least subwarp_size. + * + * @return the global ID of the subwarp in the given index type. + * + * @tparam subwarp_size the size of the subwarp. Must be a power of two! 
# Draws a heatmap of the speedup of tuned COO SpMV runs over the untuned run.
#
# Usage: Rscript tuning_heatmap.R input_directory output_graphics_file
#
# These packages are required, to install them, use the package manager or open
# an R session and type:
# install.packages(c("jsonlite", "tidyr", "ggplot2", "scales"))
library(jsonlite)
library(ggplot2)
library(scales)
library(tidyr)

# Manage arguments
args <- commandArgs(trailingOnly=TRUE)
if (length(args)!=2) {
    stop("Usage: Rscript tuning_heatmap.R input_directory output_graphics_file\n", call=FALSE)
}
input <- args[1]
output <- args[2]

# Read the input json files into a list of dataframes.
# list.files() takes a regular expression, not a shell glob, so match the
# ".json" extension explicitly.
files <- list.files(paste(input), recursive=TRUE, pattern = "\\.json$", full.names=TRUE)
df_tmp <- list()
count <- 1
for (i in files)
{
    tmp <- jsonlite::fromJSON(i, flatten=TRUE)
    # Keep only the columns needed for the plot
    df_tmp[[count]] <- as.data.frame(tmp)[,c("problem.name", "problem.nonzeros",
                                             "spmv.coo.time", "spmv.coo.tuning.values",
                                             "spmv.coo.tuning.time")]
    count <- count + 1
}
# Merge all the separate dataframes
df_merged <- rbind_pages(df_tmp)
# Unnest the two vectors. The merged dataframe has to be passed here: `df` is
# only created by this very assignment, before it the name refers to stats::df.
df <- as.data.frame(unnest(df_merged, spmv.coo.tuning.values, spmv.coo.tuning.time))
# Now that all columns are vectors, compute the speedup using vector operations
df$spmv.coo.speedup <- df$spmv.coo.time/df$spmv.coo.tuning.time

# Plot the values. The colour stops are placed at the smallest speedup, 1.0,
# 1.11 and the largest speedup, all read from the speedup column computed above.
ggplot(df, aes(factor(problem.nonzeros), factor(spmv.coo.tuning.values), fill=spmv.coo.speedup)) +
    geom_tile() +
    scale_fill_gradientn(
        colours=c("red", "yellow", "skyblue", "darkblue"),
        values = rescale(c(min(df$spmv.coo.speedup),
                           1.0,
                           1.11,
                           max(df$spmv.coo.speedup)))) +
    ggtitle("Speedup of tuned value against COO SpMV")+ xlab("nonzeros")+ ylab("tuned value (multiple)") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), plot.title = element_text(hjust=0.5))

# Save to the output file
ggsave(paste(output), width=9, height=7)
"cuda/factorization/par_ilut_select_common.cuh" +- "hip/factorization/par_ilut_select_common" + - FixInclude: "hip/factorization/par_ilut_select_common.hip.hpp" +- "(cuda|hip|dpcpp)/factorization/par_ilut_" + - FixInclude: "core/factorization/par_ilut_kernels.hpp" +- "(cuda|hip|dpcpp)/factorization/par_ict_" + - FixInclude: "core/factorization/par_ict_kernels.hpp" +- "(cuda|hip|dpcpp)/preconditioner/jacobi_" - FixInclude: "core/preconditioner/jacobi_kernels.hpp" +- "(cuda|hip|dpcpp|omp)/base/kernel_launch\." + - FixInclude: "common/unified/base/kernel_launch.hpp" +- "(cuda|hip|dpcpp|omp)/test/base/kernel_launch\." + - FixInclude: "common/unified/base/kernel_launch.hpp" +- "(cuda|hip|dpcpp|omp)/base/kernel_launch_solver\." + - FixInclude: "common/unified/base/kernel_launch_solver.hpp" - "core/test/base/(extended_float|iterator_factory)" - RemoveTest: "true" +- "core/test/base/allocator" + - FixInclude: "core/base/allocator.hpp" +- "reference/test/base/utils" + - FixInclude: "core/base/utils.hpp" - "_builder\.cpp" - RemoveTest: "true" - "_builder\.hpp" - CoreSuffix: "_builder" -- "components.*_kernels(\.hip)?\.(cu|cpp|hpp|cuh)" +- "dpcpp/test/base/dim3\.dp\.cpp" + - FixInclude: "dpcpp/base/dim3.dp.hpp" +- "components.*_kernels(\.hip|\.dp)?\.(cu|cpp|hpp|cuh)" - CoreSuffix: "_kernels" - RemoveTest: "true" - "components" - RemoveTest: "true" - PathIgnore: "1" - PathPrefix: "core" +- "test/base/kernel_launch" + - RemoveTest: "true" + - PathIgnore: "1" + - PathPrefix: "(cuda|hip|omp|dpcpp)" - "test/utils" - CoreSuffix: "_test" - PathIgnore: "1" diff --git a/dev_tools/scripts/create_new_algorithm.sh b/dev_tools/scripts/create_new_algorithm.sh index f6893f68c82..707e5803283 100755 --- a/dev_tools/scripts/create_new_algorithm.sh +++ b/dev_tools/scripts/create_new_algorithm.sh @@ -99,11 +99,13 @@ TEMPLATE_FILES=( "${name}_kernels.cpp" "${name}_*.[ch]*" "${name}_kernels.hip.cpp" + "${name}_kernels.dp.cpp" "${name}.cpp" "${name}_kernels.cpp" "${name}_kernels.cpp" 
"${name}_kernels.cpp" "${name}_kernels.*" + "${name}_kernels.*" ) CMAKE_FILES=( "core/CMakeLists.txt" @@ -113,11 +115,13 @@ CMAKE_FILES=( "omp/CMakeLists.txt" "cuda/CMakeLists.txt" "hip/CMakeLists.txt" + "dpcpp/CMakeLists.txt" "core/test/$source_type/CMakeLists.txt" "reference/test/$source_type/CMakeLists.txt" "omp/test/$source_type/CMakeLists.txt" "cuda/test/$source_type/CMakeLists.txt" "hip/test/$source_type/CMakeLists.txt" + "dpcpp/test/$source_type/CMakeLists.txt" ) TEMPLATE_FILES_LOCATIONS=( "core/$source_type" @@ -127,11 +131,13 @@ TEMPLATE_FILES_LOCATIONS=( "omp/$source_type" "cuda/$source_type" "hip/$source_type" + "dpcpp/$source_type" "core/test/$source_type" "reference/test/$source_type" "omp/test/$source_type" "cuda/test/$source_type" "hip/test/$source_type" + "dpcpp/test/$source_type" ) TEMPLATE_FILES_TYPES=( "$source_type file" @@ -141,11 +147,13 @@ TEMPLATE_FILES_TYPES=( "OpenMP kernel file" "CUDA kernel file" "HIP kernel file" + "DPC++ kernel file" "unit tests for ${name} $source_type" "unit tests for ${name} reference kernels" "unit tests for ${name} OMP kernels" "unit tests for ${name} CUDA kernels" "unit tests for ${name} HIP kernels" + "unit tests for ${name} DPC++ kernels" ) TEMPLATE_FILES_DESCRIPTIONS=( "This is where the ${name} algorithm needs to be implemented." @@ -155,11 +163,13 @@ TEMPLATE_FILES_DESCRIPTIONS=( "OMP kernels for ${name} need to be implemented here." "CUDA kernels for ${name} need to be implemented here." "HIP kernels for ${name} need to be implemented here." + "DPC++ kernels for ${name} need to be implemented here." "This is where core related unit tests should be implemented, i.e. relating to the interface without executor usage." "This is where tests with the Reference executor should be implemented. Usually, this means comparing against previously known values." "This is where tests with the OpenMP executor should be implemented. Usually, this means comparing against a Reference execution." 
"This is where tests with the CUDA executor should be implemented. Usually, this means comparing against a Reference execution." "This is where tests with the HIP executor should be implemented. Usually, this means comparing against a Reference execution." + "This is where tests with the DPC++ executor should be implemented. Usually, this means comparing against a Reference execution." ) mkdir ${TMPDIR} @@ -389,6 +399,10 @@ then echo "hip/test/${source_type}/CMakeLists.txt" | tee -a todo_${name}.txt echo "" | tee -a todo_${name}.txt echo "" | tee -a todo_${name}.txt + echo "dpcpp/CMakeLists.txt" | tee -a todo_${name}.txt + echo "dpcpp/test/${source_type}/CMakeLists.txt" | tee -a todo_${name}.txt + echo "" | tee -a todo_${name}.txt + echo "" | tee -a todo_${name}.txt echo "The following header file has to be modified:" | tee -a todo_${name}.txt echo "core/device_hooks/common_kernels.inc.cpp" | tee -a todo_${name}.txt echo "Equivalent to the other solvers, the following part has to be appended:" | tee -a todo_${name}.txt diff --git a/dev_tools/scripts/format_header.sh b/dev_tools/scripts/format_header.sh index 21b92419ccd..7a24f50bef6 100755 --- a/dev_tools/scripts/format_header.sh +++ b/dev_tools/scripts/format_header.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +CLANG_FORMAT=${CLANG_FORMAT:="clang-format"} + convert_header () { local regex="^(#include )(<|\")(.*)(\"|>)$" if [[ $@ =~ ${regex} ]]; then @@ -10,7 +12,7 @@ convert_header () { else echo "#include \"${header_file}\"" fi - elif [ "${header_file}" = "matrices/config.hpp" ]; then + elif [ "${header_file}" = "matrices/config.hpp" ]; then echo "#include \"${header_file}\"" else echo "#include <${header_file}>" @@ -23,7 +25,7 @@ convert_header () { get_header_def () { local regex="\.(hpp|cuh)" if [[ $@ =~ $regex ]]; then - local def=$(echo "$@" | sed -E "s~include/ginkgo/~~g;s~/|\.~_~g") + local def=$(echo "$@" | sed -E "s~include/ginkgo/~PUBLIC_~g;s~/|\.~_~g") def=$(echo GKO_${def^^}_) echo $def else @@ -48,7 +50,7 
@@ remove_regroup () { # - CoreSuffix: "core_suffix_regex" (default "") # - PathPrefix: "path_prefix_regex" (default "") # - PathIgnore: "path_ignore_number" (default "0") -# - RemoveTest: "false/true" (default "test") +# - RemoveTest: "false/true" (default "false") # - FixInclude: "the specific main header" (default "") # Only "file_regex" without any setting is fine, and it means find the same name with header suffix # For example, /path/to/file.cpp will change to /path/to/file.hpp @@ -104,9 +106,9 @@ get_include_regex () { if [ ! -z "${path_prefix}" ]; then path_prefix="${path_prefix}/" fi - local_output=$(echo "${file}" | sed -E "s~\.hip~~g;s~$path_regex~$path_prefix\2~g") + local_output=$(echo "${file}" | sed -E "s~\.(hip|dp)~~g;s~$path_regex~$path_prefix\2~g") local_output=$(echo "${local_output}" | sed -E "s~$core_suffix$~~g") - local_output="#include (<|\")$local_output\.(hpp|hip\.hpp|cuh)(\"|>)" + local_output="#include (<|\")$local_output\.(hpp|hip\.hpp|dp\.hpp|cuh)(\"|>)" if [ "${remove_test}" = "true" ]; then local_output=$(echo "${local_output}" | sed -E "s~test/~~g") fi @@ -152,12 +154,11 @@ CONSIDER_REGEX="${START_BLOCK_REX}|${END_BLOCK_REX}|${COMMENT_REGEX}|${INCLUDE_R # This part capture the main header and give the possible fail arrangement information while IFS='' read -r line || [ -n "$line" ]; do - if [ "${line}" = '#include "hip/hip_runtime.h"' ] && [ "${SKIP}" = "true" ]; then - HAS_HIP_RUNTIME="true" - elif [ "${line}" = "/*${GINKGO_LICENSE_BEACON}" ] || [ "${DURING_LICENSE}" = "true" ]; then + if [ "${line}" = "/*${GINKGO_LICENSE_BEACON}" ] || [ "${DURING_LICENSE}" = "true" ]; then DURING_LICENSE="true" if [ "${line}" = "${GINKGO_LICENSE_BEACON}*/" ]; then DURING_LICENSE="false" + SKIP="true" fi elif [ "${SKIP}" = "true" ] && ([ "$line" = "${FORCE_TOP_ON}" ] || [ "${DURING_FORCE_TOP}" = "true" ]); then DURING_FORCE_TOP="true" @@ -249,7 +250,7 @@ fi # Write the main header and give warnning if there are multiple matches if [ -f "${BEFORE}" 
]; then # sort or remove the duplication - clang-format -i -style=file ${BEFORE} + "${CLANG_FORMAT}" -i -style=file ${BEFORE} if [ $(wc -l < ${BEFORE}) -gt "1" ]; then echo "Warning $1: there are multiple main header matchings" fi @@ -261,26 +262,23 @@ if [ -f "${BEFORE}" ]; then rm "${BEFORE}" fi -# Arrange the remain files and give +# Arrange the remain files and give if [ -f "${CONTENT}" ]; then add_regroup - if [ "${HAS_HIP_RUNTIME}" = "true" ]; then - echo "#include " > temp - fi head -n -${KEEP_LINES} ${CONTENT} >> temp if [ ! -z "${IFNDEF}" ] && [ ! -z "${DEFINE}" ]; then # Ignore the last line #endif if [[ "${LAST_NONEMPTY}" =~ $ENDIF_REX ]]; then head -n -1 temp > ${CONTENT} echo "#endif // $HEADER_DEF" >> ${CONTENT} - else + else echo "Warning $1: Found the begin header_def but did not find the end of header_def" cat temp > ${CONTENT} fi else cat temp > "${CONTENT}" fi - clang-format -i -style=file "${CONTENT}" + "${CLANG_FORMAT}" -i -style=file "${CONTENT}" rm temp remove_regroup PREV_INC=0 diff --git a/dev_tools/scripts/regroup b/dev_tools/scripts/regroup index 036d5d81588..1756481e2e4 100644 --- a/dev_tools/scripts/regroup +++ b/dev_tools/scripts/regroup @@ -2,7 +2,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^<(rapidjson|gflags|gtest|papi).*' Priority: 3 - - Regex: '^<(omp|cu|hip|thrust).*' + - Regex: '^<(omp|cu|hip|thrust|CL/|cooperative|oneapi).*' Priority: 2 - Regex: '^****************************** +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +namespace gko { + + +std::shared_ptr CudaExecutor::get_master() noexcept +{ + return master_; +} + + +std::shared_ptr CudaExecutor::get_master() const noexcept +{ + return master_; +} + + +bool CudaExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +{ + return this->get_device_id() == dest_exec->get_device_id(); +} + + +bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return this->get_device_id() == dest_exec->get_device_id(); +#else + return false; +#endif +} + + +void CudaExecutor::increase_num_execs(unsigned device_id) +{ +#ifdef GKO_COMPILING_CUDA_DEVICE + // increase the Cuda Device count only when ginkgo build cuda + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + nvidia_device::get_num_execs(device_id)++; +#endif // GKO_COMPILING_CUDA_DEVICE +} + + +void CudaExecutor::decrease_num_execs(unsigned device_id) +{ +#ifdef GKO_COMPILING_CUDA_DEVICE + // increase the Cuda Device count only when ginkgo build cuda + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + nvidia_device::get_num_execs(device_id)--; +#endif // GKO_COMPILING_CUDA_DEVICE +} + + +unsigned CudaExecutor::get_num_execs(unsigned device_id) +{ + std::lock_guard guard(nvidia_device::get_mutex(device_id)); + return nvidia_device::get_num_execs(device_id); +} + + +} // namespace gko diff --git a/devices/device.cpp b/devices/device.cpp new file mode 100644 index 00000000000..ca6597d652d --- /dev/null +++ b/devices/device.cpp @@ -0,0 +1,71 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include + + +#include + + +namespace gko { + + +std::mutex &nvidia_device::get_mutex(int i) +{ + static std::mutex mutex[max_devices]; + return mutex[i]; +} + + +int &nvidia_device::get_num_execs(int i) +{ + static int num_execs[max_devices]; + return num_execs[i]; +} + + +std::mutex &amd_device::get_mutex(int i) +{ + static std::mutex mutex[max_devices]; + return mutex[i]; +} + + +int &amd_device::get_num_execs(int i) +{ + static int num_execs[max_devices]; + return num_execs[i]; +} + + +} // namespace gko diff --git a/devices/dpcpp/CMakeLists.txt b/devices/dpcpp/CMakeLists.txt new file mode 100644 index 00000000000..f6fdb354ff2 --- /dev/null +++ b/devices/dpcpp/CMakeLists.txt @@ -0,0 +1,2 @@ +ginkgo_add_object_library(ginkgo_dpcpp_device + executor.cpp) diff --git a/core/devices/cuda/executor.cpp b/devices/dpcpp/executor.cpp similarity index 84% rename from core/devices/cuda/executor.cpp rename to devices/dpcpp/executor.cpp index 3566578a681..7a32d5e49a3 100644 --- a/core/devices/cuda/executor.cpp +++ b/devices/dpcpp/executor.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,25 +33,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include +#include + + +#include +#include + + namespace gko { -std::shared_ptr CudaExecutor::get_master() noexcept +std::shared_ptr DpcppExecutor::get_master() noexcept { return master_; } -std::shared_ptr CudaExecutor::get_master() const noexcept +std::shared_ptr DpcppExecutor::get_master() const noexcept { return master_; } -unsigned CudaExecutor::num_execs[max_devices]; - - -std::mutex CudaExecutor::mutex[max_devices]; - - } // namespace gko diff --git a/devices/hip/CMakeLists.txt b/devices/hip/CMakeLists.txt new file mode 100644 index 00000000000..d05db433984 --- /dev/null +++ b/devices/hip/CMakeLists.txt @@ -0,0 +1,5 @@ +ginkgo_add_object_library(ginkgo_hip_device + executor.cpp) +if(GINKGO_BUILD_HIP) + target_compile_definitions(ginkgo_hip_device PRIVATE GKO_COMPILING_HIP_DEVICE) +endif() diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp new file mode 100644 index 00000000000..53da0db72c2 --- /dev/null +++ b/devices/hip/executor.cpp @@ -0,0 +1,98 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +namespace gko { + + +std::shared_ptr HipExecutor::get_master() noexcept { return master_; } + + +std::shared_ptr HipExecutor::get_master() const noexcept +{ + return master_; +} + + +bool HipExecutor::verify_memory_to(const HipExecutor *dest_exec) const +{ + return this->get_device_id() == dest_exec->get_device_id(); +} + + +bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +{ +#if GINKGO_HIP_PLATFORM_NVCC + return this->get_device_id() == dest_exec->get_device_id(); +#else + return false; +#endif +} + + +#if (GINKGO_HIP_PLATFORM_NVCC == 1) +using hip_device_class = nvidia_device; +#else +using hip_device_class = amd_device; +#endif + + +void HipExecutor::increase_num_execs(int device_id) +{ +#ifdef GKO_COMPILING_HIP_DEVICE + // increase the HIP Device count only when ginkgo build hip + std::lock_guard guard(hip_device_class::get_mutex(device_id)); + hip_device_class::get_num_execs(device_id)++; +#endif // GKO_COMPILING_HIP_DEVICE +} + + +void HipExecutor::decrease_num_execs(int device_id) +{ +#ifdef GKO_COMPILING_HIP_DEVICE + // increase the HIP Device count only when ginkgo build hip + std::lock_guard 
guard(hip_device_class::get_mutex(device_id)); + hip_device_class::get_num_execs(device_id)--; +#endif // GKO_COMPILING_HIP_DEVICE +} + + +int HipExecutor::get_num_execs(int device_id) +{ + std::lock_guard guard(hip_device_class::get_mutex(device_id)); + return hip_device_class::get_num_execs(device_id); +} + + +} // namespace gko diff --git a/devices/machine_topology.cpp b/devices/machine_topology.cpp new file mode 100644 index 00000000000..4028ad31dae --- /dev/null +++ b/devices/machine_topology.cpp @@ -0,0 +1,261 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include +#include + + +#include + + +namespace gko { + + +namespace detail { + + +class topo_bitmap { +public: + using bitmap_type = hwloc_bitmap_s; +#if GKO_HAVE_HWLOC + topo_bitmap() : bitmap(hwloc_bitmap_alloc()) {} + ~topo_bitmap() { hwloc_bitmap_free(bitmap); } +#endif + bitmap_type *get() { return bitmap; } + +private: + bitmap_type *bitmap; +}; + + +hwloc_topology *init_topology() +{ +#if GKO_HAVE_HWLOC + hwloc_topology_t tmp; + hwloc_topology_init(&tmp); + + hwloc_topology_set_io_types_filter(tmp, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); + hwloc_topology_set_type_filter(tmp, HWLOC_OBJ_BRIDGE, + HWLOC_TYPE_FILTER_KEEP_NONE); + hwloc_topology_set_type_filter(tmp, HWLOC_OBJ_OS_DEVICE, + HWLOC_TYPE_FILTER_KEEP_IMPORTANT); + hwloc_topology_load(tmp); + + return tmp; +#else + return nullptr; +#endif +} + + +} // namespace detail + + +const MachineTopology::io_obj_info *MachineTopology::get_pci_device( + const std::string &pci_bus_id) const +{ + for (size_type id = 0; id < this->pci_devices_.size(); ++id) { + if (this->pci_devices_[id].pci_bus_id.compare(0, 12, pci_bus_id, 0, + 12) == 0) { + return &this->pci_devices_[id]; + } + } + return nullptr; +} + + +MachineTopology::MachineTopology() +{ +#if GKO_HAVE_HWLOC + + // Initialize the topology from hwloc + this->topo_ = hwloc_manager(detail::init_topology(), + hwloc_topology_destroy); + // load objects of type Package . 
See HWLOC_OBJ_PACKAGE for more details. + load_objects(HWLOC_OBJ_PACKAGE, this->packages_); + // load objects of type NUMA Node. See HWLOC_OBJ_NUMANODE for more details. + load_objects(HWLOC_OBJ_NUMANODE, this->numa_nodes_); + // load objects of type Core. See HWLOC_OBJ_CORE for more details. + load_objects(HWLOC_OBJ_CORE, this->cores_); + // load objects of type processing unit(PU). See HWLOC_OBJ_PU for more + // details. + load_objects(HWLOC_OBJ_PU, this->pus_); + // load objects of type PCI Devices See HWLOC_OBJ_PCI_DEVICE for more + // details. + load_objects(HWLOC_OBJ_PCI_DEVICE, this->pci_devices_); + num_numas_ = hwloc_get_nbobjs_by_type(this->topo_.get(), HWLOC_OBJ_PACKAGE); + +#else + + this->topo_ = hwloc_manager(); + +#endif +} + + +void MachineTopology::hwloc_binding_helper( + const std::vector &obj, + const std::vector &bind_ids, const bool singlify) const +{ +#if GKO_HAVE_HWLOC + detail::topo_bitmap bitmap_toset; + auto num_ids = bind_ids.size(); + auto id = bind_ids.data(); + // Set the given ids to a bitmap + for (size_type i = 0; i < num_ids; ++i) { + GKO_ASSERT(id[i] < obj.size()); + GKO_ASSERT(id[i] >= 0); + hwloc_bitmap_set(bitmap_toset.get(), obj[id[i]].os_id); + } + + // Singlify to reduce expensive migrations, if asked for. + if (singlify) { + hwloc_bitmap_singlify(bitmap_toset.get()); + } + hwloc_set_cpubind(this->topo_.get(), bitmap_toset.get(), 0); +#endif +} + + +void MachineTopology::load_objects( + hwloc_obj_type_t type, + std::vector &objects) const +{ +#if GKO_HAVE_HWLOC + // Get the number of normal objects of a certain type (Core, PU, Machine + // etc.). + unsigned num_objects = hwloc_get_nbobjs_by_type(this->topo_.get(), type); + objects.reserve(num_objects); + for (unsigned i = 0; i < num_objects; i++) { + // Get the actual normal object of the given type. 
+ hwloc_obj_t obj = hwloc_get_obj_by_type(this->topo_.get(), type, i); + objects.push_back(normal_obj_info{obj, obj->logical_index, + obj->os_index, obj->gp_index, + hwloc_bitmap_first(obj->nodeset)}); + } +#endif +} + + +inline int MachineTopology::get_obj_id_by_os_index( + const std::vector &objects, + size_type os_index) const +{ +#if GKO_HAVE_HWLOC + for (size_type id = 0; id < objects.size(); ++id) { + if (objects[id].os_id == os_index) { + return id; + } + } +#endif + return -1; +} + + +inline int MachineTopology::get_obj_id_by_gp_index( + const std::vector &objects, + size_type gp_index) const +{ +#if GKO_HAVE_HWLOC + for (size_type id = 0; id < objects.size(); ++id) { + if (objects[id].gp_id == gp_index) { + return id; + } + } +#endif + return -1; +} + + +void MachineTopology::load_objects( + hwloc_obj_type_t type, + std::vector &vector) const +{ +#if GKO_HAVE_HWLOC + GKO_ASSERT(this->cores_.size() != 0); + GKO_ASSERT(this->pus_.size() != 0); + unsigned num_objects = hwloc_get_nbobjs_by_type(this->topo_.get(), type); + vector.reserve(num_objects); + for (unsigned i = 0; i < num_objects; i++) { + // Get the actual PCI object. + hwloc_obj_t obj = hwloc_get_obj_by_type(this->topo_.get(), type, i); + // Get the non-IO ancestor (which is the closest and the one that can be + // bound to) of the object. + auto ancestor = hwloc_get_non_io_ancestor_obj(this->topo_.get(), obj); + // Create the object. 
+ vector.push_back( + io_obj_info{obj, obj->logical_index, obj->os_index, obj->gp_index, + hwloc_bitmap_first(ancestor->nodeset), ancestor}); + // Get the corresponding cpuset of the ancestor nodeset + detail::topo_bitmap ancestor_cpuset; + hwloc_cpuset_from_nodeset(this->topo_.get(), ancestor_cpuset.get(), + ancestor->nodeset); + // Find the cpu objects closest to this device from the ancestor cpuset + // and store their ids for binding purposes + int closest_pu_id = -1; + int closest_os_id = hwloc_bitmap_first(ancestor_cpuset.get()); + // clang-format off + hwloc_bitmap_foreach_begin(closest_os_id, ancestor_cpuset.get()) + closest_pu_id = get_obj_id_by_os_index(this->pus_, closest_os_id); + vector.back().closest_pu_ids.push_back(closest_pu_id); + hwloc_bitmap_foreach_end(); + // clang-format on + + // Get local id of the ancestor object. + if (hwloc_compare_types(ancestor->type, HWLOC_OBJ_PACKAGE) == 0) { + vector.back().ancestor_local_id = + get_obj_id_by_gp_index(this->packages_, ancestor->gp_index); + } else if (hwloc_compare_types(ancestor->type, HWLOC_OBJ_CORE) == 0) { + vector.back().ancestor_local_id = + get_obj_id_by_gp_index(this->cores_, ancestor->gp_index); + } else if (hwloc_compare_types(ancestor->type, HWLOC_OBJ_NUMANODE) == + 0) { + vector.back().ancestor_local_id = + get_obj_id_by_gp_index(this->numa_nodes_, ancestor->gp_index); + } + // Get type of the ancestor object and store it as a string. + char ances_type[24]; + hwloc_obj_type_snprintf(ances_type, sizeof(ances_type), ancestor, 0); + vector.back().ancestor_type = std::string(ances_type); + // Write the PCI Bus ID from the object info. 
+ char pci_bus_id[14]; + snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%01x", + obj->attr->pcidev.domain, obj->attr->pcidev.bus, + obj->attr->pcidev.dev, obj->attr->pcidev.func); + vector.back().pci_bus_id = std::string(pci_bus_id); + } +#endif +} + + +} // namespace gko diff --git a/core/devices/omp/CMakeLists.txt b/devices/omp/CMakeLists.txt similarity index 100% rename from core/devices/omp/CMakeLists.txt rename to devices/omp/CMakeLists.txt diff --git a/core/devices/omp/executor.cpp b/devices/omp/executor.cpp similarity index 86% rename from core/devices/omp/executor.cpp rename to devices/omp/executor.cpp index e53a1b53c43..3808d42969e 100644 --- a/core/devices/omp/executor.cpp +++ b/devices/omp/executor.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -44,6 +44,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +void OmpExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + auto num_cores = + (mach_topo->get_num_cores() == 0 ? 1 : mach_topo->get_num_cores()); + auto num_pus = + (mach_topo->get_num_pus() == 0 ? 
1 : mach_topo->get_num_pus()); + this->get_exec_info().num_computing_units = num_cores; + this->get_exec_info().num_pu_per_cu = num_pus / num_cores; +} + + void OmpExecutor::raw_free(void *ptr) const noexcept { std::free(ptr); } diff --git a/core/devices/reference/CMakeLists.txt b/devices/reference/CMakeLists.txt similarity index 100% rename from core/devices/reference/CMakeLists.txt rename to devices/reference/CMakeLists.txt diff --git a/core/devices/reference/dummy.cpp b/devices/reference/dummy.cpp similarity index 97% rename from core/devices/reference/dummy.cpp rename to devices/reference/dummy.cpp index a2f3f380cbe..1c2a9b39c48 100644 --- a/core/devices/reference/dummy.cpp +++ b/devices/reference/dummy.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml index 268f8348145..b31ab5bfc0b 100644 --- a/doc/DoxygenLayout.xml +++ b/doc/DoxygenLayout.xml @@ -5,7 +5,11 @@ - + + + + + diff --git a/doc/conf/Doxyfile.in b/doc/conf/Doxyfile.in index d3442212a21..4ec815c0164 100644 --- a/doc/conf/Doxyfile.in +++ b/doc/conf/Doxyfile.in @@ -12,7 +12,7 @@ INPUT = INCLUDE_PATH = @DIR_BASE@/include @DIR_BASE@ OUTPUT_DIRECTORY = @DIR_OUT@ -EXAMPLE_PATH = @CMAKE_BINARY_DIR@/doc/examples +EXAMPLE_PATH = @PROJECT_BINARY_DIR@/doc/examples RECURSIVE = YES EXAMPLE_RECURSIVE = NO FILE_PATTERNS = *.cpp *.cu *.hpp *.cuh *.md diff --git a/doc/examples/CMakeLists.txt b/doc/examples/CMakeLists.txt index bc256aac6c6..9363ea5e1dd 100644 --- a/doc/examples/CMakeLists.txt +++ b/doc/examples/CMakeLists.txt @@ -1,32 +1,32 @@ # Collect all of the directory names for the examples programs FILE(GLOB _ginkgo_examples - ${CMAKE_SOURCE_DIR}/examples/* + ${PROJECT_SOURCE_DIR}/examples/* ) -LIST(REMOVE_ITEM _ginkgo_examples 
"${CMAKE_SOURCE_DIR}/examples/CMakeLists.txt" "${CMAKE_SOURCE_DIR}/examples/build-setup.sh") +LIST(REMOVE_ITEM _ginkgo_examples "${PROJECT_SOURCE_DIR}/examples/CMakeLists.txt" "${PROJECT_SOURCE_DIR}/examples/build-setup.sh") ADD_CUSTOM_TARGET(examples) file(GLOB _ginkgo_examples_tooltip - ${CMAKE_SOURCE_DIR}/examples/*/doc/tooltip + ${PROJECT_SOURCE_DIR}/examples/*/doc/tooltip ) file(GLOB _ginkgo_examples_kind - ${CMAKE_SOURCE_DIR}/examples/*/doc/kind + ${PROJECT_SOURCE_DIR}/examples/*/doc/kind ) file(GLOB _ginkgo_examples_buildson - ${CMAKE_SOURCE_DIR}/examples/*/doc/builds-on + ${PROJECT_SOURCE_DIR}/examples/*/doc/builds-on ) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/examples.hpp COMMAND ${PERL_EXECUTABLE} ARGS - ${CMAKE_SOURCE_DIR}/doc/scripts/examples.pl + ${PROJECT_SOURCE_DIR}/doc/scripts/examples.pl ${CMAKE_CURRENT_SOURCE_DIR}/examples.hpp.in ${_ginkgo_examples} > ${CMAKE_CURRENT_BINARY_DIR}/examples.hpp DEPENDS - ${CMAKE_SOURCE_DIR}/doc/scripts/examples.pl + ${PROJECT_SOURCE_DIR}/doc/scripts/examples.pl ${CMAKE_CURRENT_SOURCE_DIR}/examples.hpp.in ${_ginkgo_examples_tooltip} ${_ginkgo_examples_kind} @@ -49,12 +49,12 @@ FOREACH(example ${_ginkgo_examples}) OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${example}.cpp COMMAND ${PERL_EXECUTABLE} ARGS - ${CMAKE_SOURCE_DIR}/doc/scripts/program2plain - < ${CMAKE_SOURCE_DIR}/examples/${example}/${example}.cpp + ${PROJECT_SOURCE_DIR}/doc/scripts/program2plain + < ${PROJECT_SOURCE_DIR}/examples/${example}/${example}.cpp > ${CMAKE_CURRENT_BINARY_DIR}/${example}.cpp DEPENDS - ${CMAKE_SOURCE_DIR}/doc/scripts/program2plain - ${CMAKE_SOURCE_DIR}/examples/${example}/${example}.cpp + ${PROJECT_SOURCE_DIR}/doc/scripts/program2plain + ${PROJECT_SOURCE_DIR}/examples/${example}/${example}.cpp VERBATIM ) @@ -62,19 +62,19 @@ FOREACH(example ${_ginkgo_examples}) OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${example}.hpp COMMAND ${PERL_EXECUTABLE} ARGS - ${CMAKE_SOURCE_DIR}/doc/scripts/make_example.pl - ${example} ${CMAKE_SOURCE_DIR} 
${CMAKE_BINARY_DIR} + ${PROJECT_SOURCE_DIR}/doc/scripts/make_example.pl + ${example} ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR} > ${CMAKE_CURRENT_BINARY_DIR}/${example}.hpp WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS - ${CMAKE_SOURCE_DIR}/doc/scripts/make_example.pl - ${CMAKE_SOURCE_DIR}/doc/scripts/intro2toc - ${CMAKE_SOURCE_DIR}/doc/scripts/create_anchors - ${CMAKE_SOURCE_DIR}/doc/scripts/program2doxygen - ${CMAKE_SOURCE_DIR}/examples/${example}/${example}.cpp - ${CMAKE_SOURCE_DIR}/examples/${example}/doc/intro.dox - ${CMAKE_SOURCE_DIR}/examples/${example}/doc/results.dox + ${PROJECT_SOURCE_DIR}/doc/scripts/make_example.pl + ${PROJECT_SOURCE_DIR}/doc/scripts/intro2toc + ${PROJECT_SOURCE_DIR}/doc/scripts/create_anchors + ${PROJECT_SOURCE_DIR}/doc/scripts/program2doxygen + ${PROJECT_SOURCE_DIR}/examples/${example}/${example}.cpp + ${PROJECT_SOURCE_DIR}/examples/${example}/doc/intro.dox + ${PROJECT_SOURCE_DIR}/examples/${example}/doc/results.dox ) ADD_CUSTOM_TARGET(examples_${example} diff --git a/doc/examples/examples.hpp.in b/doc/examples/examples.hpp.in index cf0b01f9a04..adc7287a337 100644 --- a/doc/examples/examples.hpp.in +++ b/doc/examples/examples.hpp.in @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,8 +57,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * By default, Ginkgo is compiled with at least * -DGINKGO_BUILD_REFERENCE=ON. - * To execute on a GPU, you need to have a GPU on the system and must have - * compiled Ginkgo with the -DGINKGO_BUILD_CUDA=ON option. + * Ginkgo also tries to detect your environment setup (presence of CUDA, ...) to + * enable the relevant accelerator modules. 
If you want to target a specific + * GPU, make sure that Ginkgo is compiled with the accelerator specific module + * enabled, such as: + *
    + *
  1. -DGINKGO_BUILD_CUDA=ON option for NVIDIA GPUs. + *
  2. -DGINKGO_BUILD_HIP=ON option for AMD or NVIDIA GPUs. + *
  3. -DGINKGO_BUILD_DPCPP=ON option for Intel GPUs (and + * possibly any other platform). + *
* @anchor ExampleConnectionGraph @@ -127,12 +135,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * - * @ref twentyseven_pt_stencil_solver - * Using a twentyseven point 3D stencil to solve the poisson equation - * with array views. - * - * - * * @ref external_lib_interfacing * Using Ginkgo's solver with the external library deal.II. * @@ -185,6 +187,60 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * factorization. * * + * + * @ref mixed_spmv + * Shows the Ginkgo mixed precision spmv functionality. + * + * + * + * @ref mixed_precision_ir + * Manual implementation of a Mixed Precision Iterative Refinement + * (MPIR) solver. + * + * + * + * @ref adaptiveprecision_blockjacobi + * Shows how to use the adaptive precision block-Jacobi + * preconditioner. + * + * + * + * @ref cb_gmres + * Using the Ginkgo CB-GMRES solver (Compressed Basis GMRES). + * + * + * + * @ref heat_equation + * Solving a 2D heat equation and showing matrix assembly, vector + * initialization and solver setup in a more complex setting with + * output visualization. + * + * + * + * @ref iterative_refinement + * Using a low accuracy CG solver as an inner solver to an iterative + * refinement (IR) method which solves a linear system. + * + * + * + * @ref ir_ilu_preconditioned_solver + * Combining iterative refinement with the adaptive precision + * block-Jacobi preconditioner to approximate triangular systems + * occurring in ILU preconditioning. + * + * + * + * @ref par_ilu_convergence + * Convergence analysis using the example of the parallel incomplete + * factorization solver. + * + * + * + * @ref preconditioner_export + * Explicit generation and storage of preconditioners for given + * matrices. + * + * * * * @@ -194,16 +250,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * - * * * * * - * * * * @@ -218,7 +275,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * + * + * + * + * + * * * @@ -228,13 +298,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * * - * *
Solving a simple linear system with choice of executors. + * Solving a simple linear system with choice of executors * @ref simple_solver *
Debug the performance of a solver using loggers. + * Debug the performance of a solver or preconditioner * @ref performance_debugging, + * @ref preconditioner_export + *
Using preconditioners * @ref preconditioned_solver, - * @ref ilu_preconditioned_solver + * @ref ilu_preconditioned_solver, + * @ref ir_ilu_preconditioned_solver, + * @ref adaptiveprecision_blockjacobi, + * @ref par_ilu_convergence, + * @ref preconditioner_export + *
Iterative refinement + * @ref iterative_refinement, + * @ref mixed_precision_ir, + * @ref ir_ilu_preconditioned_solver *
@ref poisson_solver, * @ref three_pt_stencil_solver, * @ref nine_pt_stencil_solver, - * @ref twentyseven_pt_stencil_solver, * @ref custom_matrix_format *
Reading in a matrix and right hand side from a file. + * Reading in a matrix and right hand side from a file * @ref simple_solver, * @ref minimal_cuda_solver, @@ -254,7 +323,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * - * * @@ -277,33 +346,43 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * - * * * * * - * * * * * - * * * * * - * * * + * + * + * + * + * *
Using Ginkgo with external libraries. + * Using Ginkgo with external libraries * @ref external_lib_interfacing *
Using Ginkgo to construct more complex linear algebra routines. + * Using Ginkgo to construct more complex linear algebra routines * @ref inverse_iteration *
Logging within Ginkgo. + * Logging within Ginkgo * @ref simple_solver_logging, * @ref papi_logging, + * @ref performance_debugging * @ref custom_logger *
Constructing your own stopping criterion. + * Constructing your own stopping criterion * @ref custom_stopping_criterion *
Using ranges in Ginkgo. + * Using ranges in Ginkgo * @ref ginkgo_ranges *
Mixed precision + * @ref mixed_spmv, + * @ref mixed_precision_ir, + * @ref adaptiveprecision_blockjacobi + *
*/ diff --git a/doc/headers/cuda_executor.hpp b/doc/headers/cuda_executor.hpp index 52b9307357a..75aea04914c 100644 --- a/doc/headers/cuda_executor.hpp +++ b/doc/headers/cuda_executor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/dpcpp_executor.hpp b/doc/headers/dpcpp_executor.hpp new file mode 100644 index 00000000000..f41571209fe --- /dev/null +++ b/doc/headers/dpcpp_executor.hpp @@ -0,0 +1,40 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +/** + * @defgroup exec_dpcpp DPC++ Executor + * + * @brief A module dedicated to the implementation and usage of the DPC++ + * executor in Ginkgo. + * + * @ingroup Executor + */ diff --git a/doc/headers/executors.hpp b/doc/headers/executors.hpp index 002f64230e9..4b6fae3b025 100644 --- a/doc/headers/executors.hpp +++ b/doc/headers/executors.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,12 +47,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Ginkgo currently supports three different executor types: * * + @ref exec_omp specifies that the data should be stored and the - * associated operations executed on an OpenMP-supporting device (e.g. host - * CPU); + * associated operations executed on an OpenMP-supporting device (e.g. host + * CPU); * + @ref exec_cuda specifies that the data should be stored and the * operations executed on the NVIDIA GPU accelerator; * + @ref exec_hip uses the HIP library to compile code for either NVIDIA or * AMD GPU accelerator; + * + @ref exec_dpcpp uses the DPC++ compiler for any DPC++ supported hardware + * (e.g. 
Intel CPUs, GPU, FPGAs, ...); * + @ref exec_ref executes a non-optimized reference implementation, * which can be used to debug the library. */ diff --git a/doc/headers/factor.hpp b/doc/headers/factor.hpp index 320668cabae..4f2b9260e53 100644 --- a/doc/headers/factor.hpp +++ b/doc/headers/factor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/hip_executor.hpp b/doc/headers/hip_executor.hpp index 4805cfb4b87..da4b8303cb1 100644 --- a/doc/headers/hip_executor.hpp +++ b/doc/headers/hip_executor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/jacobi.hpp b/doc/headers/jacobi.hpp index 875efa2c4d0..8eb87bc0a50 100644 --- a/doc/headers/jacobi.hpp +++ b/doc/headers/jacobi.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/linop.hpp b/doc/headers/linop.hpp index 12fc582eb9d..89f1811d1f5 100644 --- a/doc/headers/linop.hpp +++ b/doc/headers/linop.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/logging.hpp b/doc/headers/logging.hpp index e9563469b87..0259932e407 100644 --- a/doc/headers/logging.hpp +++ b/doc/headers/logging.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/matrix_formats.hpp b/doc/headers/matrix_formats.hpp index 641cb98bc13..1114bec1df2 100644 --- a/doc/headers/matrix_formats.hpp +++ b/doc/headers/matrix_formats.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/omp_executor.hpp b/doc/headers/omp_executor.hpp index 83df1f5b292..f0b0783afba 100644 --- a/doc/headers/omp_executor.hpp +++ b/doc/headers/omp_executor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/preconditioners.hpp b/doc/headers/preconditioners.hpp index b7797b92d1b..de71c54804d 100644 --- a/doc/headers/preconditioners.hpp +++ b/doc/headers/preconditioners.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/ref_executor.hpp b/doc/headers/ref_executor.hpp index c4faf61e2c4..0d69b4dc798 100644 --- a/doc/headers/ref_executor.hpp +++ b/doc/headers/ref_executor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/solvers.hpp b/doc/headers/solvers.hpp index ac0f797cf02..bca38ef834e 100644 --- a/doc/headers/solvers.hpp +++ b/doc/headers/solvers.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/doc/headers/stop.hpp b/doc/headers/stop.hpp index 16ce4487601..cb5b412839c 100644 --- a/doc/headers/stop.hpp +++ b/doc/headers/stop.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/doc/helpers.cmake b/doc/helpers.cmake index 8d284019d07..c0b4c7b23a7 100644 --- a/doc/helpers.cmake +++ b/doc/helpers.cmake @@ -39,7 +39,7 @@ endfunction() # generates the documentation named with the additional # config file in format function(ginkgo_doc_gen name in pdf mainpage-in) - set(DIR_BASE "${CMAKE_SOURCE_DIR}") + set(DIR_BASE "${PROJECT_SOURCE_DIR}") set(DOC_BASE "${CMAKE_CURRENT_SOURCE_DIR}") set(DIR_SCRIPT "${DOC_BASE}/scripts") set(DIR_OUT "${CMAKE_CURRENT_BINARY_DIR}/${name}") @@ -53,7 +53,7 @@ function(ginkgo_doc_gen name in pdf mainpage-in) "${DOC_BASE}/headers/" ) list(APPEND doxygen_base_input - ${CMAKE_BINARY_DIR}/include/ginkgo/config.hpp + ${PROJECT_BINARY_DIR}/include/ginkgo/config.hpp ${DIR_BASE}/include ${MAINPAGE} ) @@ -69,6 +69,7 @@ function(ginkgo_doc_gen name in pdf mainpage-in) ${DIR_BASE}/omp ${DIR_BASE}/cuda ${DIR_BASE}/hip + ${DIR_BASE}/dpcpp ${DIR_BASE}/reference ) set(doxygen_image_path "") @@ -77,7 +78,7 @@ function(ginkgo_doc_gen name in pdf mainpage-in) ${DIR_BASE}/include/ginkgo/**/*.hpp ) list(APPEND doxygen_depend - ${CMAKE_BINARY_DIR}/include/ginkgo/config.hpp + ${PROJECT_BINARY_DIR}/include/ginkgo/config.hpp ) if(GINKGO_DOC_GENERATE_EXAMPLES) list(APPEND doxygen_depend diff --git a/doc/joss/figures/ginkgo-hierarchy.pdf b/doc/joss/figures/ginkgo-hierarchy.pdf index 945bacfe263..d171dfe03a5 100644 Binary files a/doc/joss/figures/ginkgo-hierarchy.pdf and b/doc/joss/figures/ginkgo-hierarchy.pdf differ diff --git a/doc/joss/figures/ginkgo-hierarchy.tex b/doc/joss/figures/ginkgo-hierarchy.tex index eadc3affdb2..d72b7839e7d 100644 --- a/doc/joss/figures/ginkgo-hierarchy.tex +++ b/doc/joss/figures/ginkgo-hierarchy.tex @@ -350,7 +350,11 @@ { \textbf{CudaExecutor} }; - \node (hip) [abstract, rectangle, below=of cuda] + \node (dpcpp) [abstract, rectangle, below=of cuda] + { + \textbf{DpcppExecutor} + }; + \node (hip) [abstract, rectangle, below=of dpcpp] { 
\textbf{HipExecutor} }; @@ -460,6 +464,7 @@ \draw[myarrow] (cuda.west) -- ++(-0.5,0) -- ++(0,0.6) -| (exec.south); + \draw[line] (dpcpp.west) -- ++(-0.5,0) -- ([xshift=-0.5cm] dpcpp.west); \draw[line] (hip.west) -- ++(-0.5,0) -- ([xshift=-0.5cm] cuda.west); \draw[line] (omp.west) -- ++(-0.5,0) -- ([xshift=-0.5cm] cuda.west); \draw[line] (ref.west) -- ++(-0.5,0) -- ([xshift=-0.5cm] cuda.west); diff --git a/doc/joss/paper.md b/doc/joss/paper.md index ad764e7f8a5..499874e8fec 100644 --- a/doc/joss/paper.md +++ b/doc/joss/paper.md @@ -53,13 +53,13 @@ Ginkgo is a production-ready sparse linear algebra library for high performance computing on GPU-centric architectures with a high level of performance portability and focuses on software sustainability. -The library focuses on solving sparse linear systems and accommodates a large variety -of matrix formats, state-of-the-art iterative (Krylov) solvers and preconditioners, -which make the library suitable for a variety of scientific applications. Ginkgo -supports many architectures such as multi-threaded CPU, NVIDIA GPUs, and AMD GPUs. -The heavy use of modern C++ features simplifies the addition of new executor -paradigms and algorithmic functionality without introducing significant -performance overhead. +The library focuses on solving sparse linear systems and accommodates a large +variety of matrix formats, state-of-the-art iterative (Krylov) solvers and +preconditioners, which make the library suitable for a variety of scientific +applications. Ginkgo supports many architectures such as multi-threaded CPU, +NVIDIA GPUs, AMD GPUs, and Intel CPU/GPUs. The heavy use of modern C++ features +simplifies the addition of new executor paradigms and algorithmic functionality +without introducing significant performance overhead. Solving linear systems is usually one of the most computationally and memory intensive aspects of any application. 
Hence there has been a significant diff --git a/doc/scripts/examples.pl b/doc/scripts/examples.pl index e9f273c1931..87632d2e721 100644 --- a/doc/scripts/examples.pl +++ b/doc/scripts/examples.pl @@ -34,6 +34,7 @@ "logging" => ',height=.25,width=.25,fillcolor="gold"', "stopping-criteria" => ',height=.25,width=.25,fillcolor="deepskyblue"', "preconditioners" => ',height=.25,width=.25,fillcolor="crimson"', + "mixed-precision" => ',height=.25,width=.25,fillcolor="aquamarine"', "unfinished" => ',height=.25,width=.25,style="dashed"', ); @@ -194,6 +195,7 @@ "logging" => 'Logging in Ginkgo', "stopping-criteria" => 'Stopping criteria', "preconditioners" => 'Preconditioners', + "mixed-precision" => 'Mixed Precision', "unfinished" => 'Unfinished codes', ); @@ -211,12 +213,12 @@ } # now add connections to make sure they appear nicely next to each other # in the legend -print " basic -- techniques -- logging -- stopping_criteria -- preconditioners -- unfinished;\n"; +print " basic -- techniques -- logging -- stopping_criteria -- preconditioners -- mixed_precision -- unfinished;\n"; # we need to tell 'dot' that all of these are at the same # rank to ensure they appear next to (as opposed to atop) # each other -print " {rank=same; basic, techniques, logging, stopping_criteria, preconditioners, unfinished}"; +print " {rank=same; basic, techniques, logging, stopping_criteria, preconditioners, mixed_precision, unfinished}"; # end the graph print "}\n"; diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt new file mode 100644 index 00000000000..5f5aa2a4a87 --- /dev/null +++ b/dpcpp/CMakeLists.txt @@ -0,0 +1,98 @@ +if (NOT GKO_CAN_COMPILE_DPCPP) + message(FATAL_ERROR "The CMAKE_CXX_COMPILER compiler, which is " + "${CMAKE_CXX_COMPILER} cannot compile DPC++ code!") +endif() + +ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION) +set(GINKGO_DPCPP_VERSION ${GINKGO_DPCPP_VERSION} PARENT_SCOPE) + +find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}") 
+set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE) +find_package(oneDPL REQUIRED HINTS "$ENV{DPL_ROOT}") +set(GINKGO_DPL_ROOT "${DPL_ROOT}" PARENT_SCOPE) + +add_library(ginkgo_dpcpp $ "") +target_sources(ginkgo_dpcpp + PRIVATE + base/version.dp.cpp + base/executor.dp.cpp + base/helper.dp.cpp + components/absolute_array.dp.cpp + components/fill_array.dp.cpp + components/prefix_sum.dp.cpp + factorization/ic_kernels.dp.cpp + factorization/ilu_kernels.dp.cpp + factorization/factorization_kernels.dp.cpp + factorization/par_ic_kernels.dp.cpp + factorization/par_ict_kernels.dp.cpp + factorization/par_ilu_kernels.dp.cpp + factorization/par_ilut_kernels.dp.cpp + matrix/coo_kernels.dp.cpp + matrix/csr_kernels.dp.cpp + matrix/fbcsr_kernels.dp.cpp + matrix/dense_kernels.dp.cpp + matrix/diagonal_kernels.dp.cpp + matrix/ell_kernels.dp.cpp + matrix/hybrid_kernels.dp.cpp + matrix/sellp_kernels.dp.cpp + matrix/sparsity_csr_kernels.dp.cpp + multigrid/amgx_pgm_kernels.dp.cpp + preconditioner/isai_kernels.dp.cpp + preconditioner/jacobi_kernels.dp.cpp + reorder/rcm_kernels.dp.cpp + solver/gmres_kernels.dp.cpp + solver/cb_gmres_kernels.dp.cpp + solver/idr_kernels.dp.cpp + solver/lower_trs_kernels.dp.cpp + solver/upper_trs_kernels.dp.cpp + stop/criterion_kernels.dp.cpp + stop/residual_norm_kernels.dp.cpp + ../common/unified/components/precision_conversion.cpp + ../common/unified/matrix/coo_kernels.cpp + ../common/unified/matrix/csr_kernels.cpp + ../common/unified/matrix/dense_kernels.cpp + ../common/unified/matrix/diagonal_kernels.cpp + ../common/unified/preconditioner/jacobi_kernels.cpp + ../common/unified/solver/bicg_kernels.cpp + ../common/unified/solver/bicgstab_kernels.cpp + ../common/unified/solver/cg_kernels.cpp + ../common/unified/solver/cgs_kernels.cpp + ../common/unified/solver/fcg_kernels.cpp + ../common/unified/solver/ir_kernels.cpp + ) + +ginkgo_compile_features(ginkgo_dpcpp) +target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP) + +set(GINKGO_DPCPP_FLAGS 
${GINKGO_DPCPP_FLAGS} PARENT_SCOPE) +target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") +# Note: add MKL as PRIVATE not PUBLIC (MKL example shows) to avoid propagating +# find_package(MKL) everywhere when linking ginkgo (see the MKL example +# https://software.intel.com/content/www/us/en/develop/documentation/onemkl-windows-developer-guide/top/getting-started/cmake-config-for-onemkl.html) +target_compile_options(ginkgo_dpcpp PRIVATE $) +target_compile_features(ginkgo_dpcpp PRIVATE cxx_std_17) +target_include_directories(ginkgo_dpcpp PRIVATE $) +target_link_options(ginkgo_dpcpp PRIVATE -fsycl-device-lib=all) +# When building ginkgo as a static library, we need to use dpcpp and per_kernel +# link option when the program uses a dpcpp related function. +if (BUILD_SHARED_LIBS) + target_link_options(ginkgo_dpcpp PRIVATE -fsycl-device-code-split=per_kernel) +else () + target_link_options(ginkgo_dpcpp PUBLIC -fsycl-device-code-split=per_kernel) +endif() +target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device) +target_link_libraries(ginkgo_dpcpp PRIVATE MKL::MKL_DPCPP oneDPL) +if (GINKGO_DPCPP_SINGLE_MODE) + target_compile_definitions(ginkgo_dpcpp PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) +endif() + +ginkgo_default_includes(ginkgo_dpcpp) +ginkgo_install_library(ginkgo_dpcpp) + +if (GINKGO_CHECK_CIRCULAR_DEPS) + ginkgo_check_headers(ginkgo_dpcpp GKO_COMPILING_DPCPP) +endif() + +if(GINKGO_BUILD_TESTS) + add_subdirectory(test) +endif() diff --git a/dpcpp/base/config.hpp b/dpcpp/base/config.hpp new file mode 100644 index 00000000000..94e15508d64 --- /dev/null +++ b/dpcpp/base/config.hpp @@ -0,0 +1,80 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_BASE_CONFIG_HPP_ +#define GKO_DPCPP_BASE_CONFIG_HPP_ + + +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +struct config { + /** + * The type containing a bitmask over all lanes of a warp. + */ + using lane_mask_type = uint64; + + /** + * The number of threads within a Dpcpp subgroup. + */ + static constexpr uint32 warp_size = 16; + + /** + * The bitmask of the entire warp. + */ + static constexpr auto full_lane_mask = ~zero(); + + /** + * The minimal amount of warps that need to be scheduled for each block + * to maximize GPU occupancy. 
+ */ + static constexpr uint32 min_warps_per_block = 4; + + /** + * The default maximal number of threads allowed in DPCPP group + */ + static constexpr uint32 max_block_size = 256; +}; + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_BASE_CONFIG_HPP_ diff --git a/dpcpp/base/dim3.dp.hpp b/dpcpp/base/dim3.dp.hpp new file mode 100644 index 00000000000..7017cb7b306 --- /dev/null +++ b/dpcpp/base/dim3.dp.hpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_BASE_DIM3_DP_HPP_ +#define GKO_DPCPP_BASE_DIM3_DP_HPP_ + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** + * dim3 is a cuda-like dim3 for sycl-range, which provides the same ordering as + * cuda and gets the sycl-range in reverse ordering. + */ +struct dim3 { + unsigned int x; + unsigned int y; + unsigned int z; + + /** + * Creates a dim3 with x, y, z + * + * @param xval x dim val + * @param yval y dim val and default is 1 + * @param zval z dim val and default is 1 + */ + dim3(unsigned int xval, unsigned int yval = 1, unsigned int zval = 1) + : x(xval), y(yval), z(zval) + {} + + /** + * get_range returns the range for sycl with correct ordering (reverse of + * cuda) + * + * @return sycl::range<3> + */ + sycl::range<3> get_range() { return sycl::range<3>(z, y, x); } +}; + + +/** + * sycl_nd_range will generate the proper sycl::nd_range<3> from grid, block + * + * @param grid the dim3 for grid + * @param block the dim3 for block + * + * @return sycl::nd_range<3> + */ +inline sycl::nd_range<3> sycl_nd_range(dim3 grid, dim3 block) +{ + auto local_range = block.get_range(); + auto global_range = grid.get_range() * local_range; + return sycl::nd_range<3>(global_range, local_range); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_BASE_DIM3_DP_HPP_ diff --git a/dpcpp/base/dpct.hpp 
b/dpcpp/base/dpct.hpp new file mode 100644 index 00000000000..f29132be4d4 --- /dev/null +++ b/dpcpp/base/dpct.hpp @@ -0,0 +1,50 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_DPCPP_BASE_DPCT_HPP_ +#define GKO_DPCPP_BASE_DPCT_HPP_ + + +#include + + +// This is partial extraction from dpct/dpct.hpp of Intel +#if defined(_MSC_VER) +#define __dpct_align__(n) __declspec(align(n)) +#define __dpct_inline__ __forceinline +#else +#define __dpct_align__(n) __attribute__((aligned(n))) +#define __dpct_inline__ __inline__ __attribute__((always_inline)) +#endif + + +#endif // GKO_DPCPP_BASE_DPCT_HPP_ diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp new file mode 100644 index 00000000000..d0380127732 --- /dev/null +++ b/dpcpp/base/executor.dp.cpp @@ -0,0 +1,285 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include +#include +#include + + +#include + + +#include +#include + + +namespace gko { +namespace detail { + + +const std::vector get_devices(std::string device_type) +{ + std::map device_type_map{ + {"accelerator", sycl::info::device_type::accelerator}, + {"all", sycl::info::device_type::all}, + {"cpu", sycl::info::device_type::cpu}, + {"host", sycl::info::device_type::host}, + {"gpu", sycl::info::device_type::gpu}}; + std::for_each(device_type.begin(), device_type.end(), + [](char &c) { c = std::tolower(c); }); + return sycl::device::get_devices(device_type_map.at(device_type)); +} + + +} // namespace detail + + +void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + dest->get_queue()->memcpy(dest_ptr, src_ptr, num_bytes).wait(); + } +} + + +bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + auto device = detail::get_devices( + dest_exec->get_device_type())[dest_exec->get_device_id()]; + return device.is_host() || device.is_cpu(); +} + + +std::shared_ptr DpcppExecutor::create( + int device_id, std::shared_ptr master, std::string device_type) +{ + return std::shared_ptr( + new DpcppExecutor(device_id, std::move(master), device_type)); +} + + +void DpcppExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + // Closest 
CPUs, NUMA node can be updated when there is a way to identify + // the device itself, which is currently not available with DPC++. +} + + +void DpcppExecutor::raw_free(void *ptr) const noexcept +{ + // the free function may synchronize execution or not, which depends on + // implementation or backend, so it is not guaranteed. + // TODO: maybe a light wait implementation? + try { + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); + } catch (cl::sycl::exception &err) { +#if GKO_VERBOSE_LEVEL >= 1 + // Unfortunately, if memory free fails, there's not much we can do + std::cerr << "Unrecoverable Dpcpp error on device " + << this->get_device_id() << " in " << __func__ << ": " + << err.what() << std::endl + << "Exiting program" << std::endl; +#endif // GKO_VERBOSE_LEVEL >= 1 + // OpenCL error codes use 0 for CL_SUCCESS and negative numbers for other + // errors. If the error is not from OpenCL, it will return CL_SUCCESS. + int err_code = err.get_cl_code(); + // if return CL_SUCCESS, exit 1 as DPCPP error. + if (err_code == 0) { + err_code = 1; + } + std::exit(err_code); + } +} + + +void *DpcppExecutor::raw_alloc(size_type num_bytes) const +{ + void *dev_ptr = sycl::malloc_device(num_bytes, *queue_.get()); + GKO_ENSURE_ALLOCATED(dev_ptr, "DPC++", num_bytes); + return dev_ptr; +} + + +void DpcppExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + queue_->memcpy(dest_ptr, src_ptr, num_bytes).wait(); + } +} + + +void DpcppExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + // TODO: later when possible, if we have DPC++ with a CUDA backend + // support/compiler, we could maybe support native copies? 
+ GKO_NOT_SUPPORTED(dest); +} + + +void DpcppExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + GKO_NOT_SUPPORTED(dest); +} + + +void DpcppExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + // If the queue is different and is not cpu/host, the queue can not + // transfer the data to another queue (on the same device) + // Note. it could be changed when we ensure the behavior is expected. + auto queue = this->get_queue(); + auto dest_queue = dest->get_queue(); + auto device = queue->get_device(); + auto dest_device = dest_queue->get_device(); + if (((device.is_host() || device.is_cpu()) && + (dest_device.is_host() || dest_device.is_cpu())) || + (queue == dest_queue)) { + dest->get_queue()->memcpy(dest_ptr, src_ptr, num_bytes).wait(); + } else { + // the memcpy only support host<->device or itself memcpy + GKO_NOT_SUPPORTED(dest); + } + } +} + + +void DpcppExecutor::synchronize() const { queue_->wait_and_throw(); } + + +void DpcppExecutor::run(const Operation &op) const +{ + this->template log(this, &op); + op.run(std::static_pointer_cast( + this->shared_from_this())); + this->template log(this, &op); +} + + +int DpcppExecutor::get_num_devices(std::string device_type) +{ + return detail::get_devices(device_type).size(); +} + + +bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +{ + auto device = detail::get_devices( + get_exec_info().device_type)[get_exec_info().device_id]; + return device.is_host() || device.is_cpu(); +} + +bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +{ + // If the queue is different and is not cpu/host, the queue can not access + // the data from another queue (on the same device) + // Note. it could be changed when we ensure the behavior is expected. 
+ auto queue = this->get_queue(); + auto dest_queue = dest_exec->get_queue(); + auto device = queue->get_device(); + auto dest_device = dest_queue->get_device(); + return ((device.is_host() || device.is_cpu()) && + (dest_device.is_host() || dest_device.is_cpu())) || + (queue == dest_queue); +} + + +namespace detail { + + +void delete_queue(sycl::queue *queue) +{ + queue->wait(); + delete queue; +} + + +} // namespace detail + + +void DpcppExecutor::set_device_property() +{ + assert(this->get_exec_info().device_id < + DpcppExecutor::get_num_devices(this->get_exec_info().device_type)); + auto device = detail::get_devices( + this->get_exec_info().device_type)[this->get_exec_info().device_id]; + if (!device.is_host()) { + try { + auto subgroup_sizes = + device.get_info(); + for (auto &i : subgroup_sizes) { + this->get_exec_info().subgroup_sizes.push_back(i); + } + } catch (cl::sycl::runtime_error &err) { + GKO_NOT_SUPPORTED(device); + } + } + this->get_exec_info().num_computing_units = static_cast( + device.get_info()); + const auto subgroup_sizes = this->get_exec_info().subgroup_sizes; + if (subgroup_sizes.size()) { + this->get_exec_info().max_subgroup_size = static_cast( + *std::max_element(subgroup_sizes.begin(), subgroup_sizes.end())); + } + this->get_exec_info().max_workgroup_size = static_cast( + device.get_info()); + auto max_workitem_sizes = + device.get_info(); + // Get the max dimension of a sycl::id object + auto max_work_item_dimensions = + device.get_info(); + for (uint32 i = 0; i < max_work_item_dimensions; i++) { + this->get_exec_info().max_workitem_sizes.push_back( + max_workitem_sizes[i]); + } + // Here we declare the queue with the property `in_order` which ensures the + // kernels are executed in the submission order. Otherwise, calls to + // `wait()` would be needed after every call to a DPC++ function or kernel. 
+ // For example, without `in_order`, doing a copy, a kernel, and a copy, will + // not necessarily happen in that order by default, which we need to avoid. + auto *queue = new sycl::queue{device, sycl::property::queue::in_order{}}; + queue_ = std::move(queue_manager{queue, detail::delete_queue}); +} + + +} // namespace gko diff --git a/dpcpp/base/helper.dp.cpp b/dpcpp/base/helper.dp.cpp new file mode 100644 index 00000000000..5e6c1a579f5 --- /dev/null +++ b/dpcpp/base/helper.dp.cpp @@ -0,0 +1,62 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include "dpcpp/base/helper.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +bool validate(sycl::queue *queue, unsigned int workgroup_size, + unsigned int subgroup_size) +{ + auto device = queue->get_device(); + auto subgroup_size_list = + device.get_info(); + auto max_workgroup_size = + device.get_info(); + bool allowed = false; + for (auto &i : subgroup_size_list) { + allowed |= (i == subgroup_size); /* becomes true once subgroup_size equals any entry of subgroup_size_list */ + } + return allowed && (workgroup_size <= max_workgroup_size); /* valid only if both checks pass; equality with the maximum is accepted */ +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/base/helper.hpp b/dpcpp/base/helper.hpp new file mode 100644 index 00000000000..cb98e4c511e --- /dev/null +++ b/dpcpp/base/helper.hpp @@ -0,0 +1,185 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3.
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_BASE_HELPER_HPP_ +#define GKO_DPCPP_BASE_HELPER_HPP_ + + +#include + + +#include + + +#include +#include + + +#include "core/base/types.hpp" +#include "dpcpp/base/dim3.dp.hpp" + + +/** + * GKO_ENABLE_DEFAULT_HOST gives a default host implementation for those + * kernels which require encoded config but do not need explicit template + * parameter and shared memory + * + * @param name_ the name of the host function with config + * @param kernel_ the kernel name + */ +#define GKO_ENABLE_DEFAULT_HOST(name_, kernel_) \ + template \ + void name_(dim3 grid, dim3 block, gko::size_type, sycl::queue *queue, \ + InferredArgs...
args) \ + { \ + queue->submit([&](sycl::handler &cgh) { \ + cgh.parallel_for(sycl_nd_range(grid, block), \ + [=](sycl::nd_item<3> item_ct1) { \ + kernel_(args..., item_ct1); \ + }); \ + }); \ + } + + +/** + * GKO_ENABLE_DEFAULT_HOST_CONFIG gives a default host implementation for those + * kernels which require encoded config but do not need explicit template + * parameter and shared memory + * + * @param name_ the name of the host function with config + * @param kernel_ the kernel name + */ +#define GKO_ENABLE_DEFAULT_HOST_CONFIG(name_, kernel_) \ + template \ + inline void name_(dim3 grid, dim3 block, gko::size_type, \ + sycl::queue *queue, InferredArgs... args) \ + { \ + queue->submit([&](sycl::handler &cgh) { \ + cgh.parallel_for(sycl_nd_range(grid, block), \ + [=](sycl::nd_item<3> item_ct1) { \ + kernel_(args..., item_ct1); \ + }); \ + }); \ + } + +/** + * GKO_ENABLE_DEFAULT_CONFIG_CALL gives a default config selection call + * implementation for those kernels which require config selection but do not + * need explicit template parameter + * + * @param name_ the name of the calling function + * @param callable_ the host function with selection + * @note cfg_ (the ConfigSet for encode/decode method) is not a parameter of this macro + * @param list_ the list for encoded config selection, whose value should be + * available to decode<0> for blocksize and decode<1> for + * subgroup_size by cfg_ + */ +#define GKO_ENABLE_DEFAULT_CONFIG_CALL(name_, callable_, list_) \ + template \ + void name_(std::uint32_t desired_cfg, dim3 grid, dim3 block, \ + gko::size_type dynamic_shared_memory, sycl::queue *queue, \ + InferredArgs...
args) \ + { \ + callable_( \ + list_, \ + [&desired_cfg](std::uint32_t cfg) { return cfg == desired_cfg; }, \ + ::gko::syn::value_list(), ::gko::syn::value_list(), \ + ::gko::syn::value_list(), \ + ::gko::syn::type_list<>(), grid, block, dynamic_shared_memory, \ + queue, std::forward(args)...); \ + } + +// __WG_BOUND__ gives the cuda-like launch bound in cuda ordering (x, y, z are passed to reqd_work_group_size in reversed order) +#define __WG_BOUND_1D__(x) [[intel::reqd_work_group_size(1, 1, x)]] +#define __WG_BOUND_2D__(x, y) [[intel::reqd_work_group_size(1, y, x)]] +#define __WG_BOUND_3D__(x, y, z) [[intel::reqd_work_group_size(z, y, x)]] +#define WG_BOUND_OVERLOAD(_1, _2, _3, NAME, ...) NAME +#define __WG_BOUND__(...) \ + WG_BOUND_OVERLOAD(__VA_ARGS__, __WG_BOUND_3D__, __WG_BOUND_2D__, \ + __WG_BOUND_1D__, UNUSED) \ + (__VA_ARGS__) + +// __WG_CONFIG_BOUND__ uses ConfigSet to unpack the config +#define __WG_CONFIG_BOUND__(CFG, cfg) \ + __WG_BOUND_3D__(CFG::decode<0>(cfg), CFG::decode<1>(cfg), \ + CFG::decode<2>(cfg)) + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** + * This is the validate function for common checks. It checks that the workgroup size + * does not exceed the device max workgroup size and that the subgroup size is one of the supported + * subgroup sizes. + * + * @param queue the sycl queue pointer + * @param workgroup_size the workgroup size (block size in cuda sense) + * @param subgroup_size the subgroup size (warp size in cuda sense) + * + * @return whether the given arguments are valid for the given queue. + */ +bool validate(sycl::queue *queue, unsigned workgroup_size, + unsigned subgroup_size); + + +/** + * get_first_cfg will return the first config accepted by the validate function from + * the given config array.
+ * + * @tparam IterArr the iterable array type + * @tparam Validate the validate function type + * + * @param arr the config array + * @param verify the validate function + * + * @return the first valid config + */ +template +std::uint32_t get_first_cfg(IterArr &arr, Validate verify) +{ + for (auto &cfg : arr) { + if (verify(cfg)) { + return cfg; + } + } + GKO_NOT_SUPPORTED(arr); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_BASE_HELPER_HPP_ diff --git a/dpcpp/base/kernel_launch.dp.hpp b/dpcpp/base/kernel_launch.dp.hpp new file mode 100644 index 00000000000..5e9d505ec52 --- /dev/null +++ b/dpcpp/base/kernel_launch.dp.hpp @@ -0,0 +1,91 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_ +#error \ + "This file can only be used from inside common/unified/base/kernel_launch.hpp" +#endif + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +template +void generic_kernel_1d(sycl::handler &cgh, size_type size, KernelFunction fn, + KernelArgs... args) +{ + cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { + auto idx = static_cast(idx_id[0]); + fn(idx, args...); /* fn receives the 1D index followed by the kernel arguments */ + }); +} + + +template +void generic_kernel_2d(sycl::handler &cgh, size_type rows, size_type cols, + KernelFunction fn, KernelArgs... args) +{ + cgh.parallel_for(sycl::range<2>{rows, cols}, [=](sycl::id<2> idx) { + auto row = static_cast(idx[0]); + auto col = static_cast(idx[1]); + fn(row, col, args...); /* idx[0] is used as the row and idx[1] as the column */ + }); +} + + +template +void run_kernel(std::shared_ptr exec, KernelFunction fn, + size_type size, KernelArgs &&... args) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + generic_kernel_1d(cgh, size, fn, map_to_device(args)...); /* every argument goes through map_to_device before the launch */ + }); +} + +template +void run_kernel(std::shared_ptr exec, KernelFunction fn, + dim<2> size, KernelArgs &&...
args) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + generic_kernel_2d(cgh, size[0], size[1], fn, map_to_device(args)...); /* size[0] is passed as rows, size[1] as cols */ + }); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/base/kernel_launch_solver.dp.hpp b/dpcpp/base/kernel_launch_solver.dp.hpp new file mode 100644 index 00000000000..ea0a9ea7f89 --- /dev/null +++ b/dpcpp/base/kernel_launch_solver.dp.hpp @@ -0,0 +1,74 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ +#error \ + "This file can only be used from inside common/unified/base/kernel_launch_solver.hpp" +#endif + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +template +void generic_kernel_2d_solver(sycl::handler &cgh, size_type rows, + size_type cols, size_type default_stride, + KernelFunction fn, KernelArgs... args) +{ + cgh.parallel_for(sycl::range<2>{rows, cols}, [=](sycl::id<2> idx) { + auto row = static_cast(idx[0]); + auto col = static_cast(idx[1]); + fn(row, col, + device_unpack_solver_impl::unpack(args, + default_stride)...); /* each argument is passed through unpack together with default_stride before reaching fn */ + }); +} + + +template +void run_kernel_solver(std::shared_ptr exec, + KernelFunction fn, dim<2> size, size_type default_stride, + KernelArgs &&... args) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + kernels::dpcpp::generic_kernel_2d_solver( + cgh, size[0], size[1], default_stride, fn, + kernels::dpcpp::map_to_device(args)...); /* size[0] is passed as rows, size[1] as cols; arguments go through map_to_device */ + }); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/base/onemkl_bindings.hpp b/dpcpp/base/onemkl_bindings.hpp new file mode 100644 index 00000000000..d4503fc8fe0 --- /dev/null +++ b/dpcpp/base/onemkl_bindings.hpp @@ -0,0 +1,133 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_ +#define GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_ + + +#include + + +#include +#include + + +#include + + +namespace gko { +/** + * @brief The device specific kernels namespace. + * + * @ingroup kernels + */ +namespace kernels { +/** + * @brief The DPCPP namespace. + * + * @ingroup dpcpp + */ +namespace dpcpp { +/** + * @brief The ONEMKL namespace. + * + * @ingroup onemkl + */ +namespace onemkl { +/** + * @brief The detail namespace.
+ * + * @ingroup detail + */ +namespace detail { + + +template +inline void not_implemented(Args &&...) GKO_NOT_IMPLEMENTED; + + +} // namespace detail + + +template +struct is_supported : std::false_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + + +#define GKO_BIND_DOT(ValueType, Name, Func) \ + inline void Name(::cl::sycl::queue &exec_queue, std::int64_t n, \ + const ValueType *x, std::int64_t incx, \ + const ValueType *y, std::int64_t incy, ValueType *result) \ + { \ + Func(exec_queue, n, x, incx, y, incy, result); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +// Bind the dot for x^T * y (real types use dot, complex types use dotu) +GKO_BIND_DOT(float, dot, oneapi::mkl::blas::row_major::dot); +GKO_BIND_DOT(double, dot, oneapi::mkl::blas::row_major::dot); +GKO_BIND_DOT(std::complex, dot, oneapi::mkl::blas::row_major::dotu); +GKO_BIND_DOT(std::complex, dot, oneapi::mkl::blas::row_major::dotu); +template +GKO_BIND_DOT(ValueType, dot, detail::not_implemented); + +// Bind the conj_dot for x' * y, i.e. x^H * y (real types use dot, complex types use dotc) +GKO_BIND_DOT(float, conj_dot, oneapi::mkl::blas::row_major::dot); +GKO_BIND_DOT(double, conj_dot, oneapi::mkl::blas::row_major::dot); +GKO_BIND_DOT(std::complex, conj_dot, oneapi::mkl::blas::row_major::dotc); +GKO_BIND_DOT(std::complex, conj_dot, + oneapi::mkl::blas::row_major::dotc); +template +GKO_BIND_DOT(ValueType, conj_dot, detail::not_implemented); + +#undef GKO_BIND_DOT + +} // namespace onemkl +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_BASE_ONEMKL_BINDINGS_HPP_ diff --git a/dpcpp/base/version.dp.cpp b/dpcpp/base/version.dp.cpp new file mode 100644 index 00000000000..e6875535486 --- /dev/null +++ b/dpcpp/base/version.dp.cpp @@ -0,0 +1,48 @@
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +namespace gko { + + +version version_info::get_dpcpp_version() noexcept +{ + // When compiling the module, the header version is the same as the library + // version. A mismatch between the header and the module versions may happen + // when using shared libraries from different versions of Ginkgo.
+ return version_info::get_header_version(); +} + + +} // namespace gko diff --git a/dpcpp/components/absolute_array.dp.cpp b/dpcpp/components/absolute_array.dp.cpp new file mode 100644 index 00000000000..42c5216f8a7 --- /dev/null +++ b/dpcpp/components/absolute_array.dp.cpp @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/ + +#include "core/components/absolute_array.hpp" + + +#include + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace components { + + +template +void inplace_absolute_array(std::shared_ptr exec, + ValueType *data, size_type n) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { + const auto idx = idx_id[0]; + data[idx] = abs(data[idx]); /* overwrite each of the n entries with its absolute value */ + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); + + +template +void outplace_absolute_array(std::shared_ptr exec, + const ValueType *in, size_type n, + remove_complex *out) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { + const auto idx = idx_id[0]; + out[idx] = abs(in[idx]); /* out receives the absolute values; in is const and left untouched */ + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL); + + +} // namespace components +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/components/atomic.dp.hpp b/dpcpp/components/atomic.dp.hpp new file mode 100644 index 00000000000..c2048b1510b --- /dev/null +++ b/dpcpp/components/atomic.dp.hpp @@ -0,0 +1,295 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3.
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_ATOMIC_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_ATOMIC_DP_HPP_ + + +#include + + +#include + + +#include "dpcpp/base/dpct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace atomic { + + +constexpr auto local_space = cl::sycl::access::address_space::local_space; +constexpr auto global_space = cl::sycl::access::address_space::global_space; + + +} // namespace atomic + +namespace { + + +template +T atomic_compare_exchange_strong( + cl::sycl::multi_ptr addr, T expected, T desired, + cl::sycl::memory_order success = cl::sycl::memory_order::relaxed, + cl::sycl::memory_order fail = cl::sycl::memory_order::relaxed) +{ + cl::sycl::atomic obj(addr); + obj.compare_exchange_strong(expected, desired, success, fail); + return expected; +} + +template +T atomic_compare_exchange_strong( + T *addr, T expected, T desired, + cl::sycl::memory_order success = cl::sycl::memory_order::relaxed, + cl::sycl::memory_order fail = 
cl::sycl::memory_order::relaxed) +{ + return atomic_compare_exchange_strong( + cl::sycl::multi_ptr(addr), expected, desired, success, + fail); +} + + +template +inline T atomic_fetch_add( + T *addr, T operand, + cl::sycl::memory_order memoryOrder = cl::sycl::memory_order::relaxed) +{ + cl::sycl::atomic obj( + (cl::sycl::multi_ptr(addr))); + return cl::sycl::atomic_fetch_add(obj, operand, memoryOrder); +} + + +template +inline T atomic_fetch_max( + T *addr, T operand, + cl::sycl::memory_order memoryOrder = cl::sycl::memory_order::relaxed) +{ + cl::sycl::atomic obj( + (cl::sycl::multi_ptr(addr))); + return cl::sycl::atomic_fetch_max(obj, operand, memoryOrder); +} + + +} // namespace + + +namespace detail { + + +template +struct atomic_helper { + __dpct_inline__ static ValueType atomic_add(ValueType *, ValueType) + { + static_assert(sizeof(ValueType) == 0, + "This default function is not implemented, only the " + "specializations are."); + // TODO: add proper implementation of generic atomic add + } +}; + + +template +struct atomic_max_helper { + __dpct_inline__ static ValueType atomic_max(ValueType *, ValueType) + { + static_assert(sizeof(ValueType) == 0, + "This default function is not implemented, only the " + "specializations are."); + // TODO: add proper implementation of generic atomic max + } +}; + + +template +__dpct_inline__ ResultType reinterpret(ValueType val) +{ + static_assert(sizeof(ValueType) == sizeof(ResultType), + "The type to reinterpret to must be of the same size as the " + "original type."); + return reinterpret_cast(val); +} + + +#define GKO_BIND_ATOMIC_HELPER_STRUCTURE(CONVERTER_TYPE) \ + template \ + struct atomic_helper< \ + addressSpace, ValueType, \ + std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ + __dpct_inline__ static ValueType atomic_add( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + CONVERTER_TYPE *address_as_converter = \ + reinterpret_cast(addr); \ + CONVERTER_TYPE old = *address_as_converter; \ + 
CONVERTER_TYPE assumed; \ + do { \ + assumed = old; \ + old = atomic_compare_exchange_strong( \ + address_as_converter, assumed, \ + reinterpret( \ + val + reinterpret(assumed))); \ + } while (assumed != old); \ + return reinterpret(old); \ + } \ + }; + +// Support 64-bit ATOMIC_ADD +GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned long long int); +// Support 32-bit ATOMIC_ADD +GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int); + + +#undef GKO_BIND_ATOMIC_HELPER_STRUCTURE + +#define GKO_BIND_ATOMIC_HELPER_VALUETYPE(ValueType) \ + template \ + struct atomic_helper> { \ + __dpct_inline__ static ValueType atomic_add( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + return atomic_fetch_add(addr, val); \ + } \ + }; + +GKO_BIND_ATOMIC_HELPER_VALUETYPE(int); +GKO_BIND_ATOMIC_HELPER_VALUETYPE(unsigned int); +GKO_BIND_ATOMIC_HELPER_VALUETYPE(unsigned long long int); + +#undef GKO_BIND_ATOMIC_HELPER_VALUETYPE + + +template +struct atomic_helper< + addressSpace, ValueType, + std::enable_if_t() && sizeof(ValueType) >= 16>> { + __dpct_inline__ static ValueType atomic_add(ValueType *__restrict__ addr, + ValueType val) + { + using real_type = remove_complex; + real_type *real_addr = reinterpret_cast(addr); + // Separate to real part and imag part + auto real = atomic_helper::atomic_add( + &real_addr[0], val.real()); + auto imag = atomic_helper::atomic_add( + &real_addr[1], val.imag()); + return {real, imag}; + } +}; + + +#define GKO_BIND_ATOMIC_MAX_STRUCTURE(CONVERTER_TYPE) \ + template \ + struct atomic_max_helper< \ + addressSpace, ValueType, \ + std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ + __dpct_inline__ static ValueType atomic_max( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + CONVERTER_TYPE *address_as_converter = \ + reinterpret_cast(addr); \ + CONVERTER_TYPE old = *address_as_converter; \ + CONVERTER_TYPE assumed; \ + do { \ + assumed = old; \ + if (reinterpret(assumed) < val) { \ + old = atomic_compare_exchange_strong( \ + 
address_as_converter, assumed, \ + reinterpret(val)); \ + } \ + } while (assumed != old); \ + return reinterpret(old); \ + } \ + }; + +// Support 64-bit ATOMIC_MAX +GKO_BIND_ATOMIC_MAX_STRUCTURE(unsigned long long int); +// Support 32-bit ATOMIC_MAX +GKO_BIND_ATOMIC_MAX_STRUCTURE(unsigned int); + + +#undef GKO_BIND_ATOMIC_MAX_STRUCTURE + +#define GKO_BIND_ATOMIC_MAX_VALUETYPE(ValueType) \ + template \ + struct atomic_max_helper> { \ + __dpct_inline__ static ValueType atomic_max( \ + ValueType *__restrict__ addr, ValueType val) \ + { \ + return atomic_fetch_max(addr, val); \ + } \ + }; + +GKO_BIND_ATOMIC_MAX_VALUETYPE(int); +GKO_BIND_ATOMIC_MAX_VALUETYPE(unsigned int); +GKO_BIND_ATOMIC_MAX_VALUETYPE(unsigned long long int); + +#undef GKO_BIND_ATOMIC_MAX_VALUETYPE + + +} // namespace detail + + +template +__dpct_inline__ T atomic_add(T *__restrict__ addr, T val) +{ + return detail::atomic_helper::atomic_add(addr, val); +} + + +template +__dpct_inline__ T atomic_max(T *__restrict__ addr, T val) +{ + return detail::atomic_max_helper::atomic_max(addr, val); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_ATOMIC_DP_HPP_ diff --git a/dpcpp/components/cooperative_groups.dp.hpp b/dpcpp/components/cooperative_groups.dp.hpp new file mode 100644 index 00000000000..78f9d63d698 --- /dev/null +++ b/dpcpp/components/cooperative_groups.dp.hpp @@ -0,0 +1,513 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_COOPERATIVE_GROUPS_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_COOPERATIVE_GROUPS_DP_HPP_ + + +#include + + +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dpct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** + * Ginkgo uses cooperative groups to handle communication among the threads. + * + * However, DPCPP's implementation of cooperative groups is still quite limited + * in functionality, and some parts are not supported on all hardware + * interesting for Ginkgo. For this reason, Ginkgo exposes only a part of the + * original functionality, and possibly extends it if it is required. 
Thus, + * developers should include and use this header and the gko::group namespace + * instead of the standard cooperative_groups.h header. The interface exposed + * by Ginkgo's implementation is equivalent to the standard interface, with some + * useful extensions. + * + * A cooperative group (both from standard DPCPP and from Ginkgo) is not a + * specific type, but a concept. That is, any type satisfying the interface + * imposed by the cooperative groups API is considered a cooperative + * group (a.k.a. "duck typing"). To maximize the generality of components that + * need cooperative groups, instead of creating the group manually, consider + * requesting one as an input parameter. Make sure its type is a template + * parameter to maximize the set of groups for which your algorithm can be + * invoked. To maximize the amount of contexts in which your algorithm can be + * called and avoid hidden requirements, do not depend on a specific setup of + * kernel launch parameters (i.e. grid dimensions and block dimensions). + * Instead, use the thread_rank() method of the group to distinguish between + * distinct threads of a group. + * + * The original DPCPP implementation does not provide ways to verify if a + * certain type represents a cooperative group. Ginkgo's implementation provides + * metafunctions which do that. Additionally, not all cooperative groups have + * equivalent functionality, so Ginkgo splits the cooperative group concept into + * three sub-concepts which describe what functionality is available. 
Here is a + * list of concepts and their interfaces: + * + * ```c++ + * concept Group { + * unsigned size() const; + * unsigned thread_rank() const; + * }; + * + * concept SynchronizableGroup : Group { + * void sync(); + * }; + * + * concept CommunicatorGroup : SynchronizableGroup { + * template + * T shfl(T var, int srcLane); + * T shfl_up(T var, unsigned delta); + * T shfl_down(T var, unsigned delta); + * T shfl_xor(T var, int laneMask); + * int all(int predicate); + * int any(int predicate); + * unsigned ballot(int predicate); + * }; + * ``` + * + * To check if a group T satisfies one of the concepts, one can use the + * metafunctions is_group::value, is_synchronizable_group::value and + * is_communicator_group::value. + * + * @note Please note that the current implementation of cooperative groups + * contains only a subset of functionalities provided by those APIs. If + * you need more functionality, please add the appropriate implementations + * to existing cooperative groups, or create new groups if the existing + * groups do not cover your use-case. For an example, see the + * enable_extended_shuffle mixin, which adds extended shuffles support + * to built-in DPCPP cooperative groups. + */ +namespace group { + + +// metafunctions: every trait defaults to false; group types opt in by specialization +namespace detail { + + +template +struct is_group_impl : std::false_type {}; + + +template +struct is_synchronizable_group_impl : std::false_type {}; + + +template +struct is_communicator_group_impl : std::false_type {}; + + +} // namespace detail + + +/** + * Check if T is a Group. + */ +template +using is_group = detail::is_group_impl>; + + +/** + * Check if T is a SynchronizableGroup. + */ +template +using is_synchronizable_group = + detail::is_synchronizable_group_impl>; + + +/** + * Check if T is a CommunicatorGroup. + */ +template +using is_communicator_group = + detail::is_communicator_group_impl>; + + +// types +namespace detail { + + +/** + * This is a limited implementation of the DPCPP thread_block_tile. 
+ */ +template +class thread_block_tile : public sycl::ONEAPI::sub_group { + using sub_group = sycl::ONEAPI::sub_group; + using id_type = sub_group::id_type; + using mask_type = config::lane_mask_type; + +public: + // note: intel calls nd_item.get_sub_group(), but it still calls + // intel::sub_group() to create the sub_group. + template + explicit thread_block_tile(const Group &parent_group) + : data_{Size, 0}, sub_group() + { +#ifndef NDEBUG + assert(this->get_local_range().get(0) == Size); +#endif + data_.rank = this->get_local_id(); + } + + + __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } + + __dpct_inline__ unsigned size() const noexcept { return Size; } + + __dpct_inline__ void sync() const noexcept { this->barrier(); } + +#define GKO_BIND_SHFL(ShflOpName, ShflOp) \ + template \ + __dpct_inline__ ValueType ShflOpName(ValueType var, SelectorType selector) \ + const noexcept \ + { \ + return this->ShflOp(var, selector); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + GKO_BIND_SHFL(shfl, shuffle); + GKO_BIND_SHFL(shfl_xor, shuffle_xor); + + // shfl_up from an out-of-range lane gives undefined behavior, so we + // manually return the original value in that case to give the same result as + // cuda/hip. + template + __dpct_inline__ ValueType shfl_up(ValueType var, + SelectorType selector) const noexcept + { + const auto result = this->shuffle_up(var, selector); + return (data_.rank < selector) ? var : result; + } + + // shfl_down from an out-of-range lane gives undefined behavior, so we + // manually return the original value in that case to give the same result as + // cuda/hip. + template + __dpct_inline__ ValueType shfl_down(ValueType var, + SelectorType selector) const noexcept + { + const auto result = this->shuffle_down(var, selector); + return (data_.rank + selector >= Size) ? 
var : result; + } + + /** + * Returns a bitmask containing the value of the given predicate + * for all threads in the group. + * This means that the ith bit is equal to the predicate of the + * thread with thread_rank() == i in the group. + * Note that the whole group needs to execute the same operation. + */ + __dpct_inline__ mask_type ballot(int predicate) const noexcept + { + // TODO: change this when oneAPI updates the mask-related API + return sycl::ONEAPI::reduce( + *this, (predicate != 0) ? mask_type(1) << data_.rank : mask_type(0), + sycl::ONEAPI::plus()); + } + + /** + * Returns true iff the predicate is true for at least one thread in the + * group. Note that the whole group needs to execute the same operation. + */ + __dpct_inline__ bool any(int predicate) const noexcept + { + return sycl::ONEAPI::any_of(*this, (predicate != 0)); + } + + /** + * Returns true iff the predicate is true for all threads in the group. + * Note that the whole group needs to execute the same operation. 
+ */ + __dpct_inline__ bool all(int predicate) const noexcept + { + return sycl::ONEAPI::all_of(*this, (predicate != 0)); + } + + +private: + struct alignas(8) { + unsigned size; + unsigned rank; + } data_; +}; + + +// specialization for a tile of size 1: shuffles return the input, sync is a no-op +template <> +class thread_block_tile<1> { + using mask_type = config::lane_mask_type; + static constexpr unsigned Size = 1; + +public: + template + explicit thread_block_tile(const Group &parent_group) : data_{Size, 0} + {} + + + __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } + + __dpct_inline__ unsigned size() const noexcept { return Size; } + + __dpct_inline__ void sync() const noexcept {} + + +#define GKO_DISABLE_SHFL(ShflOpName) \ + template \ + __dpct_inline__ ValueType ShflOpName(ValueType var, SelectorType selector) \ + const noexcept \ + { \ + return var; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + GKO_DISABLE_SHFL(shfl); + GKO_DISABLE_SHFL(shfl_up); + GKO_DISABLE_SHFL(shfl_down); + GKO_DISABLE_SHFL(shfl_xor); + + /** + * Returns a bitmask containing the value of the given predicate + * for all threads in the group. + * This means that the ith bit is equal to the predicate of the + * thread with thread_rank() == i in the group. + * Note that the whole group needs to execute the same operation. + */ + __dpct_inline__ mask_type ballot(int predicate) const noexcept + { + return (predicate != 0) ? mask_type(1) : mask_type(0); + } + + /** + * Returns true iff the predicate is true for at least one thread in the + * group. Note that the whole group needs to execute the same operation. + */ + __dpct_inline__ bool any(int predicate) const noexcept + { + return (predicate != 0); + } + + /** + * Returns true iff the predicate is true for all threads in the group. + * Note that the whole group needs to execute the same operation. 
+ */ + __dpct_inline__ bool all(int predicate) const noexcept + { + return (predicate != 0); + } + + +private: + struct alignas(8) { + unsigned size; + unsigned rank; + } data_; +}; + + +} // namespace detail + + +using detail::thread_block_tile; + + +// Only support tile_partition with 2, 4, 8, 16, 32, 64. +template +__dpct_inline__ + std::enable_if_t<(Size > 1) && Size <= 64 && !(Size & (Size - 1)), + detail::thread_block_tile> + tiled_partition + [[intel::reqd_sub_group_size(Size)]] (const Group &group) +{ + return detail::thread_block_tile(group); +} + + +template +__dpct_inline__ std::enable_if_t> +tiled_partition(const Group &group) +{ + return detail::thread_block_tile(group); +} + + +namespace detail { + + +template +struct is_group_impl> : std::true_type {}; + + +template +struct is_synchronizable_group_impl> : std::true_type { +}; + + +template +struct is_communicator_group_impl> : std::true_type {}; + + +} // namespace detail + + +class thread_block { + friend __dpct_inline__ thread_block this_thread_block(sycl::nd_item<3> &); + +public: + __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } + + __dpct_inline__ unsigned size() const noexcept { return data_.size; } + + __dpct_inline__ void sync() const noexcept { group_.barrier(); } + +private: + __dpct_inline__ thread_block(sycl::nd_item<3> &group) + : group_{group}, + data_{static_cast(group.get_local_range().size()), + static_cast(group.get_local_linear_id())} + {} + struct alignas(8) { + unsigned size; + unsigned rank; + } data_; + + sycl::nd_item<3> &group_; +}; + + +__dpct_inline__ thread_block this_thread_block(sycl::nd_item<3> &group) +{ + return thread_block(group); +} + + +namespace detail { + + +template <> +struct is_group_impl : std::true_type {}; + + +template <> +struct is_synchronizable_group_impl : std::true_type {}; + + +} // namespace detail + + +/** + * This is a limited implementation of the DPCPP grid_group that works even on + * devices that do not support 
device-wide synchronization and without special + * kernel launch syntax. + * + * Note that this implementation does not support large grids, since it uses 32 + * bits to represent sizes and ranks, while at least 73 bits (63 bit grid + 10 + * bit block) would have to be used to represent the full space of thread ranks. + */ +class grid_group { + friend __dpct_inline__ grid_group this_grid(sycl::nd_item<3> &); + +public: + __dpct_inline__ unsigned size() const noexcept { return data_.size; } + + __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } + +private: + __dpct_inline__ grid_group(sycl::nd_item<3> &group) + : data_{static_cast(group.get_global_range().size()), + static_cast(group.get_global_linear_id())} + {} + + struct alignas(8) { + unsigned size; + unsigned rank; + } data_; +}; + +// Not using this, as grid_group is not universally supported. +// grid_group this_grid() +// using cooperative_groups::this_grid; +// Instead, use our limited implementation: +__dpct_inline__ grid_group this_grid(sycl::nd_item<3> &group) +{ + return grid_group(group); +} + + +} // namespace group +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +// Enable group can directly use group function +__SYCL_INLINE_NAMESPACE(cl) +{ + namespace sycl { + namespace detail { + + + template + struct is_sub_group< + ::gko::kernels::dpcpp::group::detail::thread_block_tile> + : std::true_type {}; + + + namespace spirv { + + + template + struct group_scope; + + template + struct group_scope< + ::gko::kernels::dpcpp::group::detail::thread_block_tile> { + static constexpr __spv::Scope::Flag value = + __spv::Scope::Flag::Subgroup; + }; + + + } // namespace spirv + } // namespace detail + } // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + + +#endif // GKO_DPCPP_COMPONENTS_COOPERATIVE_GROUPS_DP_HPP_ diff --git a/dpcpp/components/fill_array.dp.cpp b/dpcpp/components/fill_array.dp.cpp new file mode 100644 index 00000000000..9edffc8eaf6 --- /dev/null +++ 
b/dpcpp/components/fill_array.dp.cpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/components/fill_array.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace components { + + +template +void fill_array(std::shared_ptr exec, ValueType *array, + size_type n, ValueType val) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { + const auto idx = idx_id[0]; + array[idx] = val; + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); + + +template +void fill_seq_array(std::shared_ptr exec, + ValueType *array, size_type n) +{ + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { + const auto idx = idx_id[0]; + array[idx] = idx; + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL); + + +} // namespace components +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/components/format_conversion.dp.hpp b/dpcpp/components/format_conversion.dp.hpp new file mode 100644 index 00000000000..99df6f02a4a --- /dev/null +++ b/dpcpp/components/format_conversion.dp.hpp @@ -0,0 +1,137 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_FORMAT_CONVERSION_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_FORMAT_CONVERSION_DP_HPP_ + + +#include + + +#include + + +#include +#include + + +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace ell { +namespace kernel { + + +/** + * @internal + * + * It counts the number of explicit nonzeros per row of Ell. 
+ */ +template +void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type max_nnz_per_row, size_type stride, + const ValueType *values, IndexType *result); + + +} // namespace kernel +} // namespace ell + + +namespace coo { +namespace kernel { + + +/** + * @internal + * + * It converts the row index of Coo to the row pointer of Csr. + */ +template +void convert_row_idxs_to_ptrs(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, const IndexType *idxs, + size_type num_nonzeros, IndexType *ptrs, + size_type length); + + +} // namespace kernel + + +namespace host_kernel { + + +/** + * @internal + * + * It calculates the number of warps used in Coo Spmv depending on the GPU + * architecture and the number of stored elements. + */ +template +size_type calculate_nwarps(std::shared_ptr exec, + const size_type nnz) +{ + size_type nwarps_in_dpcpp = exec->get_num_computing_units() * 7; + size_type multiple = 8; + if (nnz >= 2e8) { + multiple = 256; + } else if (nnz >= 2e7) { + multiple = 32; + } +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING + if (_tuning_flag) { + multiple = _tuned_value; + } +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + return std::min(multiple * nwarps_in_dpcpp, + size_type(ceildiv(nnz, subgroup_size))); +} + + +} // namespace host_kernel +} // namespace coo +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_FORMAT_CONVERSION_DP_HPP_ diff --git a/dpcpp/components/matrix_operations.dp.hpp b/dpcpp/components/matrix_operations.dp.hpp new file mode 100644 index 00000000000..0768242ce90 --- /dev/null +++ b/dpcpp/components/matrix_operations.dp.hpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_MATRIX_OPERATIONS_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_MATRIX_OPERATIONS_DP_HPP_ + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** + * @internal + * + * Computes the infinity norm of a column-major matrix. 
+ */ +template +remove_complex compute_inf_norm( + size_type num_rows, size_type num_cols, const ValueType *matrix, + size_type stride) GKO_NOT_IMPLEMENTED; + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_MATRIX_OPERATIONS_DP_HPP_ diff --git a/dpcpp/components/prefix_sum.dp.cpp b/dpcpp/components/prefix_sum.dp.cpp new file mode 100644 index 00000000000..63f33e9ba35 --- /dev/null +++ b/dpcpp/components/prefix_sum.dp.cpp @@ -0,0 +1,106 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include + + +#include + + +#include "core/base/types.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/prefix_sum.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace components { + + +using BlockCfg = ConfigSet<11>; + +constexpr auto block_cfg_list = + ::gko::syn::value_list(); + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(start_prefix_sum, start_prefix_sum) +GKO_ENABLE_DEFAULT_CONFIG_CALL(start_prefix_sum_call, start_prefix_sum, + block_cfg_list) + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(finalize_prefix_sum, + finalize_prefix_sum) +GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_prefix_sum_call, finalize_prefix_sum, + block_cfg_list) + + +template +void prefix_sum(std::shared_ptr exec, IndexType *counts, + size_type num_entries) +{ + // prefix_sum should only be performed on a valid array + if (num_entries > 0) { + auto queue = exec->get_queue(); + constexpr auto block_cfg_array = as_array(block_cfg_list); + const std::uint32_t cfg = + get_first_cfg(block_cfg_array, [&queue](std::uint32_t cfg) { + return validate(queue, BlockCfg::decode<0>(cfg), 16); + }); + const auto wg_size = BlockCfg::decode<0>(cfg); + auto num_blocks = ceildiv(num_entries, wg_size); + Array block_sum_array(exec, num_blocks - 1); + auto block_sums = block_sum_array.get_data(); + start_prefix_sum_call(cfg, num_blocks, wg_size, 0, 
exec->get_queue(), + num_entries, counts, block_sums); + // add the total sum of the previous block only when the number of + // blocks is larger than 1. + if (num_blocks > 1) { + finalize_prefix_sum_call(cfg, num_blocks, wg_size, 0, + exec->get_queue(), num_entries, counts, + block_sums); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL); + +// instantiate for size_type as well, as this is used in the Sellp format +template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type); + + +} // namespace components +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/components/prefix_sum.dp.hpp b/dpcpp/components/prefix_sum.dp.hpp new file mode 100644 index 00000000000..3b5e1c317dd --- /dev/null +++ b/dpcpp/components/prefix_sum.dp.hpp @@ -0,0 +1,257 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_PREFIX_SUM_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_PREFIX_SUM_DP_HPP_ + + +#include + + +#include + + +#include "core/base/types.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +// TODO: porting - some function names still use subwarp + + +/** + * @internal + * Computes the prefix sum and total sum of `element` over a subgroup. + * + * @param element the element over which we compute the prefix sum. + * @param prefix_sum will be set to the sum of all `element`s from lower + * lanes, plus the local `element` if `inclusive` is `true`. + * @param total_sum will be set to the total sum of `element` in this + * subgroup. + * @param subgroup the cooperative group representing the subgroup. + * + * @tparam inclusive if this is true, the computed prefix sum will be + * inclusive, otherwise it will be exclusive. + * + * @note For this function to work on architectures with independent thread + * scheduling, all threads of the subgroup have to execute it. + */ +template +__dpct_inline__ void subwarp_prefix_sum(ValueType element, + ValueType &prefix_sum, + ValueType &total_sum, Group subgroup) +{ + prefix_sum = inclusive ?
element : zero(); + total_sum = element; +#pragma unroll + // hypercube prefix sum + for (int step = 1; step < subgroup.size(); step *= 2) { + auto neighbor = subgroup.shfl_xor(total_sum, step); + total_sum += neighbor; + prefix_sum += bool(subgroup.thread_rank() & step) ? neighbor : 0; + } +} + +/** + * @internal + * Computes the prefix sum of `element` over a subgroup. + * + * @param element the element over which we compute the prefix sum. + * @param prefix_sum will be set to the sum of all `element`s from lower + * lanes, plus the local `element` if `inclusive` is `true`. + * @param subgroup the cooperative group representing the subgroup. + * + * @tparam inclusive if this is true, the computed prefix sum will be + * inclusive, otherwise it will be exclusive. + * + * @note All threads of the subgroup have to execute this function for it to + * work (and not dead-lock on newer architectures). + */ +template +__dpct_inline__ void subwarp_prefix_sum(ValueType element, + ValueType &prefix_sum, Group subgroup) +{ + ValueType tmp{}; + subwarp_prefix_sum(element, prefix_sum, tmp, subgroup); +} + + +/** + * @internal + * First step of the calculation of a prefix sum. Calculates the prefix sum + * in-place on parts of the array `elements`. + * + * @param elements array on which the prefix sum is to be calculated + * @param block_sum array which stores the total sum of each block, requires at + * least `ceildiv(num_elements, block_size) - 1` elements + * @param num_elements total number of entries in `elements` + * + * @tparam block_size thread block size for this kernel, also size of blocks on + * which this kernel calculates the prefix sum in-place + * + * @note To calculate the prefix sum over an array of size bigger than + * `block_size`, `finalize_prefix_sum` has to be used as well. 
+ */ +template +void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements, + ValueType *__restrict__ block_sum, + sycl::nd_item<3> item_ct1, + UninitializedArray &prefix_helper) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + const auto element_id = item_ct1.get_local_id(2); + + // do not need to access the last element when exclusive prefix sum + prefix_helper[element_id] = + (tidx + 1 < num_elements) ? elements[tidx] : zero(); + auto this_block = group::this_thread_block(item_ct1); + this_block.sync(); + + // Do a normal reduction +#pragma unroll + for (int i = 1; i < block_size; i <<= 1) { + const auto ai = i * (2 * element_id + 1) - 1; + const auto bi = i * (2 * element_id + 2) - 1; + if (bi < block_size) { + prefix_helper[bi] += prefix_helper[ai]; + } + this_block.sync(); + } + + if (element_id == 0) { + // Store the total sum except the last block + if (item_ct1.get_group(2) + 1 < item_ct1.get_group_range(2)) { + block_sum[item_ct1.get_group(2)] = prefix_helper[block_size - 1]; + } + prefix_helper[block_size - 1] = zero(); + } + + this_block.sync(); + + // Perform the down-sweep phase to get the true prefix sum +#pragma unroll + for (int i = block_size >> 1; i > 0; i >>= 1) { + const auto ai = i * (2 * element_id + 1) - 1; + const auto bi = i * (2 * element_id + 2) - 1; + if (bi < block_size) { + auto tmp = prefix_helper[ai]; + prefix_helper[ai] = prefix_helper[bi]; + prefix_helper[bi] += tmp; + } + this_block.sync(); + } + if (tidx < num_elements) { + elements[tidx] = prefix_helper[element_id]; + } +} + +template +void start_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_elements, + ValueType *elements, ValueType *block_sum) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access::mode::read_write, + sycl::access::target::local> + prefix_helper_acc_ct1(cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + 
start_prefix_sum( + num_elements, elements, block_sum, item_ct1, + *prefix_helper_acc_ct1.get_pointer()); + }); + }); +} + + +/** + * @internal + * Second step of the calculation of a prefix sum. Increases the value of each + * entry of `elements` by the total sum of all preceding blocks. + * + * @param elements array on which the prefix sum is to be calculated + * @param block_sum array storing the total sum of each block + * @param num_elements total number of entries in `elements` + * + * @tparam block_size thread block size for this kernel, has to be the same as + * for `start_prefix_sum` + * + * @note To calculate a prefix sum, first `start_prefix_sum` has to be called. + */ +template +void finalize_prefix_sum(size_type num_elements, + ValueType *__restrict__ elements, + const ValueType *__restrict__ block_sum, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + + if (tidx < num_elements) { + ValueType prefix_block_sum = zero(); + for (size_type i = 0; i < item_ct1.get_group(2); i++) { + prefix_block_sum += block_sum[i]; + } + elements[tidx] += prefix_block_sum; + } +} + +template +void finalize_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_elements, + ValueType *elements, const ValueType *block_sum) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + finalize_prefix_sum( + num_elements, elements, block_sum, item_ct1); + }); + }); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_PREFIX_SUM_DP_HPP_ diff --git a/dpcpp/components/reduction.dp.hpp b/dpcpp/components/reduction.dp.hpp new file mode 100644 index 00000000000..e7f7c8f5ab6 --- /dev/null +++ b/dpcpp/components/reduction.dp.hpp @@ -0,0 +1,299 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_REDUCTION_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_REDUCTION_DP_HPP_ + + +#include + + +#include + + +#include +#include +#include + + +#include "core/base/types.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +constexpr int default_block_size = 256; +using KCFG_1D = ConfigSet<11, 7>; +constexpr auto kcfg_1d_list = + syn::value_list(); +constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + +/** + * @internal + * + * Computes a reduction using the binary operation `reduce_op` on a group + * `group`. Each thread contributes with one element `local_data`. The local + * thread element is always passed as the first parameter to the `reduce_op`. + * The function returns the result of the reduction on all threads. + * + * @note The function is guaranteed to return the correct value on all threads + * only if `reduce_op` is commutative (in addition to being associative). + * Otherwise, the correct value is returned only to the thread with + * subgroup index 0. 
+ */ +template < + typename Group, typename ValueType, typename Operator, + typename = std::enable_if_t::value>> +__dpct_inline__ ValueType reduce(const Group &group, ValueType local_data, + Operator reduce_op = Operator{}) +{ +#pragma unroll + for (int32 bitmask = 1; bitmask < group.size(); bitmask <<= 1) { + const auto remote_data = group.shfl_xor(local_data, bitmask); + local_data = reduce_op(local_data, remote_data); + } + return local_data; +} + + +/** + * @internal + * + * Returns the index of the thread that has the element with the largest + * magnitude among all the threads in the group. + * Only the values from threads which set `is_pivoted` to `false` will be + * considered. + */ +template < + typename Group, typename ValueType, + typename = std::enable_if_t::value>> +__dpct_inline__ int choose_pivot(const Group &group, ValueType local_data, + bool is_pivoted) +{ + using real = remove_complex; + real lmag = is_pivoted ? -one() : abs(local_data); + const auto pivot = + reduce(group, group.thread_rank(), [&](int lidx, int ridx) { + const auto rmag = group.shfl(lmag, ridx); + if (rmag > lmag) { + lmag = rmag; + lidx = ridx; + } + return lidx; + }); + // pivot operator not commutative, make sure everyone has the same pivot + return group.shfl(pivot, 0); +} + + +/** + * @internal + * + * Computes a reduction using the binary operation `reduce_op` on entire block. + * The data for the reduction is taken from the `data` array which has to be of + * size `block_size` and accessible from all threads. The `data` array is also + * used as work space (so its content will be destroyed in the process), as well + * as to store the return value - which is stored in the 0-th position of the + * array. 
+ */ +template < + unsigned int sg_size = config::warp_size, typename Group, + typename ValueType, typename Operator, + typename = std::enable_if_t::value>> +void reduce(const Group &__restrict__ group, ValueType *__restrict__ data, + Operator reduce_op = Operator{}) +{ + const auto local_id = group.thread_rank(); + + for (int k = group.size() / 2; k >= sg_size; k /= 2) { + group.sync(); + if (local_id < k) { + data[local_id] = reduce_op(data[local_id], data[local_id + k]); + } + } + + const auto warp = group::tiled_partition(group); + const auto warp_id = group.thread_rank() / warp.size(); + if (warp_id > 0) { + return; + } + auto result = ::gko::kernels::dpcpp::reduce(warp, data[warp.thread_rank()], + reduce_op); + if (warp.thread_rank() == 0) { + data[0] = result; + } +} + + +/** + * @internal + * + * Computes a reduction using the binary operation `reduce_op` on an array + * `source` of any size. Has to be called a second time on `result` to reduce + * an array larger than `block_size`. + */ +template +void reduce_array(size_type size, const ValueType *__restrict__ source, + ValueType *__restrict__ result, sycl::nd_item<3> item_ct1, + Operator reduce_op = Operator{}) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + auto thread_result = zero(); + for (auto i = tidx; i < size; + i += item_ct1.get_local_range().get(2) * item_ct1.get_group_range(2)) { + thread_result = reduce_op(thread_result, source[i]); + } + result[item_ct1.get_local_id(2)] = thread_result; + + group::this_thread_block(item_ct1).sync(); + + // Stores the result of the reduction inside `result[0]` + reduce(group::this_thread_block(item_ct1), result, reduce_op); +} + + +/** + * @internal + * + * Computes a reduction using the add operation (+) on an array + * `source` of any size. Has to be called a second time on `result` to reduce + * an array larger than `block_size`. 
+ */ +template +void reduce_add_array( + size_type size, const ValueType *__restrict__ source, + ValueType *__restrict__ result, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &block_sum) +{ + reduce_array(cfg)>( + size, source, static_cast(block_sum), item_ct1, + [](const ValueType &x, const ValueType &y) { return x + y; }); + + if (item_ct1.get_local_id(2) == 0) { + result[item_ct1.get_group(2)] = block_sum[0]; + } +} + +template +void reduce_add_array(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type size, + const ValueType *source, ValueType *result) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor(cfg)>, + 0, sycl::access::mode::read_write, + sycl::access::target::local> + block_sum_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + reduce_add_array(size, source, result, item_ct1, + *block_sum_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(reduce_add_array_config, + reduce_add_array); + +GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_add_array_call, reduce_add_array_config, + kcfg_1d_list); + + +/** + * Compute a reduction using add operation (+). 
+ * + * @param exec Executor associated to the array + * @param size size of the array + * @param source the pointer of the array + * + * @return the reduction result + */ +template +ValueType reduce_add_array(std::shared_ptr exec, + size_type size, const ValueType *source) +{ + auto block_results_val = source; + size_type grid_dim = size; + auto block_results = Array(exec); + ValueType answer = zero(); + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + if (size > wg_size) { + const auto n = ceildiv(size, wg_size); + grid_dim = (n <= wg_size) ? n : wg_size; + + block_results.resize_and_reset(grid_dim); + + reduce_add_array_call(cfg, grid_dim, wg_size, 0, exec->get_queue(), + size, source, block_results.get_data()); + + block_results_val = block_results.get_const_data(); + } + + auto d_result = Array(exec, 1); + + reduce_add_array_call(cfg, 1, wg_size, 0, exec->get_queue(), grid_dim, + block_results_val, d_result.get_data()); + answer = exec->copy_val_to_host(d_result.get_const_data()); + return answer; +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_REDUCTION_DP_HPP_ diff --git a/dpcpp/components/segment_scan.dp.hpp b/dpcpp/components/segment_scan.dp.hpp new file mode 100644 index 00000000000..09dc3e4e20f --- /dev/null +++ b/dpcpp/components/segment_scan.dp.hpp @@ -0,0 +1,87 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_SEGMENT_SCAN_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_SEGMENT_SCAN_DP_HPP_ + + +#include + + +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** + * @internal + * + * Compute a segment scan using add operation (+) of a subgroup_size. Each + * segment performs suffix sum. Works on the source array and returns whether + * the thread is the first element of its segment with same `ind`.
+ */ +template +__dpct_inline__ bool segment_scan( + const group::thread_block_tile &group, const IndexType ind, + ValueType *__restrict__ val) +{ + bool head = true; +#pragma unroll + for (int i = 1; i < subgroup_size; i <<= 1) { + const IndexType add_ind = group.shfl_up(ind, i); + ValueType add_val = zero(); + if (add_ind == ind && group.thread_rank() >= i) { + add_val = *val; + if (i == 1) { + head = false; + } + } + add_val = group.shfl_down(add_val, i); + if (group.thread_rank() < subgroup_size - i) { + *val += add_val; + } + } + return head; +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_SEGMENT_SCAN_DP_HPP_ diff --git a/dpcpp/components/thread_ids.dp.hpp b/dpcpp/components/thread_ids.dp.hpp new file mode 100644 index 00000000000..70ad76d9ccb --- /dev/null +++ b/dpcpp/components/thread_ids.dp.hpp @@ -0,0 +1,327 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_THREAD_IDS_DP_HPP_ +#define GKO_DPCPP_COMPONENTS_THREAD_IDS_DP_HPP_ + + +#include + + +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dpct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The DPCPP thread namespace. + * + * @ingroup dpcpp_thread + */ +namespace thread { + + +// TODO: porting - need to refine functions and their name in this file +// the grid/block description uses the cuda dim3 to represent. i.e. using dim3 +// to launch dpcpp kernel, the kernel will reverse the ordering to keep the same +// linear memory usage as cuda. + + +/** + * @internal + * + * Returns the ID of the block group this thread belongs to. + * + * @return the ID of the block group this thread belongs to + * + * @note Assumes that grid dimensions are in cuda standard format: + * `(block_group_size, first_grid_dimension, second grid_dimension)` + */ +__dpct_inline__ size_type get_block_group_id(sycl::nd_item<3> item_ct1) +{ + return static_cast(item_ct1.get_group(0)) * + item_ct1.get_group_range(1) + + item_ct1.get_group(1); +} + +/** + * @internal + * + * Returns the ID of the block this thread belongs to. 
+ * + * @return the ID of the block this thread belongs to + * + * @note Assumes that grid dimensions are in cuda standard format: + * `(block_group_size, first_grid_dimension, second grid_dimension)` + */ +__dpct_inline__ size_type get_block_id(sycl::nd_item<3> item_ct1) +{ + return get_block_group_id(item_ct1) * item_ct1.get_group_range(2) + + item_ct1.get_group(2); +} + + +/** + * @internal + * + * Returns the local ID of the warp (relative to the block) this thread belongs + * to. + * + * @return the local ID of the warp (relative to the block) this thread belongs + * to + * + * @note Assumes that block dimensions are in cuda standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +__dpct_inline__ size_type get_local_warp_id(sycl::nd_item<3> item_ct1) +{ + return static_cast(item_ct1.get_local_id(0)); +} + + +/** + * @internal + * + * Returns the local ID of the sub-warp (relative to the block) this thread + * belongs to. + * + * @tparam subwarp_size size of the subwarp + * + * @return the local ID of the sub-warp (relative to the block) this thread + * belongs to + * + * @note Assumes that block dimensions are in cuda standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +template +__dpct_inline__ size_type get_local_subwarp_id(sycl::nd_item<3> item_ct1) +{ + // dpcpp does not have subwarp. + constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; + return get_local_warp_id(item_ct1) * subwarps_per_warp + + item_ct1.get_local_id(1); +} + + +/** + * @internal + * + * Returns the local ID of the thread (relative to the block). + * to. 
+ * + * @tparam subwarp_size size of the subwarp + * + * @return the local ID of the thread (relative to the block) + * + * @note Assumes that block dimensions are in cuda standard format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` + */ +template +__dpct_inline__ size_type get_local_thread_id(sycl::nd_item<3> item_ct1) +{ + return get_local_subwarp_id(item_ct1) * subwarp_size + + item_ct1.get_local_id(2); +} + + +/** + * @internal + * + * Returns the global ID of the warp this thread belongs to. + * + * @tparam warps_per_block number of warps within each block + * + * @return the global ID of the warp this thread belongs to. + * + * @note Assumes that block dimensions and grid dimensions are in cuda standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__dpct_inline__ size_type get_warp_id(sycl::nd_item<3> item_ct1) +{ + return get_block_id(item_ct1) * warps_per_block + + get_local_warp_id(item_ct1); +} + + +/** + * @internal + * + * Returns the global ID of the sub-warp this thread belongs to. + * + * @tparam subwarp_size size of the subwarp + * + * @return the global ID of the sub-warp this thread belongs to. + * + * @note Assumes that block dimensions and grid dimensions are in cuda standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__dpct_inline__ size_type get_subwarp_id(sycl::nd_item<3> item_ct1) +{ + // dpcpp does not have subwarp + constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; + return get_warp_id(item_ct1) * subwarps_per_warp + + item_ct1.get_local_id(1); +} + + +/** + * @internal + * + * Returns the global ID of the thread. 
+ * + * @return the global ID of the thread. + * + * @tparam subwarp_size size of the subwarp + * + * @note Assumes that block dimensions and grid dimensions are in cuda standard + * format: + * `(subwarp_size, config::warp_size / subwarp_size, block_size / + * config::warp_size)` and + * `(block_group_size, first_grid_dimension, second grid_dimension)`, + * respectively. + */ +template +__dpct_inline__ size_type get_thread_id(sycl::nd_item<3> item_ct1) +{ + return get_subwarp_id(item_ct1) * + subwarp_size + + item_ct1.get_local_id(2); +} + + +/** + * @internal + * + * Returns the global ID of the thread in the given index type. + * This function assumes one-dimensional thread and block indexing in cuda + * sense. It only uses the third position (index 2) to compute the ID. + * + * @return the global ID of the thread in the given index type. + * + * @tparam IndexType the index type + */ +template +__dpct_inline__ IndexType get_thread_id_flat(sycl::nd_item<3> item_ct1) +{ + return item_ct1.get_local_id(2) + + static_cast(item_ct1.get_local_range().get(2)) * + item_ct1.get_group(2); +} + + +/** + * @internal + * + * Returns the total number of threads in the given index type. + * This function assumes one-dimensional thread and block indexing in cuda + * sense. It only uses the third position (index 2) to compute the count. + * + * @return the total number of threads in the given index type. + * + * @tparam IndexType the index type + */ +template +__dpct_inline__ IndexType get_thread_num_flat(sycl::nd_item<3> item_ct1) +{ + return item_ct1.get_local_range().get(2) * + static_cast(item_ct1.get_group_range(2)); +} + + +/** + * @internal + * + * Returns the global ID of the subwarp in the given index type. + * This function assumes one-dimensional thread and block indexing in cuda sense + * with a power of two block size of at least subwarp_size. + * + * @return the global ID of the subwarp in the given index type. + * + * @tparam subwarp_size the size of the subwarp.
Must be a power of two! + * @tparam IndexType the index type + */ +template +__dpct_inline__ IndexType get_subwarp_id_flat(sycl::nd_item<3> item_ct1) +{ + static_assert(!(subwarp_size & (subwarp_size - 1)), + "subwarp_size must be a power of two"); + return item_ct1.get_local_id(2) / subwarp_size + + static_cast(item_ct1.get_local_range().get(2) / + subwarp_size) * + item_ct1.get_group(2); +} + + +/** + * @internal + * + * Returns the total number of subwarps in the given index type. + * This function assumes one-dimensional thread and block indexing in cuda sense + * with a power of two block size of at least subwarp_size. + * + * @return the total number of subwarps in the given index type. + * + * @tparam subwarp_size the size of the subwarp. Must be a power of two! + * @tparam IndexType the index type + */ +template +__dpct_inline__ IndexType get_subwarp_num_flat(sycl::nd_item<3> item_ct1) +{ + static_assert(!(subwarp_size & (subwarp_size - 1)), + "subwarp_size must be a power of two"); + return item_ct1.get_local_range().get(2) / subwarp_size * + static_cast(item_ct1.get_group_range(2)); +} + + +} // namespace thread +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_THREAD_IDS_DP_HPP_ diff --git a/dpcpp/components/uninitialized_array.hpp b/dpcpp/components/uninitialized_array.hpp new file mode 100644 index 00000000000..eb8a36770d7 --- /dev/null +++ b/dpcpp/components/uninitialized_array.hpp @@ -0,0 +1,121 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_COMPONENTS_UNINITIALIZED_ARRAY_HPP_ +#define GKO_DPCPP_COMPONENTS_UNINITIALIZED_ARRAY_HPP_ + + +#include + + +#include "dpcpp/base/dpct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +// TODO: porting - consider directly use the array as shared memory + + +/** + * Stores an array with uninitialized contents. + * + * This class is needed for datatypes that do have a non-empty constructor when + * using them as shared memory, for example `thrust::complex`. + * + * @tparam ValueType the type of values + * @tparam size the size of the array + */ +template +class UninitializedArray { +public: + /** + * Operator for casting an UninitializedArray into its constexpr value + * pointer. 
+ * + * @return the constexpr pointer to the first entry of the array. + */ + constexpr __dpct_inline__ operator const ValueType *() const noexcept + { + return &(*this)[0]; + } + + /** + * Operator for casting an UninitializedArray into its non-const value + * pointer. + * + * @return the non-const pointer to the first entry of the array. + */ + __dpct_inline__ operator ValueType *() noexcept { return &(*this)[0]; } + + /** + * constexpr array access operator. + * + * @param pos The array index. Using a value outside [0, size) is undefined + * behavior. + * + * @return a reference to the array entry at the given index. + */ + constexpr __dpct_inline__ const ValueType &operator[](size_type pos) const + noexcept + { + return data_[pos]; + } + + /** + * Non-const array access operator. + * + * @param pos The array index. Using a value outside [0, size) is undefined + * behavior. + * + * @return a reference to the array entry at the given index. + */ + __dpct_inline__ ValueType &operator[](size_type pos) noexcept + { + return data_[pos]; + } + +private: + // if dpcpp uses char to represent data in char, compiling gives error. + // Thanksfully, dpcpp support complex data allocation directly. + ValueType data_[size]; +}; + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_COMPONENTS_UNINITIALIZED_ARRAY_HPP_ diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp new file mode 100644 index 00000000000..ebb45716c45 --- /dev/null +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/factorization_kernels.hpp" + + +#include +#include + + +#include +#include + + +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The factorization namespace. 
+ * + * @ingroup factor + */ +namespace factorization { + + +template +void find_missing_diagonal_elements( + const matrix::Csr *mtx, + IndexType *elements_to_add_per_row, + bool *changes_required) GKO_NOT_IMPLEMENTED; + + +template +void add_missing_diagonal_elements( + const matrix::Csr *mtx, ValueType *new_values, + IndexType *new_col_idxs, + const IndexType *row_ptrs_addition) GKO_NOT_IMPLEMENTED; + + +template +void add_diagonal_elements(std::shared_ptr exec, + matrix::Csr *mtx, + bool is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL); + + +template +void initialize_row_ptrs_l_u( + std::shared_ptr exec, + const matrix::Csr *system_matrix, + IndexType *l_row_ptrs, IndexType *u_row_ptrs) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); + + +template +void initialize_l_u(std::shared_ptr exec, + const matrix::Csr *system_matrix, + matrix::Csr *csr_l, + matrix::Csr *csr_u) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL); + + +template +void initialize_row_ptrs_l( + std::shared_ptr exec, + const matrix::Csr *system_matrix, + IndexType *l_row_ptrs) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); + + +template +void initialize_l(std::shared_ptr exec, + const matrix::Csr *system_matrix, + matrix::Csr *csr_l, + bool diag_sqrt) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL); + + +} // namespace factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/factorization/ic_kernels.dp.cpp b/dpcpp/factorization/ic_kernels.dp.cpp new file mode 100644 index 00000000000..4ea40141287 --- /dev/null +++ b/dpcpp/factorization/ic_kernels.dp.cpp @@ 
-0,0 +1,57 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/ic_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The ic factorization namespace. 
+ * + * @ingroup factor + */ +namespace ic_factorization { + + +template +void compute(std::shared_ptr exec, + matrix::Csr *m) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); + + +} // namespace ic_factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/factorization/ilu_kernels.dp.cpp b/dpcpp/factorization/ilu_kernels.dp.cpp new file mode 100644 index 00000000000..53a4ee2c636 --- /dev/null +++ b/dpcpp/factorization/ilu_kernels.dp.cpp @@ -0,0 +1,58 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/ilu_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The ilu factorization namespace. + * + * @ingroup factor + */ +namespace ilu_factorization { + + +template +void compute_lu(std::shared_ptr exec, + matrix::Csr *m) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); + + +} // namespace ilu_factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/factorization/par_ic_kernels.dp.cpp b/dpcpp/factorization/par_ic_kernels.dp.cpp new file mode 100644 index 00000000000..9a1279927a5 --- /dev/null +++ b/dpcpp/factorization/par_ic_kernels.dp.cpp @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/par_ic_kernels.hpp" + + +#include +#include + + +#include "core/base/utils.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The parallel IC factorization namespace. 
+ * + * @ingroup factor + */ +namespace par_ic_factorization { + + +template +void init_factor(std::shared_ptr exec, + matrix::Csr *l) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); + + +template +void compute_factor(std::shared_ptr exec, + size_type iterations, + const matrix::Coo *a_lower, + matrix::Csr *l) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); + + +} // namespace par_ic_factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/factorization/par_ict_kernels.dp.cpp b/dpcpp/factorization/par_ict_kernels.dp.cpp new file mode 100644 index 00000000000..a782dba4af1 --- /dev/null +++ b/dpcpp/factorization/par_ict_kernels.dp.cpp @@ -0,0 +1,90 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/par_ict_kernels.hpp" + + +#include +#include +#include +#include + + +#include +#include +#include +#include + + +#include "core/base/utils.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The parallel ICT factorization namespace. + * + * @ingroup factor + */ +namespace par_ict_factorization { + + +template +void compute_factor(std::shared_ptr exec, + const matrix::Csr *a, + matrix::Csr *l, + const matrix::Coo *) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL); + + +template +void add_candidates(std::shared_ptr exec, + const matrix::Csr *llh, + const matrix::Csr *a, + const matrix::Csr *l, + matrix::Csr *l_new) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL); + + +} // namespace par_ict_factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/solver/ir_kernels.hip.cpp b/dpcpp/factorization/par_ilu_kernels.dp.cpp similarity index 67% rename from hip/solver/ir_kernels.hip.cpp rename to dpcpp/factorization/par_ilu_kernels.dp.cpp index 5993c4b120b..4087eed7717 100644 --- a/hip/solver/ir_kernels.hip.cpp +++ b/dpcpp/factorization/par_ilu_kernels.dp.cpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,49 +30,40 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/solver/ir_kernels.hpp" +#include "core/factorization/par_ilu_kernels.hpp" -#include +#include #include - - -#include "hip/components/thread_ids.hip.hpp" +#include +#include namespace gko { namespace kernels { -namespace hip { +namespace dpcpp { /** - * @brief The IR solver namespace. + * @brief The parallel ILU factorization namespace. * - * @ingroup ir + * @ingroup factor */ -namespace ir { - - -constexpr int default_block_size = 512; - - -#include "common/solver/ir_kernels.hpp.inc" +namespace par_ilu_factorization { -void initialize(std::shared_ptr exec, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size(ceildiv(stop_status->get_num_elems(), block_size.x), 1, - 1); +template +void compute_l_u_factors( + std::shared_ptr exec, size_type iterations, + const matrix::Coo *system_matrix, + matrix::Csr *l_factor, + matrix::Csr *u_factor) GKO_NOT_IMPLEMENTED; - hipLaunchKernelGGL(initialize_kernel, dim3(grid_size), dim3(block_size), 0, - 0, stop_status->get_num_elems(), - stop_status->get_data()); -} +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); -} // namespace ir -} // namespace hip +} // namespace par_ilu_factorization +} // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/dpcpp/factorization/par_ilut_kernels.dp.cpp b/dpcpp/factorization/par_ilut_kernels.dp.cpp new file mode 100644 index 00000000000..5b224275827 --- /dev/null +++ b/dpcpp/factorization/par_ilut_kernels.dp.cpp @@ 
-0,0 +1,152 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/factorization/par_ilut_kernels.hpp" + + +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/utils.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/coo_builder.hpp" +#include "core/matrix/csr_builder.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The parallel ILUT factorization namespace. + * + * @ingroup factor + */ +namespace par_ilut_factorization { + + +template +void threshold_select(std::shared_ptr exec, + const matrix::Csr *m, + IndexType rank, Array &tmp, + Array> &, + remove_complex &threshold) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); + + +/** + * Removes all the elements from the input matrix for which pred is false. + * Stores the result in m_out and (if non-null) m_out_coo. + * pred(row, nz) is called for each entry, where nz is the index in + * values/col_idxs. 
+ */ +template +void abstract_filter(std::shared_ptr exec, + const matrix::Csr *m, + matrix::Csr *m_out, + matrix::Coo *m_out_coo, + Predicate pred) GKO_NOT_IMPLEMENTED; + + +template +void threshold_filter(std::shared_ptr exec, + const matrix::Csr *m, + remove_complex threshold, + matrix::Csr *m_out, + matrix::Coo *m_out_coo, + bool) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL); + + +constexpr auto bucket_count = 1 << sampleselect_searchtree_height; +constexpr auto sample_size = bucket_count * sampleselect_oversampling; + + +template +void threshold_filter_approx( + std::shared_ptr exec, + const matrix::Csr *m, IndexType rank, + Array &tmp, remove_complex &threshold, + matrix::Csr *m_out, + matrix::Coo *m_out_coo) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); + + +template +void compute_l_u_factors(std::shared_ptr exec, + const matrix::Csr *a, + matrix::Csr *l, + const matrix::Coo *, + matrix::Csr *u, + const matrix::Coo *, + matrix::Csr *u_csc) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL); + + +template +void add_candidates(std::shared_ptr exec, + const matrix::Csr *lu, + const matrix::Csr *a, + const matrix::Csr *l, + const matrix::Csr *u, + matrix::Csr *l_new, + matrix::Csr *u_new) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL); + + +} // namespace par_ilut_factorization +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/get_info.cmake b/dpcpp/get_info.cmake new file mode 100644 index 00000000000..36918a3a8c6 --- /dev/null +++ b/dpcpp/get_info.cmake @@ -0,0 +1,8 @@ +ginkgo_print_module_header(${detailed_log} "DPCPP") +ginkgo_print_module_footer(${detailed_log} "DPCPP variables:") +ginkgo_print_variable(${detailed_log} 
"GINKGO_DPCPP_FLAGS") +ginkgo_print_variable(${detailed_log} "GINKGO_DPCPP_SINGLE_MODE") +ginkgo_print_module_footer(${detailed_log} "DPCPP environment variables:") +ginkgo_print_env_variable(${detailed_log} "SYCL_DEVICE_TYPE") +ginkgo_print_env_variable(${detailed_log} "SYCL_BE") +ginkgo_print_module_footer(${detailed_log} "") diff --git a/dpcpp/matrix/coo_kernels.dp.cpp b/dpcpp/matrix/coo_kernels.dp.cpp new file mode 100644 index 00000000000..561a0cddf8f --- /dev/null +++ b/dpcpp/matrix/coo_kernels.dp.cpp @@ -0,0 +1,540 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/coo_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/dense_kernels.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/format_conversion.dp.hpp" +#include "dpcpp/components/segment_scan.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +/** + * @brief The DPCPP namespace. + * + * @ingroup dpcpp + */ +namespace dpcpp { +/** + * @brief The Coordinate matrix format namespace. 
+ * + * @ingroup coo + */ +namespace coo { + + +constexpr int default_block_size = 256; +constexpr int warps_in_block = 4; +constexpr int spmv_block_size = warps_in_block * config::warp_size; + + +namespace { + + +/** + * The device function of COO spmv + * + * @param nnz the number of nonzeros in the matrix + * @param num_lines the maximum round of each warp + * @param val the value array of the matrix + * @param col the column index array of the matrix + * @param row the row index array of the matrix + * @param b the input dense vector + * @param b_stride the stride of the input dense vector + * @param c the output dense vector + * @param c_stride the stride of the output dense vector + * @param scale the function on the added value + * + * @tparam ValueType type of values stored in the matrix + * @tparam IndexType type of matrix indexes stored in the structure + * @tparam Closure type of the function used to write the result + */ +template +void spmv_kernel(const size_type nnz, const size_type num_lines, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + Closure scale, sycl::nd_item<3> item_ct1) +{ + ValueType temp_val = zero(); + const auto start = + static_cast(item_ct1.get_local_range().get(2)) * + item_ct1.get_group(2) * item_ct1.get_local_range().get(1) * + num_lines + + item_ct1.get_local_id(1) * item_ct1.get_local_range().get(2) * + num_lines; + const auto column_id = item_ct1.get_group(1); + size_type num = (nnz > start) * ceildiv(nnz - start, subgroup_size); + num = min(num, num_lines); + const IndexType ind_start = start + item_ct1.get_local_id(2); + const IndexType ind_end = ind_start + (num - 1) * subgroup_size; + IndexType ind = ind_start; + IndexType curr_row = (ind < nnz) ? 
row[ind] : 0; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + for (; ind < ind_end; ind += subgroup_size) { + temp_val += (ind < nnz) ? val[ind] * b[col[ind] * b_stride + column_id] + : zero(); + auto next_row = (ind + subgroup_size < nnz) ? row[ind + subgroup_size] + : row[nnz - 1]; + // segmented scan + if (tile_block.any(curr_row != next_row)) { + bool is_first_in_segment = + segment_scan(tile_block, curr_row, &temp_val); + if (is_first_in_segment) { + atomic_add(&(c[curr_row * c_stride + column_id]), + scale(temp_val)); + } + temp_val = zero(); + } + curr_row = next_row; + } + if (num > 0) { + ind = ind_end; + temp_val += (ind < nnz) ? val[ind] * b[col[ind] * b_stride + column_id] + : zero(); + // segmented scan + bool is_first_in_segment = + segment_scan(tile_block, curr_row, &temp_val); + if (is_first_in_segment) { + atomic_add(&(c[curr_row * c_stride + column_id]), scale(temp_val)); + } + } +} + + +template +void abstract_spmv(const size_type nnz, const size_type num_lines, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + spmv_kernel( + nnz, num_lines, val, col, row, b, b_stride, c, c_stride, + [](const ValueType &x) { return x; }, item_ct1); +} + +template +void abstract_spmv(const size_type nnz, const size_type num_lines, + const ValueType *__restrict__ alpha, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + ValueType scale_factor = alpha[0]; + spmv_kernel( + nnz, num_lines, val, col, row, b, b_stride, c, c_stride, + [&scale_factor](const ValueType &x) { return scale_factor * x; }, + item_ct1); +} 
+ +GKO_ENABLE_DEFAULT_HOST(abstract_spmv, abstract_spmv); + + +/** + * The device function of COO spmm + * + * @param nnz the number of nonzeros in the matrix + * @param num_elems the maximum number of nonzeros in each warp + * @param val the value array of the matrix + * @param col the column index array of the matrix + * @param row the row index array of the matrix + * @param num_cols the number of columns of the matrix + * @param b the input dense vector + * @param b_stride the stride of the input dense vector + * @param c the output dense vector + * @param c_stride the stride of the output dense vector + * @param scale the function on the added value + * + * @tparam ValueType type of values stored in the matrix + * @tparam IndexType type of matrix indexes stored in the structure + * @tparam Closure type of the function used to write the result + */ +template +void spmm_kernel(const size_type nnz, const size_type num_elems, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, const size_type num_cols, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + Closure scale, sycl::nd_item<3> item_ct1) +{ + ValueType temp = zero(); + const auto coo_idx = + (static_cast(item_ct1.get_local_range().get(1)) * + item_ct1.get_group(2) + + item_ct1.get_local_id(1)) * + num_elems; + const auto column_id = + item_ct1.get_group(1) * item_ct1.get_local_range().get(2) + + item_ct1.get_local_id(2); + const auto coo_end = + (coo_idx + num_elems > nnz) ? 
nnz : coo_idx + num_elems; + if (column_id < num_cols && coo_idx < nnz) { + auto curr_row = row[coo_idx]; + auto idx = coo_idx; + for (; idx < coo_end - 1; idx++) { + temp += val[idx] * b[col[idx] * b_stride + column_id]; + const auto next_row = row[idx + 1]; + if (next_row != curr_row) { + atomic_add(&(c[curr_row * c_stride + column_id]), scale(temp)); + curr_row = next_row; + temp = zero(); + } + } + temp += val[idx] * b[col[idx] * b_stride + column_id]; + atomic_add(&(c[curr_row * c_stride + column_id]), scale(temp)); + } +} + + +template +void abstract_spmm(const size_type nnz, const size_type num_elems, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, const size_type num_cols, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + spmm_kernel( + nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, + [](const ValueType &x) { return x; }, item_ct1); +} + +template +void abstract_spmm(const size_type nnz, const size_type num_elems, + const ValueType *__restrict__ alpha, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col, + const IndexType *__restrict__ row, const size_type num_cols, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + ValueType scale_factor = alpha[0]; + spmm_kernel( + nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, + [&scale_factor](const ValueType &x) { return scale_factor * x; }, + item_ct1); +} + +GKO_ENABLE_DEFAULT_HOST(abstract_spmm, abstract_spmm); + + +} // namespace + + +namespace kernel { + + +template +void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, + size_type num_nonzeros, + IndexType *__restrict__ ptrs, size_type length, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + 
+ /* first work-item fixes both ends of the pointer array */ if (tidx == 0) { + ptrs[0] = 0; + ptrs[length - 1] = num_nonzeros; + } + + /* a row change between entries tidx - 1 and tidx: every row in (idxs[tidx - 1], idxs[tidx]] starts at entry tidx */ if (0 < tidx && tidx < num_nonzeros) { + if (idxs[tidx - 1] < idxs[tidx]) { + for (auto i = idxs[tidx - 1] + 1; i <= idxs[tidx]; i++) { + ptrs[i] = tidx; + } + } + } +} + +// Hand-written host launcher: submits a parallel_for over sycl_nd_range(grid, block) that calls the device function above; dynamic_shared_memory is accepted but not used in this body. Cannot use GKO_ENABLE_DEFAULT_HOST, otherwise we cannot instantiate it. +template +void convert_row_idxs_to_ptrs(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, const IndexType *idxs, + size_type num_nonzeros, IndexType *ptrs, + size_type length) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + convert_row_idxs_to_ptrs(idxs, num_nonzeros, ptrs, + length, item_ct1); + }); + }); +} + +/* explicit instantiations of the host launcher for 32-bit and 64-bit index types */ template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue *, + const int32 *idxs, size_type, int32 *, + size_type); +template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue *, + const int64 *idxs, size_type, int64 *, + size_type); + +/** Device function: the work-item with global x index (dimension 2) tidx_x and global y index (dimension 1) tidx_y writes zero to result[tidx_y * stride + tidx_x] when tidx_x < num_cols and tidx_y < num_rows. */ template +void initialize_zero_dense(size_type num_rows, size_type num_cols, + size_type stride, ValueType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + const auto tidx_x = + item_ct1.get_local_id(2) + + item_ct1.get_local_range().get(2) * item_ct1.get_group(2); + const auto tidx_y = + item_ct1.get_local_id(1) + + item_ct1.get_local_range().get(1) * item_ct1.get_group(1); + if (tidx_x < num_cols && tidx_y < num_rows) { + result[tidx_y * stride + tidx_x] = zero(); + } +} + +GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); + + +/** Device function: work-item tidx < nnz copies values[tidx] to result[stride * row_idxs[tidx] + col_idxs[tidx]]. Plain store, so if the same (row, col) pair occurs twice the surviving value is whichever store lands last. */ template +void fill_in_dense(size_type nnz, const IndexType *__restrict__ row_idxs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ values, size_type stride, + ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < nnz) { + result[stride * row_idxs[tidx] + col_idxs[tidx]] = values[tidx]; + } +} + 
+GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); + + +} // namespace kernel + + +/** Overwriting product: fills c with zero via dense::fill and then accumulates the product of a and b into it through spmv2. */ template +void spmv(std::shared_ptr exec, + const matrix::Coo *a, + const matrix::Dense *b, matrix::Dense *c) +{ + dense::fill(exec, c, zero()); + spmv2(exec, a, b, c); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); + + +/** Scaled product: first scales c by beta via dense::scale, then accumulates through advanced_spmv2 with alpha. */ template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Coo *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) +{ + dense::scale(exec, beta, c); + advanced_spmv2(exec, alpha, a, b, c); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_COO_ADVANCED_SPMV_KERNEL); + + +/** Accumulating product into c (c is not cleared here). The warp count comes from host_kernel::calculate_nwarps; nothing is launched when it is 0. With fewer than 4 columns in b, abstract_spmv is launched with one grid row (grid y) per column and num_lines = ceildiv(nnz, nwarps * warp_size); otherwise abstract_spmm is launched with grid y = ceildiv(b_ncols, warp_size) and num_elems = num_lines * warp_size. */ template +void spmv2(std::shared_ptr exec, + const matrix::Coo *a, + const matrix::Dense *b, matrix::Dense *c) +{ + const auto nnz = a->get_num_stored_elements(); + const auto b_ncols = b->get_size()[1]; + const dim3 coo_block(config::warp_size, warps_in_block, 1); + const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); + + if (nwarps > 0) { + if (b_ncols < 4) { + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), b_ncols); + int num_lines = ceildiv(nnz, nwarps * config::warp_size); + abstract_spmv(coo_grid, coo_block, 0, exec->get_queue(), nnz, + num_lines, a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_idxs(), + b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } else { + int num_elems = + ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), + ceildiv(b_ncols, config::warp_size)); + abstract_spmm(coo_grid, coo_block, 0, exec->get_queue(), nnz, + num_elems, a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_idxs(), + b_ncols, b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); + + +/** Accumulating product with scaling: same launch logic as spmv2, with alpha->get_const_values() passed as the first kernel argument. */ template +void advanced_spmv2(std::shared_ptr 
exec, + const matrix::Dense *alpha, + const matrix::Coo *a, + const matrix::Dense *b, + matrix::Dense *c) +{ + const auto nnz = a->get_num_stored_elements(); + const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); + const dim3 coo_block(config::warp_size, warps_in_block, 1); + const auto b_ncols = b->get_size()[1]; + + /* no launch at all when calculate_nwarps returns 0 */ if (nwarps > 0) { + /* narrow right-hand side: one grid row per column of b */ if (b_ncols < 4) { + int num_lines = ceildiv(nnz, nwarps * config::warp_size); + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), b_ncols); + abstract_spmv(coo_grid, coo_block, 0, exec->get_queue(), nnz, + num_lines, alpha->get_const_values(), + a->get_const_values(), a->get_const_col_idxs(), + a->get_const_row_idxs(), b->get_const_values(), + b->get_stride(), c->get_values(), c->get_stride()); + } else { + /* wide right-hand side: grid y covers the columns in chunks of warp_size */ int num_elems = + ceildiv(nnz, nwarps * config::warp_size) * config::warp_size; + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), + ceildiv(b_ncols, config::warp_size)); + abstract_spmm(coo_grid, coo_block, 0, exec->get_queue(), nnz, + num_elems, alpha->get_const_values(), + a->get_const_values(), a->get_const_col_idxs(), + a->get_const_row_idxs(), b_ncols, + b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL); + + +/** Launches kernel::convert_row_idxs_to_ptrs with ceildiv(num_nonzeros, default_block_size) groups of default_block_size work-items and no dynamic shared memory. NOTE(review): num_nonzeros == 0 gives grid_dim == 0, so the kernel (which is what writes ptrs[0] and ptrs[length - 1]) would have no work-items - confirm how an empty matrix is handled. */ template +void convert_row_idxs_to_ptrs(std::shared_ptr exec, + const IndexType *idxs, size_type num_nonzeros, + IndexType *ptrs, size_type length) +{ + const auto grid_dim = ceildiv(num_nonzeros, default_block_size); + + kernel::convert_row_idxs_to_ptrs(grid_dim, default_block_size, 0, + exec->get_queue(), idxs, num_nonzeros, + ptrs, length); +} + + +/** Builds only the row pointers of result from the row indices of source (num_rows + 1 entries); column indices and values are not touched in this function. The entry count is read from result, not from source. */ template +void convert_to_csr(std::shared_ptr exec, + const matrix::Coo *source, + matrix::Csr *result) +{ + auto num_rows = result->get_size()[0]; + + auto row_ptrs = result->get_row_ptrs(); + const auto nnz = result->get_num_stored_elements(); + + const auto source_row_idxs = source->get_const_row_idxs(); + 
convert_row_idxs_to_ptrs(exec, source_row_idxs, nnz, row_ptrs, + num_rows + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_COO_CONVERT_TO_CSR_KERNEL); + + +/** Two launches on the executor's queue: kernel::initialize_zero_dense over a 2D grid covering num_rows x num_cols of result (blocks of warp_size x max_block_size / warp_size), then kernel::fill_in_dense with one work-item per stored entry of source, writing into result with result's stride. */ template +void convert_to_dense(std::shared_ptr exec, + const matrix::Coo *source, + matrix::Dense *result) +{ + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto stride = result->get_stride(); + + const auto nnz = source->get_num_stored_elements(); + + const dim3 block_size(config::warp_size, + config::max_block_size / config::warp_size, 1); + const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), + ceildiv(num_rows, block_size.y), 1); + kernel::initialize_zero_dense(init_grid_dim, block_size, 0, + exec->get_queue(), num_rows, num_cols, stride, + result->get_values()); + + /* NOTE(review): grid_dim is 0 when nnz == 0 - confirm the launcher accepts an empty range */ const auto grid_dim = ceildiv(nnz, default_block_size); + kernel::fill_in_dense( + grid_dim, default_block_size, 0, exec->get_queue(), nnz, + source->get_const_row_idxs(), source->get_const_col_idxs(), + source->get_const_values(), stride, result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_COO_CONVERT_TO_DENSE_KERNEL); + + +} // namespace coo +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp new file mode 100644 index 00000000000..3bbd03de871 --- /dev/null +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -0,0 +1,2668 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/csr_kernels.hpp" + + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/base/utils.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/segment_scan.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Compressed sparse row matrix format namespace. + * + * @ingroup csr + */ +namespace csr { + + +/* work-items per group for the element-wise conversion kernels */ constexpr int default_block_size = 256; +/* warps per group for the SpMV kernels */ constexpr int warps_in_block = 4; +constexpr int spmv_block_size = warps_in_block * config::warp_size; +/* NOTE(review): not used in the visible part of this file - presumably a tuning factor of the classical strategy; confirm at its use site */ constexpr int classical_overweight = 32; + + +/** + * A compile-time list of the number of items per thread for which the spmv kernel + * should be compiled. 
+ */ +using compiled_kernels = syn::value_list; + +using classical_kernels = syn::value_list; + + +namespace kernel { + + +/** Ceiling division (nom + denom - 1) / denom; the 1ll literal makes the intermediate arithmetic at least long long before the result is converted back to T. */ template +__dpct_inline__ T ceildivT(T nom, T denom) +{ + return (nom + denom - 1ll) / denom; +} + + +/** Block-wide segmented sum over val[0..spmv_block_size), with segments defined by equal entries of ind; both arrays are indexed by the work-item's local id in dimension 2. In each of the log2(spmv_block_size) rounds a work-item adds the value i positions to its left if that position carries the same index, with a block barrier before and after the update. Returns true unless the immediate right neighbour has the same index, i.e. true for the last work-item of each segment, which then holds the segment's sum provided equal indices are contiguous. */ template +__dpct_inline__ bool block_segment_scan_reverse( + const IndexType *__restrict__ ind, ValueType *__restrict__ val, + sycl::nd_item<3> item_ct1) +{ + bool last = true; + const auto reg_ind = ind[item_ct1.get_local_id(2)]; +#pragma unroll + for (int i = 1; i < spmv_block_size; i <<= 1) { + if (i == 1 && item_ct1.get_local_id(2) < spmv_block_size - 1 && + reg_ind == ind[item_ct1.get_local_id(2) + 1]) { + last = false; + } + auto temp = zero(); + if (item_ct1.get_local_id(2) >= i && + reg_ind == ind[item_ct1.get_local_id(2) - i]) { + temp = val[item_ct1.get_local_id(2) - i]; + } + group::this_thread_block(item_ct1).sync(); + val[item_ct1.get_local_id(2)] += temp; + group::this_thread_block(item_ct1).sync(); + } + + return last; +} + + +/** Updates (*row, *row_end) so that ind < *row_end. If ind has passed the current row end, the search restarts from the predicted pair (row_predict, row_predict_end) and walks forward through row_ptr. When `overflow` is set and ind >= data_size, the pair is clamped to (num_rows - 1, data_size) instead. `overflow` is not declared in the visible text - presumably a compile-time template flag; confirm. */ template +__dpct_inline__ void find_next_row( + const IndexType num_rows, const IndexType data_size, const IndexType ind, + IndexType *__restrict__ row, IndexType *__restrict__ row_end, + const IndexType row_predict, const IndexType row_predict_end, + const IndexType *__restrict__ row_ptr) +{ + if (!overflow || ind < data_size) { + if (ind >= *row_end) { + *row = row_predict; + *row_end = row_predict_end; + while (ind >= *row_end) { + *row_end = row_ptr[++*row + 1]; + } + } + + } else { + *row = num_rows - 1; + *row_end = data_size; + } +} + + +/** Runs segment_scan over the group keyed by row, then atomically adds scale(*val) to c[row * c_stride + column_id] on lanes where the scan reports need_write and force_write is set. *val is reset to zero on lanes that either did not need to write or were forced to. */ template +__dpct_inline__ void warp_atomic_add( + const group::thread_block_tile &group, bool force_write, + ValueType *__restrict__ val, const IndexType row, ValueType *__restrict__ c, + const size_type c_stride, const IndexType column_id, Closure scale) +{ + // do a local scan to avoid atomic collisions + const bool need_write = segment_scan(group, row, val); + if (need_write && force_write) { + atomic_add(&(c[row * c_stride + column_id]), scale(*val)); + } + if 
(!need_write || force_write) { + *val = zero(); + } +} + + +/** Processes one element index per lane. Remembers the lane's current row, advances (*row, *row_end) with find_next_row using (*nrow, *nrow_end) as prediction, and, if any lane of the group moved to a new row, flushes the partial sums with warp_atomic_add (forced on the lanes whose row changed) and refreshes the prediction from the group's last lane via shfl. Afterwards the lane accumulates val[ind] * b[col * b_stride + column_id] into *temp_val, unless `last` is set and ind >= data_size. `last` and `subgroup_size` are not declared in the visible text - presumably template parameters; confirm. */ template +__dpct_inline__ void process_window( + const group::thread_block_tile &group, + const IndexType num_rows, const IndexType data_size, const IndexType ind, + IndexType *__restrict__ row, IndexType *__restrict__ row_end, + IndexType *__restrict__ nrow, IndexType *__restrict__ nrow_end, + ValueType *__restrict__ temp_val, const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, + const size_type b_stride, ValueType *__restrict__ c, + const size_type c_stride, const IndexType column_id, Closure scale) +{ + const IndexType curr_row = *row; + find_next_row(num_rows, data_size, ind, row, row_end, *nrow, + *nrow_end, row_ptrs); + // segmented scan + if (group.any(curr_row != *row)) { + warp_atomic_add(group, curr_row != *row, temp_val, curr_row, c, + c_stride, column_id, scale); + *nrow = group.shfl(*row, subgroup_size - 1); + *nrow_end = group.shfl(*row_end, subgroup_size - 1); + } + + if (!last || ind < data_size) { + const auto col = col_idxs[ind]; + *temp_val += val[ind] * b[col * b_stride + column_id]; + } +} + + +/** First element index handled by warp warp_idx: the nnz elements are viewed as ceildivT(nnz, warp_size) warp-sized lines that are split proportionally over nwarps warps, so the result is always a multiple of warp_size. The product is evaluated in long long. */ template +__dpct_inline__ IndexType get_warp_start_idx(const IndexType nwarps, + const IndexType nnz, + const IndexType warp_idx) +{ + const long long cache_lines = ceildivT(nnz, config::warp_size); + return (warp_idx * cache_lines / nwarps) * config::warp_size; +} + + +/** SpMV device kernel in which every warp handles a contiguous range of stored elements rather than a fixed set of rows. The warp index is group(2) * warps_in_block + local_id(1), the column of b and c is group(1), and srow[warp_idx] supplies the row in which the warp's range starts. Warps with index >= nwarps return immediately. */ template +__dpct_inline__ void spmv_kernel( + const IndexType nwarps, const IndexType num_rows, + const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, Closure scale, + sycl::nd_item<3> item_ct1) +{ + const IndexType warp_idx = + item_ct1.get_group(2) * warps_in_block + item_ct1.get_local_id(1); + const IndexType 
column_id = item_ct1.get_group(1); + if (warp_idx >= nwarps) { + return; + } + /* total number of stored elements, read from the last row pointer */ const IndexType data_size = row_ptrs[num_rows]; + const IndexType start = get_warp_start_idx(nwarps, data_size, warp_idx); + constexpr IndexType wsize = config::warp_size; + /* end of this warp's range, capped at data_size rounded up to a full warp */ const IndexType end = + min(get_warp_start_idx(nwarps, data_size, warp_idx + 1), + ceildivT(data_size, wsize) * wsize); + auto row = srow[warp_idx]; + auto row_end = row_ptrs[row + 1]; + auto nrow = row; + auto nrow_end = row_end; + ValueType temp_val = zero(); + IndexType ind = start + item_ct1.get_local_id(2); + find_next_row(num_rows, data_size, ind, &row, &row_end, nrow, + nrow_end, row_ptrs); + /* all windows but the final one go through the loop; the final window is handled by the separate call below */ const IndexType ind_end = end - wsize; + const auto tile_block = + group::tiled_partition(group::this_thread_block(item_ct1)); + for (; ind < ind_end; ind += wsize) { + process_window(tile_block, num_rows, data_size, ind, &row, + &row_end, &nrow, &nrow_end, &temp_val, val, + col_idxs, row_ptrs, b, b_stride, c, c_stride, + column_id, scale); + } + process_window(tile_block, num_rows, data_size, ind, &row, &row_end, + &nrow, &nrow_end, &temp_val, val, col_idxs, row_ptrs, + b, b_stride, c, c_stride, column_id, scale); + /* flush whatever is still accumulated, with force_write = true */ warp_atomic_add(tile_block, true, &temp_val, row, c, c_stride, column_id, + scale); +} + + +/** Device entry point without scaling: calls spmv_kernel with an identity closure. */ template +void abstract_spmv(const IndexType nwarps, const IndexType num_rows, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + spmv_kernel( + nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, + c_stride, [](const ValueType &x) { return x; }, item_ct1); +} + +/** Device entry point with scaling: reads alpha[0] once and calls spmv_kernel with a closure multiplying each flushed partial sum by it. */ template +void abstract_spmv(const IndexType nwarps, const IndexType num_rows, + const ValueType *__restrict__ alpha, + const ValueType *__restrict__ val, + const IndexType *__restrict__ 
col_idxs, + const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + ValueType scale_factor = alpha[0]; + spmv_kernel( + nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, + c_stride, + [&scale_factor](const ValueType &x) { return scale_factor * x; }, + item_ct1); +} + +GKO_ENABLE_DEFAULT_HOST(abstract_spmv, abstract_spmv); + + +/** Binary search along one diagonal of the merge grid formed by list a (length a_len) and the implicit list offset_b, offset_b + 1, ... (length b_len). Searches x in [max(diagonal - b_len, 0), min(diagonal, a_len)] for the first position where a[x] > offset_b + diagonal - x - 1, then stores *x = min(x, a_len) and *y = diagonal - x. */ template +__dpct_inline__ void merge_path_search( + const IndexType diagonal, const IndexType a_len, const IndexType b_len, + const IndexType *__restrict__ a, const IndexType offset_b, + IndexType *__restrict__ x, IndexType *__restrict__ y) +{ + auto x_min = max(diagonal - b_len, zero()); + auto x_max = min(diagonal, a_len); + while (x_min < x_max) { + /* midpoint written as x_min + half-width rather than (x_min + x_max) / 2 */ auto pivot = x_min + (x_max - x_min) / 2; + if (a[pivot] <= offset_b + diagonal - pivot - 1) { + x_min = pivot + 1; + } else { + x_max = pivot; + } + } + + *x = min(x_min, a_len); + *y = diagonal - x_min; +} + + +/** Combines the nwarps carry-out pairs (last_row[i], last_val[i]) into c. Each work-item (local id in dimension 2) first reduces a contiguous slice of ceildivT(nwarps, spmv_block_size) pairs serially, adding alpha_op(value) to c[row * c_stride] with a plain += whenever the row changes inside its slice; the remaining per-work-item (row, value) pairs are then combined through tmp_ind / tmp_val with block_segment_scan_reverse, and the last work-item of each segment adds the result to c. NOTE(review): the updates of c are non-atomic and only column 0 of c is addressed - presumably this is launched as a single group on a single right-hand side; confirm against the caller. */ template +void merge_path_reduce(const IndexType nwarps, + const ValueType *__restrict__ last_val, + const IndexType *__restrict__ last_row, + ValueType *__restrict__ c, const size_type c_stride, + Alpha_op alpha_op, sycl::nd_item<3> item_ct1, + UninitializedArray &tmp_ind, + UninitializedArray &tmp_val) +{ + const IndexType cache_lines = ceildivT(nwarps, spmv_block_size); + const IndexType tid = item_ct1.get_local_id(2); + const IndexType start = min(tid * cache_lines, nwarps); + const IndexType end = min((tid + 1) * cache_lines, nwarps); + ValueType value = zero(); + /* work-items with an empty slice keep value zero and the row of the last pair */ IndexType row = last_row[nwarps - 1]; + if (start < nwarps) { + value = last_val[start]; + row = last_row[start]; + for (IndexType i = start + 1; i < end; i++) { + if (last_row[i] != row) { + c[row * c_stride] += alpha_op(value); + row = last_row[i]; + value = last_val[i]; + } else { + value += last_val[i]; + } + } + } + + 
tmp_val[item_ct1.get_local_id(2)] = value; + tmp_ind[item_ct1.get_local_id(2)] = row; + group::this_thread_block(item_ct1).sync(); + bool last = + block_segment_scan_reverse(static_cast(tmp_ind), + static_cast(tmp_val), item_ct1); + group::this_thread_block(item_ct1).sync(); + if (last) { + c[row * c_stride] += alpha_op(tmp_val[item_ct1.get_local_id(2)]); + } +} + + +/** Merge-path SpMV for a single right-hand-side column (b and c are only addressed at column 0). The merge list has num_rows + nnz items; each group takes spmv_block_size * items_per_thread consecutive items and locates its start and end coordinates with merge_path_search over the row end pointers (row_ptrs + 1). The group's row end pointers are staged in shared_row_ptrs, and each work-item then searches its own start inside the group. `items_per_thread` is not declared in the visible text - presumably a template parameter; confirm. The srow parameter is not read in the visible part of this function. */ template +void merge_path_spmv(const IndexType num_rows, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + IndexType *__restrict__ row_out, + ValueType *__restrict__ val_out, Alpha_op alpha_op, + Beta_op beta_op, sycl::nd_item<3> item_ct1, + IndexType *shared_row_ptrs) +{ + const auto *row_end_ptrs = row_ptrs + 1; + const auto nnz = row_ptrs[num_rows]; + const IndexType num_merge_items = num_rows + nnz; + const auto block_items = spmv_block_size * items_per_thread; + + /* merge-list range [diagonal, diagonal_end) owned by this group, clamped to the list length */ const IndexType diagonal = + min(IndexType(block_items * item_ct1.get_group(2)), num_merge_items); + const IndexType diagonal_end = min(diagonal + block_items, num_merge_items); + IndexType block_start_x; + IndexType block_start_y; + IndexType end_x; + IndexType end_y; + merge_path_search(diagonal, num_rows, nnz, row_end_ptrs, zero(), + &block_start_x, &block_start_y); + merge_path_search(diagonal_end, num_rows, nnz, row_end_ptrs, + zero(), &end_x, &end_y); + const IndexType block_num_rows = end_x - block_start_x; + const IndexType block_num_nonzeros = end_y - block_start_y; + /* cooperative copy of this group's row end pointers into the shared buffer */ for (int i = item_ct1.get_local_id(2); + i < block_num_rows && block_start_x + i < num_rows; + i += spmv_block_size) { + shared_row_ptrs[i] = row_end_ptrs[block_start_x + i]; + } + group::this_thread_block(item_ct1).sync(); + + IndexType start_x; + IndexType start_y; + merge_path_search(IndexType(items_per_thread * item_ct1.get_local_id(2)), 
+ block_num_rows, block_num_nonzeros, shared_row_ptrs, + block_start_y, &start_x, &start_y); + + + IndexType ind = block_start_y + start_y; + IndexType row_i = block_start_x + start_x; + ValueType value = zero(); +#pragma unroll + /* consume items_per_thread merge items: either accumulate one stored element of the current row, or finish the row by writing alpha_op(value) + beta_op(old c) and move to the next row */ for (IndexType i = 0; i < items_per_thread; i++) { + if (row_i < num_rows) { + if (start_x == block_num_rows || ind < shared_row_ptrs[start_x]) { + value += val[ind] * b[col_idxs[ind] * b_stride]; + ind++; + } else { + c[row_i * c_stride] = + alpha_op(value) + beta_op(c[row_i * c_stride]); + start_x++; + row_i++; + value = zero(); + } + } + } + group::this_thread_block(item_ct1).sync(); + /* the shared buffer is reused: first spmv_block_size IndexType slots for rows, the bytes after them reinterpreted as values. NOTE(review): assumes the buffer is large enough and suitably aligned for spmv_block_size values - confirm against the launcher's allocation */ IndexType *tmp_ind = shared_row_ptrs; + ValueType *tmp_val = + reinterpret_cast(shared_row_ptrs + spmv_block_size); + tmp_val[item_ct1.get_local_id(2)] = value; + tmp_ind[item_ct1.get_local_id(2)] = row_i; + group::this_thread_block(item_ct1).sync(); + bool last = block_segment_scan_reverse(tmp_ind, tmp_val, item_ct1); + /* the group's last work-item exports the unfinished row and its partial sum as carry-out (row_out / val_out, indexed by group); other segment ends add their sum to c directly */ if (item_ct1.get_local_id(2) == spmv_block_size - 1) { + row_out[item_ct1.get_group(2)] = min(end_x, num_rows - 1); + val_out[item_ct1.get_group(2)] = tmp_val[item_ct1.get_local_id(2)]; + } else if (last) { + c[row_i * c_stride] += alpha_op(tmp_val[item_ct1.get_local_id(2)]); + } +} + +/** Device entry point without scaling: alpha_op is the identity and beta_op always returns zero, so a finished row overwrites the previous content of c. */ template +void abstract_merge_path_spmv( + const IndexType num_rows, const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + ValueType *__restrict__ c, const size_type c_stride, + IndexType *__restrict__ row_out, ValueType *__restrict__ val_out, + sycl::nd_item<3> item_ct1, IndexType *shared_row_ptrs) +{ + merge_path_spmv( + num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, + row_out, val_out, [](ValueType &x) { return x; }, + [](ValueType &x) { return zero(); }, item_ct1, + shared_row_ptrs); +} + +/** Host launcher for the unscaled variant: allocates an accessor of spmv_block_size * items_per_thread elements for the shared row pointers and submits a parallel_for over sycl_nd_range(grid, block). dynamic_shared_memory is accepted but not used in this body. */ template +void abstract_merge_path_spmv(dim3 grid, dim3 block, + size_type 
dynamic_shared_memory, + sycl::queue *queue, const IndexType num_rows, + const ValueType *val, const IndexType *col_idxs, + const IndexType *row_ptrs, const IndexType *srow, + const ValueType *b, const size_type b_stride, + ValueType *c, const size_type c_stride, + IndexType *row_out, ValueType *val_out) +{ + queue->submit([&](sycl::handler &cgh) { + /* per-group scratch buffer handed to the device function as shared_row_ptrs */ sycl::accessor + shared_row_ptrs_acc_ct1( + sycl::range<1>(spmv_block_size * items_per_thread), cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + abstract_merge_path_spmv( + num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, + c_stride, row_out, val_out, item_ct1, + static_cast( + shared_row_ptrs_acc_ct1.get_pointer())); + }); + }); +} + + +/** Device entry point with scaling: reads alpha[0] and beta[0] once; alpha_op multiplies by alpha and beta_op multiplies by beta, so a finished row is written as alpha * (row sum) + beta * (old c). */ template +void abstract_merge_path_spmv( + const IndexType num_rows, const ValueType *__restrict__ alpha, + const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, + const ValueType *__restrict__ b, const size_type b_stride, + const ValueType *__restrict__ beta, ValueType *__restrict__ c, + const size_type c_stride, IndexType *__restrict__ row_out, + ValueType *__restrict__ val_out, sycl::nd_item<3> item_ct1, + IndexType *shared_row_ptrs) +{ + const auto alpha_val = alpha[0]; + const auto beta_val = beta[0]; + merge_path_spmv( + num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, + row_out, val_out, [&alpha_val](ValueType &x) { return alpha_val * x; }, + [&beta_val](ValueType &x) { return beta_val * x; }, item_ct1, + shared_row_ptrs); +} + +/** Host launcher for the alpha/beta variant; same structure as the unscaled launcher (scratch accessor of spmv_block_size * items_per_thread elements, dynamic_shared_memory unused). */ template +void abstract_merge_path_spmv( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + const IndexType num_rows, const ValueType *alpha, const ValueType *val, + const IndexType *col_idxs, const IndexType *row_ptrs, const IndexType *srow, + const ValueType *b, const size_type b_stride, const ValueType *beta, + ValueType *c, const size_type c_stride, 
IndexType *row_out, + ValueType *val_out) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + shared_row_ptrs_acc_ct1( + sycl::range<1>(spmv_block_size * items_per_thread), cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + abstract_merge_path_spmv( + num_rows, alpha, val, col_idxs, row_ptrs, srow, b, b_stride, + beta, c, c_stride, row_out, val_out, item_ct1, + static_cast( + shared_row_ptrs_acc_ct1.get_pointer())); + }); + }); +} + + +/** Device entry point of the carry-out reduction without scaling: calls merge_path_reduce with an identity alpha_op. */ template +void abstract_reduce(const IndexType nwarps, + const ValueType *__restrict__ last_val, + const IndexType *__restrict__ last_row, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1, + UninitializedArray &tmp_ind, + UninitializedArray &tmp_val) +{ + merge_path_reduce( + nwarps, last_val, last_row, c, c_stride, [](ValueType &x) { return x; }, + item_ct1, tmp_ind, tmp_val); +} + +/** Host launcher for the unscaled reduction: declares two zero-dimensional local-target accessors, each holding one UninitializedArray, and passes the dereferenced arrays to the device function as tmp_ind and tmp_val. dynamic_shared_memory is accepted but not used in this body. */ template +void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const IndexType nwarps, + const ValueType *last_val, const IndexType *last_row, + ValueType *c, const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + tmp_ind_acc_ct1(cgh); + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + tmp_val_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + abstract_reduce(nwarps, last_val, last_row, c, c_stride, + item_ct1, *tmp_ind_acc_ct1.get_pointer(), + *tmp_val_acc_ct1.get_pointer()); + }); + }); +} + + +/** Device entry point of the carry-out reduction with scaling: reads alpha[0] once and calls merge_path_reduce with an alpha_op that multiplies by it. */ template +void abstract_reduce(const IndexType nwarps, + const ValueType *__restrict__ last_val, + const IndexType *__restrict__ last_row, + const ValueType *__restrict__ alpha, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1, + UninitializedArray &tmp_ind, + UninitializedArray &tmp_val) +{ + const auto 
alpha_val = alpha[0]; + merge_path_reduce( + nwarps, last_val, last_row, c, c_stride, + [&alpha_val](ValueType &x) { return alpha_val * x; }, item_ct1, tmp_ind, + tmp_val); +} + +/** Host launcher for the scaled reduction; same structure as the unscaled launcher with alpha forwarded to the device function. dynamic_shared_memory is accepted but not used in this body. */ template +void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const IndexType nwarps, + const ValueType *last_val, const IndexType *last_row, + const ValueType *alpha, ValueType *c, + const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + tmp_ind_acc_ct1(cgh); + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + tmp_val_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + abstract_reduce(nwarps, last_val, last_row, alpha, c, c_stride, + item_ct1, *tmp_ind_acc_ct1.get_pointer(), + *tmp_val_acc_ct1.get_pointer()); + }); + }); +} + + +/** Classical row-wise SpMV: each subgroup tile handles the rows row, row + subrow, ... (starting at its flat subwarp id, stepping by the total subwarp count); the column of b and c is group(1). Within a row the lanes stride over the stored elements by subgroup_size, the partial sums are reduced across the tile, and lane 0 writes scale(row sum, old c value) to c. `subgroup_size` is not declared in the visible text - presumably a template parameter; confirm. */ template +void device_classical_spmv(const size_type num_rows, + const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, + const ValueType *__restrict__ b, + const size_type b_stride, ValueType *__restrict__ c, + const size_type c_stride, Closure scale, + sycl::nd_item<3> item_ct1) +{ + auto subgroup_tile = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto subrow = thread::get_subwarp_num_flat(item_ct1); + const auto subid = subgroup_tile.thread_rank(); + const auto column_id = item_ct1.get_group(1); + auto row = thread::get_subwarp_id_flat(item_ct1); + for (; row < num_rows; row += subrow) { + const auto ind_end = row_ptrs[row + 1]; + ValueType temp_val = zero(); + for (auto ind = row_ptrs[row] + subid; ind < ind_end; + ind += subgroup_size) { + temp_val += val[ind] * b[col_idxs[ind] * b_stride + column_id]; + } + auto subgroup_result = ::gko::kernels::dpcpp::reduce( + subgroup_tile, temp_val, + [](const ValueType &a, const ValueType &b) { return 
a + b; }); + // TODO: check the barrier + subgroup_tile.sync(); + /* only lane 0 of the tile writes the row result */ if (subid == 0) { + c[row * c_stride + column_id] = + scale(subgroup_result, c[row * c_stride + column_id]); + } + } +} + + +/** Device entry point without scaling: the closure returns the row sum and ignores the old value, so c is overwritten. */ template +void abstract_classical_spmv( + const size_type num_rows, const ValueType *__restrict__ val, + const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, + const size_type b_stride, ValueType *__restrict__ c, + const size_type c_stride, sycl::nd_item<3> item_ct1) +{ + device_classical_spmv( + num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, + [](const ValueType &x, const ValueType &y) { return x; }, item_ct1); +} + +/** Host launcher for the unscaled classical kernel: submits a parallel_for over sycl_nd_range(grid, block); dynamic_shared_memory is accepted but not used in this body. */ template +void abstract_classical_spmv(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, const size_type num_rows, + const ValueType *val, const IndexType *col_idxs, + const IndexType *row_ptrs, const ValueType *b, + const size_type b_stride, ValueType *c, + const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + abstract_classical_spmv(num_rows, val, col_idxs, + row_ptrs, b, b_stride, c, + c_stride, item_ct1); + }); + }); +} + + +/** Device entry point with scaling: reads alpha[0] and beta[0] once; the closure yields alpha * (row sum) + beta * (old c value). */ template +void abstract_classical_spmv( + const size_type num_rows, const ValueType *__restrict__ alpha, + const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, + const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, + const size_type b_stride, const ValueType *__restrict__ beta, + ValueType *__restrict__ c, const size_type c_stride, + sycl::nd_item<3> item_ct1) +{ + const auto alpha_val = alpha[0]; + const auto beta_val = beta[0]; + device_classical_spmv( + num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, + [&alpha_val, &beta_val](const ValueType &x, const ValueType &y) { + return alpha_val * x + beta_val * y; + }, + item_ct1); +} + +/** Host launcher for the alpha/beta classical kernel; same structure as the unscaled launcher. */ template +void abstract_classical_spmv(dim3 grid, 
dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, const size_type num_rows, + const ValueType *alpha, const ValueType *val, + const IndexType *col_idxs, + const IndexType *row_ptrs, const ValueType *b, + const size_type b_stride, const ValueType *beta, + ValueType *c, const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + abstract_classical_spmv( + num_rows, alpha, val, col_idxs, row_ptrs, b, + b_stride, beta, c, c_stride, item_ct1); + }); + }); +} + + +/** Device function expanding row pointers to row indices: work-item tidx < num_rows writes its own index tidx into idxs[i] for every i in [ptrs[tidx], ptrs[tidx + 1]). */ template +void convert_row_ptrs_to_idxs(size_type num_rows, + const IndexType *__restrict__ ptrs, + IndexType *__restrict__ idxs, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + for (auto i = ptrs[tidx]; i < ptrs[tidx + 1]; i++) { + idxs[i] = tidx; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(convert_row_ptrs_to_idxs, convert_row_ptrs_to_idxs); + + +/** Device function: the work-item with global x index (dimension 2) tidx_x and global y index (dimension 1) tidx_y writes zero to result[tidx_y * stride + tidx_x] when it lies inside the num_rows x num_cols range. */ template +void initialize_zero_dense(size_type num_rows, size_type num_cols, + size_type stride, ValueType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + const auto tidx_x = + item_ct1.get_local_id(2) + + item_ct1.get_local_range().get(2) * item_ct1.get_group(2); + const auto tidx_y = + item_ct1.get_local_id(1) + + item_ct1.get_local_range().get(1) * item_ct1.get_group(1); + if (tidx_x < num_cols && tidx_y < num_rows) { + result[tidx_y * stride + tidx_x] = zero(); + } +} + +GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); + + +/** Device function: work-item tidx < num_rows copies every stored element i of row tidx to result[stride * tidx + col_idxs[i]]. */ template +void fill_in_dense(size_type num_rows, const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ values, size_type stride, + ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + for (auto i = row_ptrs[tidx]; i < row_ptrs[tidx + 1]; i++) { + result[stride * tidx + col_idxs[i]] = 
values[i]; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); + + +template +void calculate_nnz_per_row(size_type num_rows, + const IndexType *__restrict__ row_ptrs, + size_type *__restrict__ nnz_per_row, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + nnz_per_row[tidx] = row_ptrs[tidx + 1] - row_ptrs[tidx]; + } +} + +GKO_ENABLE_DEFAULT_HOST(calculate_nnz_per_row, calculate_nnz_per_row); + + +void calculate_slice_lengths(size_type num_rows, size_type slice_size, + size_type stride_factor, + const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ slice_lengths, + size_type *__restrict__ slice_sets, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + const auto sliceid = item_ct1.get_group(2); + const auto tid_in_warp = item_ct1.get_local_id(2); + + if (sliceid * slice_size + tid_in_warp < num_rows) { + size_type thread_result = 0; + for (int i = tid_in_warp; i < slice_size; i += warp_size) { + thread_result = + (i + slice_size * sliceid < num_rows) + ? 
max(thread_result, nnz_per_row[sliceid * slice_size + i]) + : thread_result; + } + + auto warp_tile = group::tiled_partition( + group::this_thread_block(item_ct1)); + auto warp_result = ::gko::kernels::dpcpp::reduce( + warp_tile, thread_result, + [](const size_type &a, const size_type &b) { return max(a, b); }); + + if (tid_in_warp == 0) { + auto slice_length = + ceildiv(warp_result, stride_factor) * stride_factor; + slice_lengths[sliceid] = slice_length; + slice_sets[sliceid] = slice_length; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(calculate_slice_lengths, calculate_slice_lengths); + + +template +void fill_in_sellp(size_type num_rows, size_type slice_size, + const ValueType *__restrict__ source_values, + const IndexType *__restrict__ source_row_ptrs, + const IndexType *__restrict__ source_col_idxs, + size_type *__restrict__ slice_lengths, + size_type *__restrict__ slice_sets, + IndexType *__restrict__ result_col_idxs, + ValueType *__restrict__ result_values, + sycl::nd_item<3> item_ct1) +{ + const auto global_row = thread::get_thread_id_flat(item_ct1); + const auto row = global_row % slice_size; + const auto sliceid = global_row / slice_size; + + if (global_row < num_rows) { + size_type sellp_ind = slice_sets[sliceid] * slice_size + row; + + for (size_type csr_ind = source_row_ptrs[global_row]; + csr_ind < source_row_ptrs[global_row + 1]; csr_ind++) { + result_values[sellp_ind] = source_values[csr_ind]; + result_col_idxs[sellp_ind] = source_col_idxs[csr_ind]; + sellp_ind += slice_size; + } + for (size_type i = sellp_ind; + i < + (slice_sets[sliceid] + slice_lengths[sliceid]) * slice_size + row; + i += slice_size) { + result_col_idxs[i] = 0; + result_values[i] = zero(); + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_sellp, fill_in_sellp); + + +template +void initialize_zero_ell(size_type max_nnz_per_row, size_type stride, + ValueType *__restrict__ values, + IndexType *__restrict__ col_idxs, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = 
thread::get_thread_id_flat(item_ct1); + + if (tidx < stride * max_nnz_per_row) { + values[tidx] = zero(); + col_idxs[tidx] = 0; + } +} + +GKO_ENABLE_DEFAULT_HOST(initialize_zero_ell, initialize_zero_ell); + + +template +void fill_in_ell(size_type num_rows, size_type stride, + const ValueType *__restrict__ source_values, + const IndexType *__restrict__ source_row_ptrs, + const IndexType *__restrict__ source_col_idxs, + ValueType *__restrict__ result_values, + IndexType *__restrict__ result_col_idxs, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + const auto row = thread::get_subwarp_id_flat(item_ct1); + const auto local_tidx = item_ct1.get_local_id(2) % warp_size; + + if (row < num_rows) { + for (size_type i = local_tidx; + i < source_row_ptrs[row + 1] - source_row_ptrs[row]; + i += warp_size) { + const auto result_idx = row + stride * i; + const auto source_idx = i + source_row_ptrs[row]; + result_values[result_idx] = source_values[source_idx]; + result_col_idxs[result_idx] = source_col_idxs[source_idx]; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_ell, fill_in_ell); + + +void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, + size_type stride_factor, + const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto warpid = thread::get_subwarp_id_flat(item_ct1); + const auto tid_in_warp = warp_tile.thread_rank(); + const auto slice_num = ceildiv(num_rows, slice_size); + + size_type thread_result = 0; + for (auto i = tid_in_warp; i < slice_size; i += warp_size) { + if (warpid * slice_size + i < num_rows) { + thread_result = + max(thread_result, nnz_per_row[warpid * slice_size + i]); + } + } + auto warp_result = ::gko::kernels::dpcpp::reduce( + warp_tile, thread_result, + [](const size_type &a, const size_type &b) { return max(a, 
b); }); + + if (tid_in_warp == 0 && warpid < slice_num) { + result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; + } +} + +GKO_ENABLE_DEFAULT_HOST(reduce_max_nnz_per_slice, reduce_max_nnz_per_slice); + + +void reduce_total_cols(size_type num_slices, + const size_type *__restrict__ max_nnz_per_slice, + size_type *__restrict__ result, + sycl::nd_item<3> item_ct1, size_type *block_result) +{ + reduce_array(num_slices, max_nnz_per_slice, block_result, item_ct1, + [](const size_type &x, const size_type &y) { return x + y; }); + + if (item_ct1.get_local_id(2) == 0) { + result[item_ct1.get_group(2)] = block_result[0]; + } +} + +void reduce_total_cols(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_slices, + const size_type *max_nnz_per_slice, size_type *result) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + block_result_acc_ct1(sycl::range<1>(default_block_size), cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + reduce_total_cols(num_slices, max_nnz_per_slice, result, + item_ct1, block_result_acc_ct1.get_pointer()); + }); + }); +} + + +void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ result, sycl::nd_item<3> item_ct1, + size_type *block_max) +{ + reduce_array( + size, nnz_per_row, block_max, item_ct1, + [](const size_type &x, const size_type &y) { return max(x, y); }); + + if (item_ct1.get_local_id(2) == 0) { + result[item_ct1.get_group(2)] = block_max[0]; + } +} + +void reduce_max_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type size, + const size_type *nnz_per_row, size_type *result) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + block_max_acc_ct1(sycl::range<1>(default_block_size), cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + reduce_max_nnz(size, nnz_per_row, result, item_ct1, + 
block_max_acc_ct1.get_pointer()); + }); + }); +} + + +template +void calculate_hybrid_coo_row_nnz(size_type num_rows, + size_type ell_max_nnz_per_row, + IndexType *__restrict__ csr_row_idxs, + size_type *__restrict__ coo_row_nnz, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + const size_type csr_nnz = csr_row_idxs[tidx + 1] - csr_row_idxs[tidx]; + coo_row_nnz[tidx] = + (csr_nnz > ell_max_nnz_per_row) * (csr_nnz - ell_max_nnz_per_row); + } +} + +GKO_ENABLE_DEFAULT_HOST(calculate_hybrid_coo_row_nnz, + calculate_hybrid_coo_row_nnz); + + +template +void fill_in_hybrid(size_type num_rows, size_type stride, + size_type ell_max_nnz_per_row, + const ValueType *__restrict__ source_values, + const IndexType *__restrict__ source_row_ptrs, + const IndexType *__restrict__ source_col_idxs, + const size_type *__restrict__ coo_offset, + ValueType *__restrict__ result_ell_val, + IndexType *__restrict__ result_ell_col, + ValueType *__restrict__ result_coo_val, + IndexType *__restrict__ result_coo_col, + IndexType *__restrict__ result_coo_row, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + const auto row = thread::get_subwarp_id_flat(item_ct1); + const auto local_tidx = item_ct1.get_local_id(2) % warp_size; + + if (row < num_rows) { + for (size_type i = local_tidx; + i < source_row_ptrs[row + 1] - source_row_ptrs[row]; + i += warp_size) { + const auto source_idx = i + source_row_ptrs[row]; + if (i < ell_max_nnz_per_row) { + const auto result_idx = row + stride * i; + result_ell_val[result_idx] = source_values[source_idx]; + result_ell_col[result_idx] = source_col_idxs[source_idx]; + } else { + const auto result_idx = + coo_offset[row] + i - ell_max_nnz_per_row; + result_coo_val[result_idx] = source_values[source_idx]; + result_coo_col[result_idx] = source_col_idxs[source_idx]; + result_coo_row[result_idx] = row; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_hybrid, 
fill_in_hybrid); + + +template +void check_unsorted(const IndexType *__restrict__ row_ptrs, + const IndexType *__restrict__ col_idxs, IndexType num_rows, + bool *flag, sycl::nd_item<3> item_ct1, bool *sh_flag) +{ + auto block = group::this_thread_block(item_ct1); + if (block.thread_rank() == 0) { + *sh_flag = *flag; + } + block.sync(); + + auto row = thread::get_thread_id_flat(item_ct1); + if (row >= num_rows) { + return; + } + + // fail early + if ((*sh_flag)) { + for (auto nz = row_ptrs[row]; nz < row_ptrs[row + 1] - 1; ++nz) { + if (col_idxs[nz] > col_idxs[nz + 1]) { + *flag = false; + *sh_flag = false; + return; + } + } + } +} + +template +void check_unsorted(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const IndexType *row_ptrs, + const IndexType *col_idxs, IndexType num_rows, bool *flag) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + sh_flag_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + check_unsorted(row_ptrs, col_idxs, num_rows, flag, item_ct1, + sh_flag_acc_ct1.get_pointer()); + }); + }); +} + + +template +void extract_diagonal(size_type diag_size, size_type nnz, + const ValueType *__restrict__ orig_values, + const IndexType *__restrict__ orig_row_ptrs, + const IndexType *__restrict__ orig_col_idxs, + ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + const auto row = thread::get_subwarp_id_flat(item_ct1); + const auto local_tidx = item_ct1.get_local_id(2) % warp_size; + + if (row < diag_size) { + for (size_type i = local_tidx; + i < orig_row_ptrs[row + 1] - orig_row_ptrs[row]; i += warp_size) { + const auto orig_idx = i + orig_row_ptrs[row]; + if (orig_idx < nnz) { + if (orig_col_idxs[orig_idx] == row) { + diag[row] = orig_values[orig_idx]; + return; + } + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); + + +} // namespace kernel + + +template +void 
row_ptr_permute_kernel(size_type num_rows, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + IndexType *__restrict__ out_nnz, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_thread_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto in_row = permutation[tid]; + auto out_row = tid; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + +GKO_ENABLE_DEFAULT_HOST(row_ptr_permute_kernel, row_ptr_permute_kernel); + + +template +void inv_row_ptr_permute_kernel(size_type num_rows, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + IndexType *__restrict__ out_nnz, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_thread_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto in_row = tid; + auto out_row = permutation[tid]; + out_nnz[out_row] = in_row_ptrs[in_row + 1] - in_row_ptrs[in_row]; +} + +GKO_ENABLE_DEFAULT_HOST(inv_row_ptr_permute_kernel, inv_row_ptr_permute_kernel); + + +template +void row_permute_kernel(size_type num_rows, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, + ValueType *__restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = permutation[tid]; + auto out_row = tid; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + +template +void row_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, 
+ sycl::queue *queue, size_type num_rows, + const IndexType *permutation, + const IndexType *in_row_ptrs, const IndexType *in_cols, + const ValueType *in_vals, const IndexType *out_row_ptrs, + IndexType *out_cols, ValueType *out_vals) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + row_permute_kernel( + num_rows, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + + +template +void inv_row_permute_kernel(size_type num_rows, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, + ValueType *__restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + +template +void inv_row_permute_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, const IndexType *permutation, + const IndexType *in_row_ptrs, + const IndexType *in_cols, const ValueType *in_vals, + const IndexType *out_row_ptrs, IndexType *out_cols, + ValueType *out_vals) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + inv_row_permute_kernel( + num_rows, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); 
+} + + +template +void inv_symm_permute_kernel(size_type num_rows, + const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, + ValueType *__restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = permutation[in_cols[in_begin + i]]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + +template +void inv_symm_permute_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + const IndexType *permutation, + const IndexType *in_row_ptrs, + const IndexType *in_cols, const ValueType *in_vals, + const IndexType *out_row_ptrs, IndexType *out_cols, + ValueType *out_vals) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + inv_symm_permute_kernel( + num_rows, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + +namespace host_kernel { + + +template +void merge_path_spmv(syn::value_list, + std::shared_ptr exec, + const matrix::Csr *a, + const matrix::Dense *b, + matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) +{ + const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); + const IndexType grid_num = + ceildiv(total, spmv_block_size * items_per_thread); + const dim3 grid(grid_num); + const dim3 block(spmv_block_size); 
+ Array row_out(exec, grid_num); + Array val_out(exec, grid_num); + + for (IndexType column_id = 0; column_id < b->get_size()[1]; column_id++) { + if (alpha == nullptr && beta == nullptr) { + const auto b_vals = b->get_const_values() + column_id; + auto c_vals = c->get_values() + column_id; + kernel::abstract_merge_path_spmv( + grid, block, 0, exec->get_queue(), + static_cast(a->get_size()[0]), a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + a->get_const_srow(), b_vals, b->get_stride(), c_vals, + c->get_stride(), row_out.get_data(), val_out.get_data()); + kernel::abstract_reduce(1, spmv_block_size, 0, exec->get_queue(), + grid_num, val_out.get_data(), + row_out.get_data(), c_vals, + c->get_stride()); + + } else if (alpha != nullptr && beta != nullptr) { + const auto b_vals = b->get_const_values() + column_id; + auto c_vals = c->get_values() + column_id; + kernel::abstract_merge_path_spmv( + grid, block, 0, exec->get_queue(), + static_cast(a->get_size()[0]), + alpha->get_const_values(), a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + a->get_const_srow(), b_vals, b->get_stride(), + beta->get_const_values(), c_vals, c->get_stride(), + row_out.get_data(), val_out.get_data()); + kernel::abstract_reduce( + 1, spmv_block_size, 0, exec->get_queue(), grid_num, + val_out.get_data(), row_out.get_data(), + alpha->get_const_values(), c_vals, c->get_stride()); + } else { + GKO_KERNEL_NOT_FOUND; + } + } +} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); + + +template +int compute_items_per_thread(std::shared_ptr exec) +{ + int num_item = 6; + // Ensure that the following is satisfied: + // sizeof(IndexType) + sizeof(ValueType) + // <= items_per_thread * sizeof(IndexType) + constexpr int minimal_num = + ceildiv(sizeof(IndexType) + sizeof(ValueType), sizeof(IndexType)); + int items_per_thread = num_item * 4 / sizeof(IndexType); + return std::max(minimal_num, items_per_thread); +} + + 
+template +void classical_spmv(syn::value_list, + std::shared_ptr exec, + const matrix::Csr *a, + const matrix::Dense *b, + matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) +{ + constexpr int threads_per_cu = 7; + const auto num_subgroup = + exec->get_num_computing_units() * threads_per_cu * classical_overweight; + const auto nsg_in_group = spmv_block_size / subgroup_size; + const auto gridx = + std::min(ceildiv(a->get_size()[0], spmv_block_size / subgroup_size), + int64(num_subgroup / nsg_in_group)); + const dim3 grid(gridx, b->get_size()[1]); + const dim3 block(spmv_block_size); + + if (alpha == nullptr && beta == nullptr) { + kernel::abstract_classical_spmv( + grid, block, 0, exec->get_queue(), a->get_size()[0], + a->get_const_values(), a->get_const_col_idxs(), + a->get_const_row_ptrs(), b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } else if (alpha != nullptr && beta != nullptr) { + kernel::abstract_classical_spmv( + grid, block, 0, exec->get_queue(), a->get_size()[0], + alpha->get_const_values(), a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + b->get_const_values(), b->get_stride(), beta->get_const_values(), + c->get_values(), c->get_stride()); + } else { + GKO_KERNEL_NOT_FOUND; + } +} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); + + +} // namespace host_kernel + + +template +void spmv(std::shared_ptr exec, + const matrix::Csr *a, + const matrix::Dense *b, matrix::Dense *c) +{ + if (a->get_strategy()->get_name() == "load_balance") { + components::fill_array(exec, c->get_values(), + c->get_num_stored_elements(), zero()); + const IndexType nwarps = a->get_num_srow_elements(); + if (nwarps > 0) { + const dim3 csr_block(config::warp_size, warps_in_block, 1); + const dim3 csr_grid(ceildiv(nwarps, warps_in_block), + b->get_size()[1]); + kernel::abstract_spmv( + csr_grid, csr_block, 0, exec->get_queue(), nwarps, + 
static_cast(a->get_size()[0]), a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + a->get_const_srow(), b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } else { + GKO_NOT_SUPPORTED(nwarps); + } + } else if (a->get_strategy()->get_name() == "merge_path") { + int items_per_thread = + host_kernel::compute_items_per_thread(exec); + host_kernel::select_merge_path_spmv( + compiled_kernels(), + [&items_per_thread](int compiled_info) { + return items_per_thread == compiled_info; + }, + syn::value_list(), syn::type_list<>(), exec, a, b, c); + } else if (a->get_strategy()->get_name() == "classical") { + IndexType max_length_per_row = 0; + using Tcsr = matrix::Csr; + if (auto strategy = + std::dynamic_pointer_cast( + a->get_strategy())) { + max_length_per_row = strategy->get_max_length_per_row(); + } else if (auto strategy = std::dynamic_pointer_cast< + const typename Tcsr::automatical>(a->get_strategy())) { + max_length_per_row = strategy->get_max_length_per_row(); + } else { + GKO_NOT_SUPPORTED(a->get_strategy()); + } + host_kernel::select_classical_spmv( + classical_kernels(), + [&max_length_per_row](int compiled_info) { + return max_length_per_row >= compiled_info; + }, + syn::value_list(), syn::type_list<>(), exec, a, b, c); + } else if (a->get_strategy()->get_name() == "sparselib" || + a->get_strategy()->get_name() == "cusparse") { + if (!is_complex()) { + oneapi::mkl::sparse::matrix_handle_t mat_handle; + oneapi::mkl::sparse::init_matrix_handle(&mat_handle); + oneapi::mkl::sparse::set_csr_data( + mat_handle, IndexType(a->get_size()[0]), + IndexType(a->get_size()[1]), oneapi::mkl::index_base::zero, + const_cast(a->get_const_row_ptrs()), + const_cast(a->get_const_col_idxs()), + const_cast(a->get_const_values())); + if (b->get_size()[1] == 1 && b->get_stride() == 1) { + oneapi::mkl::sparse::gemv( + *exec->get_queue(), oneapi::mkl::transpose::nontrans, + one(), mat_handle, + const_cast(b->get_const_values()), + 
zero(), c->get_values()); + } else { + oneapi::mkl::sparse::gemm( + *exec->get_queue(), oneapi::mkl::transpose::nontrans, + one(), mat_handle, + const_cast(b->get_const_values()), + b->get_size()[1], b->get_stride(), zero(), + c->get_values(), c->get_stride()); + } + oneapi::mkl::sparse::release_matrix_handle(&mat_handle); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + GKO_NOT_IMPLEMENTED; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Csr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) +{ + if (a->get_strategy()->get_name() == "load_balance") { + dense::scale(exec, beta, c); + + const IndexType nwarps = a->get_num_srow_elements(); + + if (nwarps > 0) { + const dim3 csr_block(config::warp_size, warps_in_block, 1); + const dim3 csr_grid(ceildiv(nwarps, warps_in_block), + b->get_size()[1]); + kernel::abstract_spmv( + csr_grid, csr_block, 0, exec->get_queue(), nwarps, + static_cast(a->get_size()[0]), + alpha->get_const_values(), a->get_const_values(), + a->get_const_col_idxs(), a->get_const_row_ptrs(), + a->get_const_srow(), b->get_const_values(), b->get_stride(), + c->get_values(), c->get_stride()); + } else { + GKO_NOT_SUPPORTED(nwarps); + } + } else if (a->get_strategy()->get_name() == "sparselib" || + a->get_strategy()->get_name() == "cusparse") { + if (!is_complex()) { + oneapi::mkl::sparse::matrix_handle_t mat_handle; + oneapi::mkl::sparse::init_matrix_handle(&mat_handle); + oneapi::mkl::sparse::set_csr_data( + mat_handle, IndexType(a->get_size()[0]), + IndexType(a->get_size()[1]), oneapi::mkl::index_base::zero, + const_cast(a->get_const_row_ptrs()), + const_cast(a->get_const_col_idxs()), + const_cast(a->get_const_values())); + if (b->get_size()[1] == 1 && b->get_stride() == 1) { + oneapi::mkl::sparse::gemv( + *exec->get_queue(), oneapi::mkl::transpose::nontrans, + 
exec->copy_val_to_host(alpha->get_const_values()), + mat_handle, const_cast(b->get_const_values()), + exec->copy_val_to_host(beta->get_const_values()), + c->get_values()); + } else { + oneapi::mkl::sparse::gemm( + *exec->get_queue(), oneapi::mkl::transpose::nontrans, + exec->copy_val_to_host(alpha->get_const_values()), + mat_handle, const_cast(b->get_const_values()), + b->get_size()[1], b->get_stride(), + exec->copy_val_to_host(beta->get_const_values()), + c->get_values(), c->get_stride()); + } + oneapi::mkl::sparse::release_matrix_handle(&mat_handle); + } else { + GKO_NOT_IMPLEMENTED; + } + } else if (a->get_strategy()->get_name() == "classical") { + IndexType max_length_per_row = 0; + using Tcsr = matrix::Csr; + if (auto strategy = + std::dynamic_pointer_cast( + a->get_strategy())) { + max_length_per_row = strategy->get_max_length_per_row(); + } else if (auto strategy = std::dynamic_pointer_cast< + const typename Tcsr::automatical>(a->get_strategy())) { + max_length_per_row = strategy->get_max_length_per_row(); + } else { + GKO_NOT_SUPPORTED(a->get_strategy()); + } + host_kernel::select_classical_spmv( + classical_kernels(), + [&max_length_per_row](int compiled_info) { + return max_length_per_row >= compiled_info; + }, + syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, + beta); + } else if (a->get_strategy()->get_name() == "merge_path") { + int items_per_thread = + host_kernel::compute_items_per_thread(exec); + host_kernel::select_merge_path_spmv( + compiled_kernels(), + [&items_per_thread](int compiled_info) { + return items_per_thread == compiled_info; + }, + syn::value_list(), syn::type_list<>(), exec, a, b, c, alpha, + beta); + } else { + GKO_NOT_IMPLEMENTED; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); + + +namespace { + + +/** + * @internal + * + * Entry in a heap storing a column index and associated non-zero index + * (and row end) from a matrix. 
+ * + * @tparam ValueType The value type for matrices. + * @tparam IndexType The index type for matrices. + */ +template +struct col_heap_element { + using value_type = ValueType; + using index_type = IndexType; + + IndexType idx; + IndexType end; + IndexType col; + + ValueType val() const { return zero(); } + + col_heap_element(IndexType idx, IndexType end, IndexType col, ValueType) + : idx{idx}, end{end}, col{col} + {} +}; + + +/** + * @internal + * + * Entry in a heap storing an entry (value and column index) and associated + * non-zero index (and row end) from a matrix. + * + * @tparam ValueType The value type for matrices. + * @tparam IndexType The index type for matrices. + */ +template +struct val_heap_element { + using value_type = ValueType; + using index_type = IndexType; + + IndexType idx; + IndexType end; + IndexType col; + ValueType val_; + + ValueType val() const { return val_; } + + val_heap_element(IndexType idx, IndexType end, IndexType col, ValueType val) + : idx{idx}, end{end}, col{col}, val_{val} + {} +}; + + +/** + * @internal + * + * Restores the binary heap condition downwards from a given index. + * + * The heap condition is: col(child) >= col(parent) + * + * @param heap a pointer to the array containing the heap elements. + * @param idx the index of the starting heap node that potentially + * violates the heap condition. + * @param size the number of elements in the heap. + * @tparam HeapElement the element type in the heap. See col_heap_element and + * val_heap_element + */ +template +void sift_down(HeapElement *heap, typename HeapElement::index_type idx, + typename HeapElement::index_type size) +{ + auto curcol = heap[idx].col; + while (idx * 2 + 1 < size) { + auto lchild = idx * 2 + 1; + auto rchild = min(lchild + 1, size - 1); + auto lcol = heap[lchild].col; + auto rcol = heap[rchild].col; + auto mincol = min(lcol, rcol); + if (mincol >= curcol) { + break; + } + auto minchild = lcol == mincol ? 
lchild : rchild; + std::swap(heap[minchild], heap[idx]); + idx = minchild; + } +} + + +/** + * @internal + * + * Generic SpGEMM implementation for a single output row of A * B using binary + * heap-based multiway merging. + * + * @param row The row for which to compute the SpGEMM + * @param a The input matrix A + * @param b The input matrix B (its column indices must be sorted within each + * row!) + * @param heap The heap to use for this implementation. It must have as many + * entries as the input row has non-zeros. + * @param init_cb function to initialize the state for a single row. Its return + * value will be updated by subsequent calls of other callbacks, + * and then returned by this function. Its signature must be + * compatible with `return_type state = init_cb(row)`. + * @param step_cb function that will be called for each accumulation from an + * entry of B into the output state. Its signature must be + * compatible with `step_cb(value, column, state)`. + * @param col_cb function that will be called once for each output column after + * all accumulations into it are completed. Its signature must be + * compatible with `col_cb(column, state)`. + * @return the value initialized by init_cb and updated by step_cb and col_cb + * @note If the columns of B are not sorted, the output may have duplicate + * column entries. + * + * @tparam HeapElement the heap element type. 
See col_heap_element and + * val_heap_element + * @tparam InitCallback functor type for init_cb + * @tparam StepCallback functor type for step_cb + * @tparam ColCallback functor type for col_cb + */ +template +auto spgemm_multiway_merge(size_type row, + const typename HeapElement::index_type *a_row_ptrs, + const typename HeapElement::index_type *a_cols, + const typename HeapElement::value_type *a_vals, + const typename HeapElement::index_type *b_row_ptrs, + const typename HeapElement::index_type *b_cols, + const typename HeapElement::value_type *b_vals, + HeapElement *heap, InitCallback init_cb, + StepCallback step_cb, ColCallback col_cb) + -> decltype(init_cb(0)) +{ + auto a_begin = a_row_ptrs[row]; + auto a_end = a_row_ptrs[row + 1]; + + using index_type = typename HeapElement::index_type; + constexpr auto sentinel = std::numeric_limits::max(); + + auto state = init_cb(row); + + // initialize the heap + for (auto a_nz = a_begin; a_nz < a_end; ++a_nz) { + auto b_row = a_cols[a_nz]; + auto b_begin = b_row_ptrs[b_row]; + auto b_end = b_row_ptrs[b_row + 1]; + heap[a_nz] = {b_begin, b_end, + checked_load(b_cols, b_begin, b_end, sentinel), + a_vals[a_nz]}; + } + + if (a_begin != a_end) { + // make heap: + auto a_size = a_end - a_begin; + for (auto i = (a_size - 2) / 2; i >= 0; --i) { + sift_down(heap + a_begin, i, a_size); + } + auto &top = heap[a_begin]; + auto &bot = heap[a_end - 1]; + auto col = top.col; + + while (top.col != sentinel) { + step_cb(b_vals[top.idx] * top.val(), top.col, state); + // move to the next element + top.idx++; + top.col = checked_load(b_cols, top.idx, top.end, sentinel); + // restore heap property + // pop_heap swaps top and bot, we need to prevent that + // so that we do a simple sift_down instead + sift_down(heap + a_begin, index_type{}, a_size); + if (top.col != col) { + col_cb(col, state); + } + col = top.col; + } + } + + return state; +} + + +} // namespace + + +template +void spgemm(std::shared_ptr exec, + const matrix::Csr *a, + const 
matrix::Csr *b, + matrix::Csr *c) +{ + auto num_rows = a->get_size()[0]; + const auto a_row_ptrs = a->get_const_row_ptrs(); + const auto a_cols = a->get_const_col_idxs(); + const auto a_vals = a->get_const_values(); + const auto b_row_ptrs = b->get_const_row_ptrs(); + const auto b_cols = b->get_const_col_idxs(); + const auto b_vals = b->get_const_values(); + auto c_row_ptrs = c->get_row_ptrs(); + auto queue = exec->get_queue(); + + Array> heap_array( + exec, a->get_num_stored_elements()); + + auto heap = heap_array.get_data(); + auto col_heap = + reinterpret_cast *>(heap); + + // first sweep: count nnz for each row + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto a_row = static_cast(idx[0]); + c_row_ptrs[a_row] = spgemm_multiway_merge( + a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, + col_heap, [](size_type) { return IndexType{}; }, + [](ValueType, IndexType, IndexType &) {}, + [](IndexType, IndexType &nnz) { nnz++; }); + }); + }); + + // build row pointers + components::prefix_sum(exec, c_row_ptrs, num_rows + 1); + + // second sweep: accumulate non-zeros + const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); + matrix::CsrBuilder c_builder{c}; + auto &c_col_idxs_array = c_builder.get_col_idx_array(); + auto &c_vals_array = c_builder.get_value_array(); + c_col_idxs_array.resize_and_reset(new_nnz); + c_vals_array.resize_and_reset(new_nnz); + auto c_col_idxs = c_col_idxs_array.get_data(); + auto c_vals = c_vals_array.get_data(); + + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto a_row = static_cast(idx[0]); + spgemm_multiway_merge( + a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, + heap, + [&](size_type row) { + return std::make_pair(zero(), c_row_ptrs[row]); + }, + [](ValueType val, IndexType, + std::pair &state) { + state.first += val; + }, + [&](IndexType col, std::pair 
&state) { + c_col_idxs[state.second] = col; + c_vals[state.second] = state.first; + state.first = zero(); + state.second++; + }); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); + + +template +void advanced_spgemm(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Csr *a, + const matrix::Csr *b, + const matrix::Dense *beta, + const matrix::Csr *d, + matrix::Csr *c) +{ + auto num_rows = a->get_size()[0]; + const auto a_row_ptrs = a->get_const_row_ptrs(); + const auto a_cols = a->get_const_col_idxs(); + const auto a_vals = a->get_const_values(); + const auto b_row_ptrs = b->get_const_row_ptrs(); + const auto b_cols = b->get_const_col_idxs(); + const auto b_vals = b->get_const_values(); + const auto d_row_ptrs = d->get_const_row_ptrs(); + const auto d_cols = d->get_const_col_idxs(); + const auto d_vals = d->get_const_values(); + auto c_row_ptrs = c->get_row_ptrs(); + const auto alpha_vals = alpha->get_const_values(); + const auto beta_vals = beta->get_const_values(); + constexpr auto sentinel = std::numeric_limits::max(); + auto queue = exec->get_queue(); + + // first sweep: count nnz for each row + + Array> heap_array( + exec, a->get_num_stored_elements()); + + auto heap = heap_array.get_data(); + auto col_heap = + reinterpret_cast *>(heap); + + // first sweep: count nnz for each row + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto a_row = static_cast(idx[0]); + auto d_nz = d_row_ptrs[a_row]; + const auto d_end = d_row_ptrs[a_row + 1]; + auto d_col = checked_load(d_cols, d_nz, d_end, sentinel); + c_row_ptrs[a_row] = spgemm_multiway_merge( + a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, + col_heap, [](size_type row) { return IndexType{}; }, + [](ValueType, IndexType, IndexType &) {}, + [&](IndexType col, IndexType &nnz) { + // skip smaller elements from d + while (d_col <= col) { + d_nz++; + nnz += d_col != col; + 
d_col = checked_load(d_cols, d_nz, d_end, sentinel); + } + nnz++; + }); + // handle the remaining columns from d + c_row_ptrs[a_row] += d_end - d_nz; + }); + }); + + // build row pointers + components::prefix_sum(exec, c_row_ptrs, num_rows + 1); + + // second sweep: accumulate non-zeros + const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); + matrix::CsrBuilder c_builder{c}; + auto &c_col_idxs_array = c_builder.get_col_idx_array(); + auto &c_vals_array = c_builder.get_value_array(); + c_col_idxs_array.resize_and_reset(new_nnz); + c_vals_array.resize_and_reset(new_nnz); + + auto c_col_idxs = c_col_idxs_array.get_data(); + auto c_vals = c_vals_array.get_data(); + + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto a_row = static_cast(idx[0]); + auto d_nz = d_row_ptrs[a_row]; + const auto d_end = d_row_ptrs[a_row + 1]; + auto d_col = checked_load(d_cols, d_nz, d_end, sentinel); + auto d_val = checked_load(d_vals, d_nz, d_end, zero()); + const auto valpha = alpha_vals[0]; + const auto vbeta = beta_vals[0]; + auto c_nz = + spgemm_multiway_merge( + a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, + b_vals, heap, + [&](size_type row) { + return std::make_pair(zero(), + c_row_ptrs[row]); + }, + [](ValueType val, IndexType, + std::pair &state) { + state.first += val; + }, + [&](IndexType col, std::pair &state) { + // handle smaller elements from d + ValueType part_d_val{}; + while (d_col <= col) { + if (d_col == col) { + part_d_val = d_val; + } else { + c_col_idxs[state.second] = d_col; + c_vals[state.second] = vbeta * d_val; + state.second++; + } + d_nz++; + d_col = checked_load(d_cols, d_nz, d_end, sentinel); + d_val = checked_load(d_vals, d_nz, d_end, + zero()); + } + c_col_idxs[state.second] = col; + c_vals[state.second] = + vbeta * part_d_val + valpha * state.first; + state.first = zero(); + state.second++; + }) + .second; + // handle remaining elements from d + while (d_col < 
sentinel) { + c_col_idxs[c_nz] = d_col; + c_vals[c_nz] = vbeta * d_val; + c_nz++; + d_nz++; + d_col = checked_load(d_cols, d_nz, d_end, sentinel); + d_val = checked_load(d_vals, d_nz, d_end, zero()); + } + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Csr *a, + const matrix::Dense *beta, + const matrix::Csr *b, + matrix::Csr *c) +{ + constexpr auto sentinel = std::numeric_limits::max(); + const auto num_rows = a->get_size()[0]; + const auto a_row_ptrs = a->get_const_row_ptrs(); + const auto a_cols = a->get_const_col_idxs(); + const auto b_row_ptrs = b->get_const_row_ptrs(); + const auto b_cols = b->get_const_col_idxs(); + auto c_row_ptrs = c->get_row_ptrs(); + auto queue = exec->get_queue(); + + // count number of non-zeros per row + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + auto a_idx = a_row_ptrs[row]; + const auto a_end = a_row_ptrs[row + 1]; + auto b_idx = b_row_ptrs[row]; + const auto b_end = b_row_ptrs[row + 1]; + IndexType row_nnz{}; + while (a_idx < a_end || b_idx < b_end) { + const auto a_col = checked_load(a_cols, a_idx, a_end, sentinel); + const auto b_col = checked_load(b_cols, b_idx, b_end, sentinel); + row_nnz++; + a_idx += (a_col <= b_col) ? 1 : 0; + b_idx += (b_col <= a_col) ? 
1 : 0; + } + c_row_ptrs[row] = row_nnz; + }); + }); + + components::prefix_sum(exec, c_row_ptrs, num_rows + 1); + + // second sweep: accumulate non-zeros + const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); + matrix::CsrBuilder c_builder{c}; + auto &c_col_idxs_array = c_builder.get_col_idx_array(); + auto &c_vals_array = c_builder.get_value_array(); + c_col_idxs_array.resize_and_reset(new_nnz); + c_vals_array.resize_and_reset(new_nnz); + auto c_cols = c_col_idxs_array.get_data(); + auto c_vals = c_vals_array.get_data(); + + const auto a_vals = a->get_const_values(); + const auto b_vals = b->get_const_values(); + const auto alpha_vals = alpha->get_const_values(); + const auto beta_vals = beta->get_const_values(); + + // count number of non-zeros per row + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + auto a_idx = a_row_ptrs[row]; + const auto a_end = a_row_ptrs[row + 1]; + auto b_idx = b_row_ptrs[row]; + const auto b_end = b_row_ptrs[row + 1]; + const auto alpha = alpha_vals[0]; + const auto beta = beta_vals[0]; + auto c_nz = c_row_ptrs[row]; + while (a_idx < a_end || b_idx < b_end) { + const auto a_col = checked_load(a_cols, a_idx, a_end, sentinel); + const auto b_col = checked_load(b_cols, b_idx, b_end, sentinel); + const bool use_a = a_col <= b_col; + const bool use_b = b_col <= a_col; + const auto a_val = use_a ? a_vals[a_idx] : zero(); + const auto b_val = use_b ? b_vals[b_idx] : zero(); + c_cols[c_nz] = std::min(a_col, b_col); + c_vals[c_nz] = alpha * a_val + beta * b_val; + c_nz++; + a_idx += use_a ? 1 : 0; + b_idx += use_b ? 
1 : 0; + } + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) +{ + const auto grid_dim = ceildiv(num_rows, default_block_size); + + kernel::convert_row_ptrs_to_idxs(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, ptrs, idxs); +} + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Csr *source, + matrix::Coo *result) +{ + auto num_rows = result->get_size()[0]; + + auto row_idxs = result->get_row_idxs(); + const auto source_row_ptrs = source->get_const_row_ptrs(); + + convert_row_ptrs_to_idxs(exec, source_row_ptrs, num_rows, row_idxs); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Csr *source, + matrix::Dense *result) +{ + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto stride = result->get_stride(); + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); + const auto vals = source->get_const_values(); + + const dim3 block_size(config::warp_size, + config::max_block_size / config::warp_size, 1); + const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), + ceildiv(num_rows, block_size.y), 1); + kernel::initialize_zero_dense(init_grid_dim, block_size, 0, + exec->get_queue(), num_rows, num_cols, stride, + result->get_values()); + + auto grid_dim = ceildiv(num_rows, default_block_size); + kernel::fill_in_dense(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, row_ptrs, col_idxs, vals, stride, + result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Csr *source, + matrix::Sellp 
*result) +{ + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + + auto result_values = result->get_values(); + auto result_col_idxs = result->get_col_idxs(); + auto slice_lengths = result->get_slice_lengths(); + auto slice_sets = result->get_slice_sets(); + + const auto slice_size = (result->get_slice_size() == 0) + ? matrix::default_slice_size + : result->get_slice_size(); + const auto stride_factor = (result->get_stride_factor() == 0) + ? matrix::default_stride_factor + : result->get_stride_factor(); + const int slice_num = ceildiv(num_rows, slice_size); + + const auto source_values = source->get_const_values(); + const auto source_row_ptrs = source->get_const_row_ptrs(); + const auto source_col_idxs = source->get_const_col_idxs(); + + auto nnz_per_row = Array(exec, num_rows); + auto grid_dim = ceildiv(num_rows, default_block_size); + + if (grid_dim > 0) { + kernel::calculate_nnz_per_row(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, + source_row_ptrs, nnz_per_row.get_data()); + } + + grid_dim = slice_num; + + if (grid_dim > 0) { + kernel::calculate_slice_lengths( + grid_dim, config::warp_size, 0, exec->get_queue(), num_rows, + slice_size, stride_factor, nnz_per_row.get_const_data(), + slice_lengths, slice_sets); + } + + components::prefix_sum(exec, slice_sets, slice_num + 1); + + grid_dim = ceildiv(num_rows, default_block_size); + if (grid_dim > 0) { + kernel::fill_in_sellp( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + slice_size, source_values, source_row_ptrs, source_col_idxs, + slice_lengths, slice_sets, result_col_idxs, result_values); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Csr *source, + matrix::Ell *result) +{ + const auto source_values = source->get_const_values(); + const auto source_row_ptrs = source->get_const_row_ptrs(); + const auto 
source_col_idxs = source->get_const_col_idxs(); + + auto result_values = result->get_values(); + auto result_col_idxs = result->get_col_idxs(); + const auto stride = result->get_stride(); + const auto max_nnz_per_row = result->get_num_stored_elements_per_row(); + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + + const auto init_grid_dim = + ceildiv(max_nnz_per_row * num_rows, default_block_size); + + kernel::initialize_zero_ell(init_grid_dim, default_block_size, 0, + exec->get_queue(), max_nnz_per_row, stride, + result_values, result_col_idxs); + + const auto grid_dim = + ceildiv(num_rows * config::warp_size, default_block_size); + + kernel::fill_in_ell(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, stride, source_values, source_row_ptrs, + source_col_idxs, result_values, result_col_idxs); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Csr *source, + size_type *result, size_type stride_factor, + size_type slice_size) +{ + const auto num_rows = source->get_size()[0]; + + if (num_rows == 0) { + *result = 0; + return; + } + + const auto slice_num = ceildiv(num_rows, slice_size); + const auto row_ptrs = source->get_const_row_ptrs(); + + auto nnz_per_row = Array(exec, num_rows); + auto grid_dim = ceildiv(num_rows, default_block_size); + + kernel::calculate_nnz_per_row(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, row_ptrs, + nnz_per_row.get_data()); + + grid_dim = ceildiv(slice_num * config::warp_size, default_block_size); + auto max_nnz_per_slice = Array(exec, slice_num); + + kernel::reduce_max_nnz_per_slice( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + slice_size, stride_factor, nnz_per_row.get_const_data(), + max_nnz_per_slice.get_data()); + + grid_dim = ceildiv(slice_num, default_block_size); + auto block_results = Array(exec, grid_dim); + + 
kernel::reduce_total_cols( + grid_dim, default_block_size, 0, exec->get_queue(), slice_num, + max_nnz_per_slice.get_const_data(), block_results.get_data()); + + auto d_result = Array(exec, 1); + + kernel::reduce_total_cols(1, default_block_size, 0, exec->get_queue(), + grid_dim, block_results.get_const_data(), + d_result.get_data()); + + *result = exec->copy_val_to_host(d_result.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void generic_transpose(std::shared_ptr exec, + const matrix::Csr *orig, + matrix::Csr *trans) +{ + const auto num_rows = orig->get_size()[0]; + const auto num_cols = orig->get_size()[1]; + auto queue = exec->get_queue(); + const auto row_ptrs = orig->get_const_row_ptrs(); + const auto cols = orig->get_const_col_idxs(); + const auto vals = orig->get_const_values(); + + Array counts{exec, num_cols + 1}; + auto tmp_counts = counts.get_data(); + auto out_row_ptrs = trans->get_row_ptrs(); + auto out_cols = trans->get_col_idxs(); + auto out_vals = trans->get_values(); + components::fill_array(exec, tmp_counts, num_cols, IndexType{}); + + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + for (auto i = begin; i < end; i++) { + atomic_fetch_add(tmp_counts + cols[i], IndexType{1}); + } + }); + }); + + components::prefix_sum(exec, tmp_counts, num_cols + 1); + exec->copy(num_cols + 1, tmp_counts, out_row_ptrs); + + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + for (auto i = begin; i < end; i++) { + auto out_nz = + atomic_fetch_add(tmp_counts + cols[i], IndexType{1}); + out_cols[out_nz] = row; + out_vals[out_nz] = conjugate ? 
conj(vals[i]) : vals[i]; + } + }); + }); + + sort_by_column_index(exec, trans); +} + + +template +void transpose(std::shared_ptr exec, + const matrix::Csr *orig, + matrix::Csr *trans) +{ + generic_transpose(exec, orig, trans); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Csr *orig, + matrix::Csr *trans) +{ + generic_transpose(exec, orig, trans); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum(exec, permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_symm_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), permuted->get_row_ptrs(), + permuted->get_col_idxs(), permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + +template +void row_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + 
ceildiv(num_rows, default_block_size / config::warp_size); + row_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), row_permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); + + +template +void inverse_row_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_row_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), row_permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + + +template +void calculate_max_nnz_per_row(std::shared_ptr exec, + const matrix::Csr *source, + size_type *result) +{ + const auto num_rows = source->get_size()[0]; + + auto nnz_per_row = Array(exec, num_rows); + auto block_results = Array(exec, default_block_size); + auto d_result = Array(exec, 1); + + const auto grid_dim = ceildiv(num_rows, default_block_size); + kernel::calculate_nnz_per_row( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + source->get_const_row_ptrs(), nnz_per_row.get_data()); + + const auto n = ceildiv(num_rows, default_block_size); + const auto reduce_dim = n <= 
default_block_size ? n : default_block_size; + kernel::reduce_max_nnz(reduce_dim, default_block_size, 0, exec->get_queue(), + num_rows, nnz_per_row.get_const_data(), + block_results.get_data()); + + kernel::reduce_max_nnz(1, default_block_size, 0, exec->get_queue(), + reduce_dim, block_results.get_const_data(), + d_result.get_data()); + + *result = exec->copy_val_to_host(d_result.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Csr *source, + matrix::Hybrid *result) +{ + auto ell_val = result->get_ell_values(); + auto ell_col = result->get_ell_col_idxs(); + auto coo_val = result->get_coo_values(); + auto coo_col = result->get_coo_col_idxs(); + auto coo_row = result->get_coo_row_idxs(); + const auto stride = result->get_ell_stride(); + const auto max_nnz_per_row = result->get_ell_num_stored_elements_per_row(); + const auto num_rows = result->get_size()[0]; + const auto coo_num_stored_elements = result->get_coo_num_stored_elements(); + auto grid_dim = ceildiv(max_nnz_per_row * num_rows, default_block_size); + + kernel::initialize_zero_ell(grid_dim, default_block_size, 0, + exec->get_queue(), max_nnz_per_row, stride, + ell_val, ell_col); + + grid_dim = ceildiv(num_rows, default_block_size); + auto coo_offset = Array(exec, num_rows); + kernel::calculate_hybrid_coo_row_nnz( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + max_nnz_per_row, source->get_const_row_ptrs(), coo_offset.get_data()); + + components::prefix_sum(exec, coo_offset.get_data(), num_rows); + + grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); + kernel::fill_in_hybrid( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, stride, + max_nnz_per_row, source->get_const_values(), + source->get_const_row_ptrs(), source->get_const_col_idxs(), + coo_offset.get_const_data(), ell_val, ell_col, coo_val, coo_col, + 
coo_row); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::Csr *source, + Array *result) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + + kernel::calculate_nnz_per_row(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, row_ptrs, + result->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::Csr *to_sort) +{ + const auto num_rows = to_sort->get_size()[0]; + const auto row_ptrs = to_sort->get_const_row_ptrs(); + auto cols = to_sort->get_col_idxs(); + auto vals = to_sort->get_values(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + const auto begin = row_ptrs[row]; + auto size = row_ptrs[row + 1] - begin; + if (size <= 1) { + return; + } + auto swap = [&](IndexType i, IndexType j) { + std::swap(cols[i + begin], cols[j + begin]); + std::swap(vals[i + begin], vals[j + begin]); + }; + auto lchild = [](IndexType i) { return 2 * i + 1; }; + auto rchild = [](IndexType i) { return 2 * i + 2; }; + auto parent = [](IndexType i) { return (i - 1) / 2; }; + auto sift_down = [&](IndexType i) { + const auto col = cols[i + begin]; + while (lchild(i) < size) { + const auto lcol = cols[lchild(i) + begin]; + // -1 as sentinel, since we are building a max heap + const auto rcol = checked_load(cols + begin, rchild(i), + size, IndexType{-1}); + if (col >= std::max(lcol, rcol)) { + return; + } + const auto maxchild = lcol > rcol ? 
lchild(i) : rchild(i); + swap(i, maxchild); + i = maxchild; + } + }; + // heapify / sift_down for max-heap + for (auto i = (size - 2) / 2; i >= 0; i--) { + sift_down(i); + } + // heapsort: swap maximum to the end, shrink heap + swap(0, size - 1); + size--; + for (; size > 1; size--) { + // restore heap property and repeat + sift_down(0); + swap(0, size - 1); + } + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Csr *to_check, bool *is_sorted) +{ + Array is_sorted_device_array{exec, {true}}; + const auto num_rows = to_check->get_size()[0]; + const auto row_ptrs = to_check->get_const_row_ptrs(); + const auto cols = to_check->get_const_col_idxs(); + auto is_sorted_device = is_sorted_device_array.get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + if (*is_sorted_device) { + for (auto i = begin; i < end - 1; i++) { + if (cols[i] > cols[i + 1]) { + *is_sorted_device = false; + break; + } + } + } + }); + }); + *is_sorted = exec->copy_val_to_host(is_sorted_device); +}; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Csr *orig, + matrix::Diagonal *diag) +{ + const auto nnz = orig->get_num_stored_elements(); + const auto diag_size = diag->get_size()[0]; + const auto num_blocks = + ceildiv(config::warp_size * diag_size, default_block_size); + + const auto orig_values = orig->get_const_values(); + const auto orig_row_ptrs = orig->get_const_row_ptrs(); + const auto orig_col_idxs = orig->get_const_col_idxs(); + auto diag_values = diag->get_values(); + + kernel::extract_diagonal(num_blocks, default_block_size, 0, + 
exec->get_queue(), diag_size, nnz, orig_values, + orig_row_ptrs, orig_col_idxs, diag_values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); + + +} // namespace csr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp new file mode 100644 index 00000000000..5fefd83f4c7 --- /dev/null +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -0,0 +1,1340 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/dense_kernels.hpp" + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/prefix_sum.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/base/onemkl_bindings.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Dense matrix format namespace. 
+ * + * @ingroup dense + */ +namespace dense { + + +using KCFG_1D = ConfigSet<11, 7>; +constexpr auto kcfg_1d_list = + syn::value_list(); +constexpr auto subgroup_list = + syn::value_list(); +constexpr auto kcfg_1d_array = syn::as_array(kcfg_1d_list); +constexpr int default_block_size = 256; + + +namespace kernel { + + +template +void compute_partial_reduce( + size_type num_rows, OutType *__restrict__ work, CallableGetValue get_value, + CallableReduce reduce_op, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + + constexpr auto warps_per_block = wg_size / sg_size; + + const auto num_blocks = item_ct1.get_group_range(2); + const auto local_id = thread::get_local_thread_id(item_ct1); + const auto global_id = + thread::get_thread_id(item_ct1); + + OutType *tmp_work_array = tmp_work; + auto tmp = zero(); + for (auto i = global_id; i < num_rows; i += wg_size * num_blocks) { + tmp = reduce_op(tmp, get_value(i)); + } + + tmp_work_array[local_id] = tmp; + + ::gko::kernels::dpcpp::reduce(group::this_thread_block(item_ct1), + tmp_work_array, reduce_op); + + if (local_id == 0) { + work[thread::get_block_id(item_ct1)] = tmp_work_array[0]; + } +} + + +template +void finalize_reduce_computation( + size_type size, const ValueType *work, ValueType *result, + CallableReduce reduce_op, CallableFinalize finalize_op, + sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + + const auto local_id = thread::get_local_thread_id(item_ct1); + + ValueType tmp = zero(); + for (auto i = local_id; i < size; i += wg_size) { + tmp = reduce_op(tmp, work[i]); + } + ValueType *tmp_work_array = tmp_work; + tmp_work_array[local_id] = tmp; + + ::gko::kernels::dpcpp::reduce(group::this_thread_block(item_ct1), + tmp_work_array, reduce_op); + + if (local_id == 0) { + 
*result = finalize_op(tmp_work_array[0]); + } +} + + +template +void compute_partial_dot( + size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + compute_partial_reduce( + num_rows, work, + [x, stride_x, y, stride_y](size_type i) { + return x[i * stride_x] * y[i * stride_y]; + }, + [](const ValueType &x, const ValueType &y) { return x + y; }, item_ct1, + tmp_work); +} + +template +void compute_partial_dot(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + const ValueType *x, size_type stride_x, + const ValueType *y, size_type stride_y, + ValueType *work) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access::mode::read_write, + sycl::access::target::local> + tmp_work_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + compute_partial_dot(num_rows, x, stride_x, y, stride_y, + work, item_ct1, + *tmp_work_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(compute_partial_dot, + compute_partial_dot) +GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_dot_call, compute_partial_dot, + kcfg_1d_list) + + +template +void compute_partial_conj_dot( + size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + compute_partial_reduce( + num_rows, work, + [x, stride_x, y, stride_y](size_type i) { + return conj(x[i * stride_x]) * y[i * stride_y]; + }, + [](const ValueType &x, const ValueType &y) { return x + y; }, item_ct1, + tmp_work); +} + +template +void compute_partial_conj_dot(dim3 grid, dim3 block, + size_type 
dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + const ValueType *x, size_type stride_x, + const ValueType *y, size_type stride_y, + ValueType *work) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access::mode::read_write, + sycl::access::target::local> + tmp_work_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + compute_partial_conj_dot(num_rows, x, stride_x, y, + stride_y, work, item_ct1, + *tmp_work_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(compute_partial_conj_dot, + compute_partial_conj_dot) +GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_conj_dot_call, + compute_partial_conj_dot, kcfg_1d_list) + + +template +void finalize_sum_reduce_computation( + size_type size, const ValueType *work, ValueType *result, + sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + finalize_reduce_computation( + size, work, result, + [](const ValueType &x, const ValueType &y) { return x + y; }, + [](const ValueType &x) { return x; }, item_ct1, tmp_work); +} + +template +void finalize_sum_reduce_computation(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, size_type size, + const ValueType *work, ValueType *result) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access::mode::read_write, + sycl::access::target::local> + tmp_work_acc_ct1(cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + finalize_sum_reduce_computation( + size, work, result, item_ct1, + *tmp_work_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(finalize_sum_reduce_computation, + finalize_sum_reduce_computation) +GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_sum_reduce_computation_call, + finalize_sum_reduce_computation, kcfg_1d_list) + + 
+template +void compute_partial_norm2( + size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, + remove_complex *__restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray, KCFG_1D::decode<0>(cfg)> + &tmp_work) +{ + using norm_type = remove_complex; + compute_partial_reduce( + num_rows, work, + [x, stride_x](size_type i) { return squared_norm(x[i * stride_x]); }, + [](const norm_type &x, const norm_type &y) { return x + y; }, item_ct1, + tmp_work); +} + +template +void compute_partial_norm2(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, const ValueType *x, + size_type stride_x, remove_complex *work) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, wg_size>, + 0, sycl::access::mode::read_write, + sycl::access::target::local> + tmp_work_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + compute_partial_norm2(num_rows, x, stride_x, work, + item_ct1, + *tmp_work_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(compute_partial_norm2, + compute_partial_norm2) +GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_norm2_call, + compute_partial_norm2, kcfg_1d_list) + + +template +void finalize_sqrt_reduce_computation( + size_type size, const ValueType *work, ValueType *result, + sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)> &tmp_work) +{ + finalize_reduce_computation( + size, work, result, + [](const ValueType &x, const ValueType &y) { return x + y; }, + [](const ValueType &x) { return std::sqrt(x); }, item_ct1, tmp_work); +} + +template +void finalize_sqrt_reduce_computation(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + sycl::queue *queue, size_type size, + const ValueType *work, ValueType *result) +{ + constexpr auto wg_size = KCFG_1D::decode<0>(cfg); + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + 
sycl::access::mode::read_write, + sycl::access::target::local> + tmp_work_acc_ct1(cgh); + + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + finalize_sqrt_reduce_computation( + size, work, result, item_ct1, + *tmp_work_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(finalize_sqrt_reduce_computation, + finalize_sqrt_reduce_computation) +GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_sqrt_reduce_computation_call, + finalize_sqrt_reduce_computation, kcfg_1d_list) + + +template +void fill_in_coo(size_type num_rows, size_type num_cols, size_type stride, + const size_type *__restrict__ row_ptrs, + const ValueType *__restrict__ source, + IndexType *__restrict__ row_idxs, + IndexType *__restrict__ col_idxs, + ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + size_type write_to = row_ptrs[tidx]; + + for (size_type i = 0; i < num_cols; i++) { + if (source[stride * tidx + i] != zero()) { + values[write_to] = source[stride * tidx + i]; + col_idxs[write_to] = i; + row_idxs[write_to] = tidx; + write_to++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_coo, fill_in_coo) + + +template +void count_nnz_per_row(size_type num_rows, size_type num_cols, size_type stride, + const ValueType *__restrict__ work, + IndexType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + const auto row_idx = thread::get_subwarp_id_flat(item_ct1); + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + + if (row_idx < num_rows) { + IndexType part_result{}; + for (auto i = warp_tile.thread_rank(); i < num_cols; i += sg_size) { + if (work[stride * row_idx + i] != zero()) { + part_result += 1; + } + } + result[row_idx] = ::gko::kernels::dpcpp::reduce( + warp_tile, part_result, + [](const size_type &a, const size_type &b) { return a + b; }); + } +} + 
+GKO_ENABLE_DEFAULT_HOST_CONFIG(count_nnz_per_row, count_nnz_per_row) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(count_nnz_per_row, count_nnz_per_row) +GKO_ENABLE_DEFAULT_CONFIG_CALL(count_nnz_per_row_call, count_nnz_per_row, + kcfg_1d_list) + + +template +void fill_in_csr(size_type num_rows, size_type num_cols, size_type stride, + const ValueType *__restrict__ source, + IndexType *__restrict__ row_ptrs, + IndexType *__restrict__ col_idxs, + ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + + if (tidx < num_rows) { + auto write_to = row_ptrs[tidx]; + for (size_type i = 0; i < num_cols; i++) { + if (source[stride * tidx + i] != zero()) { + values[write_to] = source[stride * tidx + i]; + col_idxs[write_to] = i; + write_to++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr) + + +template +void fill_in_ell(size_type num_rows, size_type num_cols, + size_type source_stride, const ValueType *__restrict__ source, + size_type max_nnz_per_row, size_type result_stride, + IndexType *__restrict__ col_ptrs, + ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + IndexType col_idx = 0; + for (size_type col = 0; col < num_cols; col++) { + if (source[tidx * source_stride + col] != zero()) { + col_ptrs[col_idx * result_stride + tidx] = col; + values[col_idx * result_stride + tidx] = + source[tidx * source_stride + col]; + col_idx++; + } + } + for (size_type j = col_idx; j < max_nnz_per_row; j++) { + col_ptrs[j * result_stride + tidx] = 0; + values[j * result_stride + tidx] = zero(); + } + } else if (tidx < result_stride) { + for (size_type j = 0; j < max_nnz_per_row; j++) { + col_ptrs[j * result_stride + tidx] = 0; + values[j * result_stride + tidx] = zero(); + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_ell, fill_in_ell) + + +template +void calculate_slice_lengths(size_type num_rows, size_type 
slice_size, + int slice_num, size_type stride_factor, + const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ slice_lengths, + size_type *__restrict__ slice_sets, + sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = cfg; + const auto sliceid = item_ct1.get_group(2); + const auto tid_in_warp = item_ct1.get_local_id(2); + const bool runable = sliceid * slice_size + tid_in_warp < num_rows; + size_type thread_result = 0; + for (size_type i = tid_in_warp; i < slice_size; i += sg_size) { + thread_result = + (i + slice_size * sliceid < num_rows) + ? max(thread_result, nnz_per_row[sliceid * slice_size + i]) + : thread_result; + } + + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + auto warp_result = ::gko::kernels::dpcpp::reduce( + warp_tile, thread_result, + [](const size_type &a, const size_type &b) { return max(a, b); }); + + if (tid_in_warp == 0 && runable) { + auto slice_length = ceildiv(warp_result, stride_factor) * stride_factor; + slice_lengths[sliceid] = slice_length; + slice_sets[sliceid] = slice_length; + } +} + +GKO_ENABLE_DEFAULT_HOST_CONFIG(calculate_slice_lengths, calculate_slice_lengths) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(calculate_slice_lengths, + calculate_slice_lengths) +GKO_ENABLE_DEFAULT_CONFIG_CALL(calculate_slice_lengths_call, + calculate_slice_lengths, subgroup_list) + + +template +void fill_in_sellp(size_type num_rows, size_type num_cols, size_type slice_size, + size_type stride, const ValueType *__restrict__ source, + size_type *__restrict__ slice_lengths, + size_type *__restrict__ slice_sets, + IndexType *__restrict__ col_idxs, + ValueType *__restrict__ vals, sycl::nd_item<3> item_ct1) +{ + const auto global_row = thread::get_thread_id_flat(item_ct1); + const auto row = global_row % slice_size; + const auto sliceid = global_row / slice_size; + + if (global_row < num_rows) { + size_type sellp_ind = slice_sets[sliceid] * slice_size + row; + + for (size_type col = 0; col < num_cols; 
col++) { + auto val = source[global_row * stride + col]; + if (val != zero()) { + col_idxs[sellp_ind] = col; + vals[sellp_ind] = val; + sellp_ind += slice_size; + } + } + for (size_type i = sellp_ind; + i < + (slice_sets[sliceid] + slice_lengths[sliceid]) * slice_size + row; + i += slice_size) { + col_idxs[i] = 0; + vals[i] = zero(); + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_sellp, fill_in_sellp) + + +template +void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ result, sycl::nd_item<3> item_ct1, + uint8_t *dpct_local) +{ + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + auto block_max = (size_type *)dpct_local; + + reduce_array( + size, nnz_per_row, block_max, item_ct1, + [](const size_type &x, const size_type &y) { return max(x, y); }); + + if (item_ct1.get_local_id(2) == 0) { + result[item_ct1.get_group(2)] = block_max[0]; + } +} + +template +void reduce_max_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type size, + const size_type *nnz_per_row, size_type *result) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + dpct_local_acc_ct1(sycl::range<1>(dynamic_shared_memory), cgh); + + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + reduce_max_nnz(size, nnz_per_row, result, item_ct1, + dpct_local_acc_ct1.get_pointer().get()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(reduce_max_nnz, reduce_max_nnz); +GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_max_nnz_call, reduce_max_nnz, + kcfg_1d_list) + + +template +void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, + size_type stride_factor, + const size_type *__restrict__ nnz_per_row, + size_type *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto warpid = thread::get_subwarp_id_flat(item_ct1); + 
const auto tid_in_warp = warp_tile.thread_rank(); + const auto slice_num = ceildiv(num_rows, slice_size); + + size_type thread_result = 0; + for (size_type i = tid_in_warp; i < slice_size; i += sg_size) { + if (warpid * slice_size + i < num_rows) { + thread_result = + max(thread_result, nnz_per_row[warpid * slice_size + i]); + } + } + + auto warp_result = ::gko::kernels::dpcpp::reduce( + warp_tile, thread_result, + [](const size_type &a, const size_type &b) { return max(a, b); }); + + if (tid_in_warp == 0 && warpid < slice_num) { + result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; + } +} + +GKO_ENABLE_DEFAULT_HOST_CONFIG(reduce_max_nnz_per_slice, + reduce_max_nnz_per_slice) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(reduce_max_nnz_per_slice, + reduce_max_nnz_per_slice) +GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_max_nnz_per_slice_call, + reduce_max_nnz_per_slice, kcfg_1d_list) + + +template +void reduce_total_cols(size_type num_slices, + const size_type *__restrict__ max_nnz_per_slice, + size_type *__restrict__ result, + sycl::nd_item<3> item_ct1, uint8_t *dpct_local) +{ + auto block_result = (size_type *)dpct_local; + constexpr auto sg_size = KCFG_1D::decode<1>(cfg); + reduce_array( + num_slices, max_nnz_per_slice, block_result, item_ct1, + [](const size_type &x, const size_type &y) { return x + y; }); + + if (item_ct1.get_local_id(2) == 0) { + result[item_ct1.get_group(2)] = block_result[0]; + } +} + +template +void reduce_total_cols(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_slices, + const size_type *max_nnz_per_slice, size_type *result) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor + dpct_local_acc_ct1(sycl::range<1>(dynamic_shared_memory), cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + reduce_total_cols(num_slices, max_nnz_per_slice, result, + item_ct1, + dpct_local_acc_ct1.get_pointer().get()); + }); + }); +} + 
+GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(reduce_total_cols, + reduce_total_cols); +GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_total_cols_call, reduce_total_cols, + kcfg_1d_list) + +template +void transpose(const size_type nrows, const size_type ncols, + const ValueType *__restrict__ in, const size_type in_stride, + ValueType *__restrict__ out, const size_type out_stride, + Closure op, sycl::nd_item<3> item_ct1, + UninitializedArray &space) +{ + auto local_x = item_ct1.get_local_id(2); + auto local_y = item_ct1.get_local_id(1); + auto x = item_ct1.get_group(2) * sg_size + local_x; + auto y = item_ct1.get_group(1) * sg_size + local_y; + if (y < nrows && x < ncols) { + space[local_y * (sg_size + 1) + local_x] = op(in[y * in_stride + x]); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + x = item_ct1.get_group(1) * sg_size + local_x; + y = item_ct1.get_group(2) * sg_size + local_y; + if (y < ncols && x < nrows) { + out[y * out_stride + x] = space[local_x * (sg_size + 1) + local_y]; + } +} + +template +__WG_BOUND__(sg_size, sg_size) +void transpose(const size_type nrows, const size_type ncols, + const ValueType *__restrict__ in, const size_type in_stride, + ValueType *__restrict__ out, const size_type out_stride, + sycl::nd_item<3> item_ct1, + UninitializedArray &space) +{ + transpose( + nrows, ncols, in, in_stride, out, out_stride, + [](ValueType val) { return val; }, item_ct1, space); +} + +template +void transpose(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const size_type nrows, const size_type ncols, + const ValueType *in, const size_type in_stride, ValueType *out, + const size_type out_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + space_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + transpose(nrows, ncols, in, in_stride, out, out_stride, + item_ct1, 
*space_acc_ct1.get_pointer()); + }); + }); +} + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(transpose, transpose); +GKO_ENABLE_DEFAULT_CONFIG_CALL(transpose_call, transpose, subgroup_list); + + +template +__WG_BOUND__(sg_size, sg_size) +void conj_transpose( + const size_type nrows, const size_type ncols, + const ValueType *__restrict__ in, const size_type in_stride, + ValueType *__restrict__ out, const size_type out_stride, + sycl::nd_item<3> item_ct1, + UninitializedArray &space) +{ + transpose( + nrows, ncols, in, in_stride, out, out_stride, + [](ValueType val) { return conj(val); }, item_ct1, space); +} + +template +void conj_transpose(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const size_type nrows, + const size_type ncols, const ValueType *in, + const size_type in_stride, ValueType *out, + const size_type out_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + space_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + conj_transpose(nrows, ncols, in, in_stride, out, + out_stride, item_ct1, + *space_acc_ct1.get_pointer()); + }); + }); +} + + +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(conj_transpose, conj_transpose); +GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, + subgroup_list); + + +} // namespace kernel + + +template +void simple_apply(std::shared_ptr exec, + const matrix::Dense *a, + const matrix::Dense *b, + matrix::Dense *c) +{ + using namespace oneapi::mkl; + oneapi::mkl::blas::row_major::gemm( + *exec->get_queue(), transpose::nontrans, transpose::nontrans, + c->get_size()[0], c->get_size()[1], a->get_size()[1], one(), + a->get_const_values(), a->get_stride(), b->get_const_values(), + b->get_stride(), zero(), c->get_values(), c->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void 
apply(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Dense *a, const matrix::Dense *b, + const matrix::Dense *beta, matrix::Dense *c) +{ + using namespace oneapi::mkl; + oneapi::mkl::blas::row_major::gemm( + *exec->get_queue(), transpose::nontrans, transpose::nontrans, + c->get_size()[0], c->get_size()[1], a->get_size()[1], + exec->copy_val_to_host(alpha->get_const_values()), + a->get_const_values(), a->get_stride(), b->get_const_values(), + b->get_stride(), exec->copy_val_to_host(beta->get_const_values()), + c->get_values(), c->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const matrix::Dense *x, + const matrix::Dense *y, + matrix::Dense *result) +{ + if (x->get_size()[1] == 1) { + // TODO: write a custom kernel which does this more efficiently + onemkl::dot(*exec->get_queue(), x->get_size()[0], x->get_const_values(), + x->get_stride(), y->get_const_values(), y->get_stride(), + result->get_values()); + } else { + // TODO: these are tuning parameters obtained experimentally, once + // we decide how to handle this uniformly, they should be modified + // appropriately + constexpr int work_per_thread = 32; + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + const auto work_per_block = work_per_thread * wg_size; + const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); + const dim3 block_dim{sg_size, 1, wg_size / sg_size}; + Array work(exec, grid_dim.x); + // TODO: write a kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + kernel::compute_partial_dot_call( + cfg, grid_dim, block_dim, 0, 
exec->get_queue(), + x->get_size()[0], x->get_const_values() + col, x->get_stride(), + y->get_const_values() + col, y->get_stride(), work.get_data()); + kernel::finalize_sum_reduce_computation_call( + cfg, 1, block_dim, 0, exec->get_queue(), grid_dim.x, + work.get_const_data(), result->get_values() + col); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); + + +template +void compute_conj_dot(std::shared_ptr exec, + const matrix::Dense *x, + const matrix::Dense *y, + matrix::Dense *result) +{ + if (x->get_size()[1] == 1) { + // TODO: write a custom kernel which does this more efficiently + onemkl::conj_dot(*exec->get_queue(), x->get_size()[0], + x->get_const_values(), x->get_stride(), + y->get_const_values(), y->get_stride(), + result->get_values()); + + } else { + // TODO: these are tuning parameters obtained experimentally, once + // we decide how to handle this uniformly, they should be modified + // appropriately + constexpr int work_per_thread = 32; + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + const auto work_per_block = work_per_thread * wg_size; + const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); + const dim3 block_dim{sg_size, 1, wg_size / sg_size}; + Array work(exec, grid_dim.x); + // TODO: write a kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + kernel::compute_partial_conj_dot_call( + cfg, grid_dim, block_dim, 0, exec->get_queue(), + x->get_size()[0], x->get_const_values() + col, x->get_stride(), + y->get_const_values() + col, y->get_stride(), work.get_data()); + kernel::finalize_sum_reduce_computation_call( + cfg, 1, block_dim, 0, 
exec->get_queue(), grid_dim.x, + work.get_const_data(), result->get_values() + col); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const matrix::Dense *x, + matrix::Dense> *result) +{ + if (x->get_size()[1] == 1) { + oneapi::mkl::blas::row_major::nrm2( + *exec->get_queue(), x->get_size()[0], x->get_const_values(), + x->get_stride(), result->get_values()); + } else { + using norm_type = remove_complex; + // TODO: these are tuning parameters obtained experimentally, once + // we decide how to handle this uniformly, they should be modified + // appropriately + constexpr int work_per_thread = 32; + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + const auto work_per_block = work_per_thread * wg_size; + const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); + const dim3 block_dim{sg_size, 1, wg_size / sg_size}; + Array work(exec, grid_dim.x); + // TODO: write a kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + kernel::compute_partial_norm2_call( + cfg, grid_dim, block_dim, 0, exec->get_queue(), + x->get_size()[0], x->get_const_values() + col, x->get_stride(), + work.get_data()); + kernel::finalize_sqrt_reduce_computation_call( + cfg, 1, block_dim, 0, exec->get_queue(), grid_dim.x, + work.get_const_data(), result->get_values() + col); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); + + +template +void convert_to_coo(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Coo *result) +{ + auto num_rows = result->get_size()[0]; + auto num_cols = 
result->get_size()[1]; + + auto row_idxs = result->get_row_idxs(); + auto col_idxs = result->get_col_idxs(); + auto values = result->get_values(); + + auto stride = source->get_stride(); + + auto nnz_prefix_sum = Array(exec, num_rows); + calculate_nonzeros_per_row(exec, source, &nnz_prefix_sum); + + components::prefix_sum(exec, nnz_prefix_sum.get_data(), num_rows); + + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + size_type grid_dim = ceildiv(num_rows, wg_size); + + kernel::fill_in_coo(grid_dim, wg_size, 0, exec->get_queue(), num_rows, + num_cols, stride, nnz_prefix_sum.get_const_data(), + source->get_const_values(), row_idxs, col_idxs, values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL); + + +template +void convert_to_csr(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Csr *result) +{ + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + + auto row_ptrs = result->get_row_ptrs(); + auto col_idxs = result->get_col_idxs(); + auto values = result->get_values(); + + auto stride = source->get_stride(); + + const auto rows_per_block = ceildiv(wg_size, sg_size); + const auto grid_dim_nnz = ceildiv(source->get_size()[0], rows_per_block); + + kernel::count_nnz_per_row_call( + cfg, grid_dim_nnz, wg_size, 0, 
exec->get_queue(), num_rows, num_cols, + stride, source->get_const_values(), row_ptrs); + + components::prefix_sum(exec, row_ptrs, num_rows + 1); + + size_type grid_dim = ceildiv(num_rows, wg_size); + + kernel::fill_in_csr(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, num_cols, stride, source->get_const_values(), + row_ptrs, col_idxs, values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL); + + +template +void convert_to_ell(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Ell *result) +{ + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto max_nnz_per_row = result->get_num_stored_elements_per_row(); + + auto col_ptrs = result->get_col_idxs(); + auto values = result->get_values(); + + auto source_stride = source->get_stride(); + auto result_stride = result->get_stride(); + + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + auto grid_dim = ceildiv(result_stride, wg_size); + kernel::fill_in_ell(grid_dim, wg_size, 0, exec->get_queue(), num_rows, + num_cols, source_stride, source->get_const_values(), + max_nnz_per_row, result_stride, col_ptrs, values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL); + + +template +void convert_to_hybrid(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Hybrid *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL); + + +template +void convert_to_sellp(std::shared_ptr exec, + const matrix::Dense *source, + matrix::Sellp *result) +{ + auto queue = exec->get_queue(); + constexpr auto 
kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + const auto stride = source->get_stride(); + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + + auto vals = result->get_values(); + auto col_idxs = result->get_col_idxs(); + auto slice_lengths = result->get_slice_lengths(); + auto slice_sets = result->get_slice_sets(); + + const auto slice_size = (result->get_slice_size() == 0) + ? matrix::default_slice_size + : result->get_slice_size(); + const auto stride_factor = (result->get_stride_factor() == 0) + ? matrix::default_stride_factor + : result->get_stride_factor(); + const int slice_num = ceildiv(num_rows, slice_size); + + auto nnz_per_row = Array(exec, num_rows); + calculate_nonzeros_per_row(exec, source, &nnz_per_row); + + auto grid_dim = slice_num; + + if (grid_dim > 0) { + kernel::calculate_slice_lengths_call( + sg_size, grid_dim, sg_size, 0, exec->get_queue(), num_rows, + slice_size, slice_num, stride_factor, nnz_per_row.get_const_data(), + slice_lengths, slice_sets); + } + + components::prefix_sum(exec, slice_sets, slice_num + 1); + + grid_dim = ceildiv(num_rows, wg_size); + if (grid_dim > 0) { + kernel::fill_in_sellp(grid_dim, wg_size, 0, exec->get_queue(), num_rows, + num_cols, slice_size, stride, + source->get_const_values(), slice_lengths, + slice_sets, col_idxs, vals); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL); + + +template +void convert_to_sparsity_csr(std::shared_ptr exec, + const matrix::Dense *source, + matrix::SparsityCsr *result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL); + + +template +void 
count_nonzeros(std::shared_ptr exec, + const matrix::Dense *source, size_type *result) +{ + const auto num_rows = source->get_size()[0]; + auto nnz_per_row = Array(exec, num_rows); + + calculate_nonzeros_per_row(exec, source, &nnz_per_row); + + *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); + + +template +void calculate_max_nnz_per_row(std::shared_ptr exec, + const matrix::Dense *source, + size_type *result) +{ + const auto num_rows = source->get_size()[0]; + auto nnz_per_row = Array(exec, num_rows); + + calculate_nonzeros_per_row(exec, source, &nnz_per_row); + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto n = ceildiv(num_rows, wg_size); + const size_type grid_dim = (n <= wg_size) ? 
n : wg_size; + + auto block_results = Array(exec, grid_dim); + + kernel::reduce_max_nnz_call( + cfg, grid_dim, wg_size, wg_size * sizeof(size_type), exec->get_queue(), + num_rows, nnz_per_row.get_const_data(), block_results.get_data()); + + auto d_result = Array(exec, 1); + + kernel::reduce_max_nnz_call( + cfg, 1, wg_size, wg_size * sizeof(size_type), exec->get_queue(), + grid_dim, block_results.get_const_data(), d_result.get_data()); + + *result = exec->copy_val_to_host(d_result.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::Dense *source, + Array *result) +{ + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + const dim3 block_size(wg_size, 1, 1); + auto rows_per_block = ceildiv(wg_size, sg_size); + const size_t grid_x = ceildiv(source->get_size()[0], rows_per_block); + const dim3 grid_size(grid_x, 1, 1); + if (grid_x > 0) { + kernel::count_nnz_per_row_call( + cfg, grid_size, block_size, 0, exec->get_queue(), + source->get_size()[0], source->get_size()[1], source->get_stride(), + source->get_const_values(), result->get_data()); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::Dense *source, + size_type *result, size_type stride_factor, + size_type slice_size) +{ + const auto num_rows = source->get_size()[0]; + + if (num_rows == 0) { + *result = 0; + return; + } + + const auto num_cols = source->get_size()[1]; + const auto slice_num = ceildiv(num_rows, slice_size); + + auto 
nnz_per_row = Array(exec, num_rows); + + calculate_nonzeros_per_row(exec, source, &nnz_per_row); + + auto max_nnz_per_slice = Array(exec, slice_num); + auto queue = exec->get_queue(); + constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); + const std::uint32_t cfg = + get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) { + return validate(queue, KCFG_1D::decode<0>(cfg), + KCFG_1D::decode<1>(cfg)); + }); + const auto wg_size = KCFG_1D::decode<0>(cfg); + const auto sg_size = KCFG_1D::decode<1>(cfg); + + auto grid_dim = ceildiv(slice_num * sg_size, wg_size); + + kernel::reduce_max_nnz_per_slice_call( + cfg, grid_dim, wg_size, 0, exec->get_queue(), num_rows, slice_size, + stride_factor, nnz_per_row.get_const_data(), + max_nnz_per_slice.get_data()); + + grid_dim = ceildiv(slice_num, wg_size); + auto block_results = Array(exec, grid_dim); + + kernel::reduce_total_cols_call( + cfg, grid_dim, wg_size, wg_size * sizeof(size_type), exec->get_queue(), + slice_num, max_nnz_per_slice.get_const_data(), + block_results.get_data()); + + auto d_result = Array(exec, 1); + + kernel::reduce_total_cols_call( + cfg, 1, wg_size, wg_size * sizeof(size_type), exec->get_queue(), + grid_dim, block_results.get_const_data(), d_result.get_data()); + + *result = exec->copy_val_to_host(d_result.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Dense *orig, + matrix::Dense *trans) +{ + auto size = orig->get_size(); + auto sg_array = syn::as_array(subgroup_list); + auto queue = exec->get_queue(); + const std::uint32_t cfg = + get_first_cfg(sg_array, [&queue](std::uint32_t cfg) { + return validate(queue, cfg * cfg, cfg); + }); + dim3 grid(ceildiv(size[1], cfg), ceildiv(size[0], cfg)); + dim3 block(cfg, cfg); + kernel::transpose_call(cfg, grid, block, 0, queue, size[0], size[1], + orig->get_const_values(), orig->get_stride(), + trans->get_values(), 
trans->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Dense *orig, + matrix::Dense *trans) +{ + auto size = orig->get_size(); + auto sg_array = syn::as_array(subgroup_list); + auto queue = exec->get_queue(); + const std::uint32_t cfg = + get_first_cfg(sg_array, [&queue](std::uint32_t cfg) { + return validate(queue, cfg * cfg, cfg); + }); + dim3 grid(ceildiv(size[1], cfg), ceildiv(size[0], cfg)); + dim3 block(cfg, cfg); + kernel::conj_transpose_call(cfg, grid, block, 0, queue, size[0], size[1], + orig->get_const_values(), orig->get_stride(), + trans->get_values(), trans->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); + + +} // namespace dense +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/diagonal_kernels.dp.cpp b/dpcpp/matrix/diagonal_kernels.dp.cpp new file mode 100644 index 00000000000..1aae9a393a7 --- /dev/null +++ b/dpcpp/matrix/diagonal_kernels.dp.cpp @@ -0,0 +1,122 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/diagonal_kernels.hpp" + + +#include + + +#include +#include + + +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Diagonal matrix format namespace. 
+ * + * @ingroup diagonal + */ +namespace diagonal { + + +constexpr int default_block_size = 256; + + +namespace kernel { + + +template +void apply_to_csr(size_type num_rows, const ValueType *__restrict__ diag, + const IndexType *__restrict__ row_ptrs, + ValueType *__restrict__ result_values, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto row = thread::get_subwarp_id_flat(item_ct1); + const auto tid_in_warp = warp_tile.thread_rank(); + + if (row >= num_rows) { + return; + } + + const auto diag_val = diag[row]; + + for (size_type idx = row_ptrs[row] + tid_in_warp; idx < row_ptrs[row + 1]; + idx += warp_size) { + result_values[idx] *= diag_val; + } +} + +GKO_ENABLE_DEFAULT_HOST(apply_to_csr, apply_to_csr); + + +} // namespace kernel + + +template +void apply_to_csr(std::shared_ptr exec, + const matrix::Diagonal *a, + const matrix::Csr *b, + matrix::Csr *c) +{ + const auto num_rows = b->get_size()[0]; + const auto diag_values = a->get_const_values(); + c->copy_from(b); + auto csr_values = c->get_values(); + const auto csr_row_ptrs = c->get_const_row_ptrs(); + + const auto grid_dim = + ceildiv(num_rows * config::warp_size, default_block_size); + kernel::apply_to_csr(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, diag_values, csr_row_ptrs, csr_values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL); + + +} // namespace diagonal +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp new file mode 100644 index 00000000000..7525c85eae9 --- /dev/null +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -0,0 +1,760 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/ell_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "accessor/reduced_row_major.hpp" +#include "core/base/mixed_precision_types.hpp" +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/format_conversion.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The ELL matrix format namespace. + * + * @ingroup ell + */ +namespace ell { + + +constexpr int default_block_size = 256; + + +// TODO: num_threads_per_core and ratio are parameters should be tuned +/** + * num_threads_per_core is the oversubscribing parameter. There are + * `num_threads_per_core` threads assigned to each physical core. + */ +constexpr int num_threads_per_core = 4; + + +/** + * ratio is the parameter to decide when to use threads to do reduction on each + * row. (#cols/#rows > ratio) + */ +constexpr double ratio = 1e-2; + + +/** + * max_thread_per_worker is the max number of thread per worker. The + * `compiled_kernels` must be a list <0, 1, 2, ..., max_thread_per_worker> + */ +constexpr int max_thread_per_worker = 32; + + +/** + * A compile-time list of sub-warp sizes for which the spmv kernels should be + * compiled. + * 0 is a special case where it uses a sub-warp size of warp_size in + * combination with atomic_adds. 
+ */ +using compiled_kernels = syn::value_list; + + +namespace kernel { +namespace { + + +template +void spmv_kernel( + const size_type num_rows, const int num_worker_per_row, + acc::range val, const IndexType *__restrict__ col, + const size_type stride, const size_type num_stored_elements_per_row, + acc::range b, OutputValueType *__restrict__ c, + const size_type c_stride, Closure op, sycl::nd_item<3> item_ct1, + UninitializedArray &storage) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + const decltype(tidx) column_id = item_ct1.get_group(1); + if (num_thread_per_worker == 1) { + // Specialize the num_thread_per_worker = 1. It doesn't need the shared + // memory, __syncthreads, and atomic_add + if (tidx < num_rows) { + auto temp = zero(); + for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { + const auto ind = tidx + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += val(ind) * b(col_idx, column_id); + } + } + const auto c_ind = tidx * c_stride + column_id; + c[c_ind] = op(temp, c[c_ind]); + } + } else { + bool runnable = tidx < num_worker_per_row * num_rows; + const auto idx_in_worker = item_ct1.get_local_id(1); + const auto x = tidx % num_rows; + const auto worker_id = tidx / num_rows; + const auto step_size = num_worker_per_row * num_thread_per_worker; + + if (runnable && idx_in_worker == 0) { + storage[item_ct1.get_local_id(2)] = 0; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + auto temp = zero(); + if (runnable) { + for (size_type idx = + worker_id * num_thread_per_worker + idx_in_worker; + idx < num_stored_elements_per_row; idx += step_size) { + const auto ind = x + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += val(ind) * b(col_idx, column_id); + } + } + atomic_add(&storage[item_ct1.get_local_id(2)], + temp); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (runnable && 
idx_in_worker == 0) { + const auto c_ind = x * c_stride + column_id; + if (atomic) { + atomic_add(&(c[c_ind]), + op(storage[item_ct1.get_local_id(2)], c[c_ind])); + } else { + c[c_ind] = op(storage[item_ct1.get_local_id(2)], c[c_ind]); + } + } + } +} + + +template +void spmv( + const size_type num_rows, const int num_worker_per_row, + acc::range val, const IndexType *__restrict__ col, + const size_type stride, const size_type num_stored_elements_per_row, + acc::range b, OutputValueType *__restrict__ c, + const size_type c_stride, sycl::nd_item<3> item_ct1, + UninitializedArray &storage) +{ + spmv_kernel( + num_rows, num_worker_per_row, val, col, stride, + num_stored_elements_per_row, b, c, c_stride, + [](const OutputValueType &x, const OutputValueType &y) { return x; }, + item_ct1, storage); +} + +template +void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const size_type num_rows, + const int num_worker_per_row, acc::range val, + const IndexType *col, const size_type stride, + const size_type num_stored_elements_per_row, acc::range b, + OutputValueType *c, const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor< + UninitializedArray, + 0, sycl::access_mode::read_write, sycl::access::target::local> + storage_acc_ct1(cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + spmv( + num_rows, num_worker_per_row, val, col, stride, + num_stored_elements_per_row, b, c, c_stride, + item_ct1, *storage_acc_ct1.get_pointer()); + }); + }); +} + + +template +void spmv( + const size_type num_rows, const int num_worker_per_row, + acc::range alpha, acc::range val, + const IndexType *__restrict__ col, const size_type stride, + const size_type num_stored_elements_per_row, acc::range b, + const OutputValueType *__restrict__ beta, OutputValueType *__restrict__ c, + const size_type c_stride, sycl::nd_item<3> item_ct1, + UninitializedArray &storage) +{ + const OutputValueType 
alpha_val = alpha(0); + const OutputValueType beta_val = beta[0]; + if (atomic) { + // Because the atomic operation changes the values of c during + // computation, it can not directly do alpha * a * b + beta * c + // operation. The beta * c needs to be done before calling this kernel. + // Then, this kernel only adds alpha * a * b when it uses atomic + // operation. + spmv_kernel( + num_rows, num_worker_per_row, val, col, stride, + num_stored_elements_per_row, b, c, c_stride, + [&alpha_val](const OutputValueType &x, const OutputValueType &y) { + return alpha_val * x; + }, + item_ct1, storage); + } else { + spmv_kernel( + num_rows, num_worker_per_row, val, col, stride, + num_stored_elements_per_row, b, c, c_stride, + [&alpha_val, &beta_val](const OutputValueType &x, + const OutputValueType &y) { + return alpha_val * x + beta_val * y; + }, + item_ct1, storage); + } +} + +template +void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const size_type num_rows, + const int num_worker_per_row, acc::range alpha, + acc::range val, const IndexType *col, + const size_type stride, const size_type num_stored_elements_per_row, + acc::range b, const OutputValueType *beta, + OutputValueType *c, const size_type c_stride) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor< + UninitializedArray, + 0, sycl::access_mode::read_write, sycl::access::target::local> + storage_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + spmv( + num_rows, num_worker_per_row, alpha, val, col, stride, + num_stored_elements_per_row, b, beta, c, c_stride, item_ct1, + *storage_acc_ct1.get_pointer()); + }); + }); +} + + +} // namespace + + +template +void initialize_zero_dense(size_type num_rows, size_type num_cols, + size_type stride, ValueType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + const auto tidx_x = + item_ct1.get_local_id(2) + + item_ct1.get_local_range().get(2) * item_ct1.get_group(2); + 
const auto tidx_y = + item_ct1.get_local_id(1) + + item_ct1.get_local_range().get(1) * item_ct1.get_group(1); + if (tidx_x < num_cols && tidx_y < num_rows) { + result[tidx_y * stride + tidx_x] = zero(); + } +} + +GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); + + +template +void fill_in_dense(size_type num_rows, size_type nnz, size_type source_stride, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ values, + size_type result_stride, ValueType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num_rows) { + for (size_type col = 0; col < nnz; col++) { + result[tidx * result_stride + + col_idxs[tidx + col * source_stride]] += + values[tidx + col * source_stride]; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); + + +template +void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, + size_type stride, const ValueType *__restrict__ values, + IndexType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + const auto row_idx = thread::get_subwarp_id_flat(item_ct1); + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + + if (row_idx < num_rows) { + IndexType part_result{}; + for (auto i = warp_tile.thread_rank(); i < max_nnz_per_row; + i += warp_size) { + if (values[stride * i + row_idx] != zero()) { + part_result += 1; + } + } + result[row_idx] = ::gko::kernels::dpcpp::reduce( + warp_tile, part_result, + [](const size_type &a, const size_type &b) { return a + b; }); + } +} + +template +void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type max_nnz_per_row, size_type stride, + const ValueType *values, IndexType *result) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + 
count_nnz_per_row(num_rows, max_nnz_per_row, stride, values, + result, item_ct1); + }); + }); +} + +#define GKO_ELL_COUNT_NNZ_PER_ROW(ValueType, IndexType) \ + void count_nnz_per_row(dim3, dim3, size_type, sycl::queue *, size_type, \ + size_type, size_type, const ValueType *, \ + IndexType *) + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_ELL_COUNT_NNZ_PER_ROW); + +#undef GKO_ELL_COUNT_NNZ_PER_ROW + + +template +void fill_in_csr(size_type num_rows, size_type max_nnz_per_row, + size_type stride, const ValueType *__restrict__ source_values, + const IndexType *__restrict__ source_col_idxs, + IndexType *__restrict__ result_row_ptrs, + IndexType *__restrict__ result_col_idxs, + ValueType *__restrict__ result_values, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + + if (tidx < num_rows) { + auto write_to = result_row_ptrs[tidx]; + for (size_type i = 0; i < max_nnz_per_row; i++) { + const auto source_idx = tidx + stride * i; + if (source_values[source_idx] != zero()) { + result_values[write_to] = source_values[source_idx]; + result_col_idxs[write_to] = source_col_idxs[source_idx]; + write_to++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); + + +template +void extract_diagonal(size_type diag_size, size_type max_nnz_per_row, + size_type orig_stride, + const ValueType *__restrict__ orig_values, + const IndexType *__restrict__ orig_col_idxs, + ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + const auto row = tidx % diag_size; + const auto col = tidx / diag_size; + const auto ell_ind = orig_stride * col + row; + + if (col < max_nnz_per_row) { + if (orig_col_idxs[ell_ind] == row && + orig_values[ell_ind] != zero()) { + diag[row] = orig_values[ell_ind]; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); + + +} // namespace kernel + + +namespace { + +template +GKO_INLINE auto as_dpcpp_accessor( + const acc::range> 
&acc) +{ + return acc::range>( + acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), + acc.get_accessor().get_stride()); +} + + +template +void abstract_spmv(syn::value_list, + std::shared_ptr exec, + int num_worker_per_row, + const matrix::Ell *a, + const matrix::Dense *b, + matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) +{ + using a_accessor = + gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; + using b_accessor = + gko::acc::reduced_row_major<2, OutputValueType, const InputValueType>; + + const auto nrows = a->get_size()[0]; + const auto stride = a->get_stride(); + const auto num_stored_elements_per_row = + a->get_num_stored_elements_per_row(); + + constexpr int num_thread_per_worker = + (info == 0) ? max_thread_per_worker : info; + constexpr bool atomic = (info == 0); + const dim3 block_size(default_block_size / num_thread_per_worker, + num_thread_per_worker, 1); + const dim3 grid_size(ceildiv(nrows * num_worker_per_row, block_size.x), + b->get_size()[1], 1); + + const auto a_vals = gko::acc::range( + std::array{{num_stored_elements_per_row * stride}}, + a->get_const_values()); + const auto b_vals = gko::acc::range( + std::array{{b->get_size()[0], b->get_size()[1]}}, + b->get_const_values(), std::array{{b->get_stride()}}); + + if (alpha == nullptr && beta == nullptr) { + kernel::spmv( + grid_size, block_size, 0, exec->get_queue(), nrows, + num_worker_per_row, as_dpcpp_accessor(a_vals), + a->get_const_col_idxs(), stride, num_stored_elements_per_row, + as_dpcpp_accessor(b_vals), c->get_values(), c->get_stride()); + } else if (alpha != nullptr && beta != nullptr) { + const auto alpha_val = gko::acc::range( + std::array{1}, alpha->get_const_values()); + kernel::spmv( + grid_size, block_size, 0, exec->get_queue(), nrows, + num_worker_per_row, as_dpcpp_accessor(alpha_val), + as_dpcpp_accessor(a_vals), a->get_const_col_idxs(), stride, + num_stored_elements_per_row, 
as_dpcpp_accessor(b_vals), + beta->get_const_values(), c->get_values(), c->get_stride()); + } else { + GKO_KERNEL_NOT_FOUND; + } +} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_abstract_spmv, abstract_spmv); + + +template +std::array compute_thread_worker_and_atomicity( + std::shared_ptr exec, + const matrix::Ell *a) +{ + int num_thread_per_worker = 8; + int atomic = 0; + int num_worker_per_row = 1; + + const auto nrows = a->get_size()[0]; + const auto ell_ncols = a->get_num_stored_elements_per_row(); + // TODO: num_threads_per_core should be tuned for Dpcpp + const auto nwarps = 16 * num_threads_per_core; + + // Use multithreads to perform the reduction on each row when the matrix is + // wide. + // To make every thread have computation, so pick the value which is the + // power of 2 less than max_thread_per_worker and is less than or equal to + // ell_ncols. If the num_thread_per_worker is max_thread_per_worker and + // allow more than one worker to work on the same row, use atomic add to + // handle the worker write the value into the same position. The #worker is + // decided according to the number of worker allowed on GPU. 
+ if (static_cast(ell_ncols) / nrows > ratio) { + while (num_thread_per_worker < max_thread_per_worker && + (num_thread_per_worker << 1) <= ell_ncols) { + num_thread_per_worker <<= 1; + } + if (num_thread_per_worker == max_thread_per_worker) { + num_worker_per_row = + std::min(ell_ncols / max_thread_per_worker, nwarps / nrows); + num_worker_per_row = std::max(num_worker_per_row, 1); + } + if (num_worker_per_row > 1) { + atomic = 1; + } + } + return {num_thread_per_worker, atomic, num_worker_per_row}; +} + + +} // namespace + + +template +void spmv(std::shared_ptr exec, + const matrix::Ell *a, + const matrix::Dense *b, + matrix::Dense *c) +{ + const auto data = compute_thread_worker_and_atomicity(exec, a); + const int num_thread_per_worker = std::get<0>(data); + const int atomic = std::get<1>(data); + const int num_worker_per_row = std::get<2>(data); + + /** + * info is the parameter for selecting the dpcpp kernel. + * for info == 0, it uses the kernel by warp_size threads with atomic + * operation for other value, it uses the kernel without atomic_add + */ + const int info = (!atomic) * num_thread_per_worker; + if (atomic) { + components::fill_array(exec, c->get_values(), + c->get_num_stored_elements(), + zero()); + } + select_abstract_spmv( + compiled_kernels(), + [&info](int compiled_info) { return info == compiled_info; }, + syn::value_list(), syn::type_list<>(), exec, num_worker_per_row, a, + b, c); +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Ell *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) +{ + const auto data = compute_thread_worker_and_atomicity(exec, a); + const int num_thread_per_worker = std::get<0>(data); + const int atomic = std::get<1>(data); + const int num_worker_per_row = std::get<2>(data); + + /** + * info is the parameter for selecting the dpcpp kernel. 
+ * for info == 0, it uses the kernel by warp_size threads with atomic + * operation for other value, it uses the kernel without atomic_add + */ + const int info = (!atomic) * num_thread_per_worker; + if (atomic) { + dense::scale(exec, beta, c); + } + select_abstract_spmv( + compiled_kernels(), + [&info](int compiled_info) { return info == compiled_info; }, + syn::value_list(), syn::type_list<>(), exec, num_worker_per_row, a, + b, c, alpha, beta); +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Ell *source, + matrix::Dense *result) +{ + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto result_stride = result->get_stride(); + const auto col_idxs = source->get_const_col_idxs(); + const auto vals = source->get_const_values(); + const auto source_stride = source->get_stride(); + + const dim3 block_size(config::warp_size, + config::max_block_size / config::warp_size, 1); + const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), + ceildiv(num_rows, block_size.y), 1); + kernel::initialize_zero_dense(init_grid_dim, block_size, 0, + exec->get_queue(), num_rows, num_cols, + result_stride, result->get_values()); + + const auto grid_dim = ceildiv(num_rows, default_block_size); + kernel::fill_in_dense(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, source->get_num_stored_elements_per_row(), + source_stride, col_idxs, vals, result_stride, + result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(std::shared_ptr exec, + const matrix::Ell *source, + matrix::Csr *result) +{ + auto num_rows = result->get_size()[0]; + + auto row_ptrs = result->get_row_ptrs(); + auto col_idxs = result->get_col_idxs(); + auto values = result->get_values(); + + const auto stride = source->get_stride(); + const 
auto max_nnz_per_row = source->get_num_stored_elements_per_row(); + + constexpr auto rows_per_block = + ceildiv(default_block_size, config::warp_size); + const auto grid_dim_nnz = ceildiv(source->get_size()[0], rows_per_block); + + kernel::count_nnz_per_row(grid_dim_nnz, default_block_size, 0, + exec->get_queue(), num_rows, max_nnz_per_row, + stride, source->get_const_values(), row_ptrs); + + components::prefix_sum(exec, row_ptrs, num_rows + 1); + + size_type grid_dim = ceildiv(num_rows, default_block_size); + + kernel::fill_in_csr( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + max_nnz_per_row, stride, source->get_const_values(), + source->get_const_col_idxs(), row_ptrs, col_idxs, values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_CONVERT_TO_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::Ell *source, + size_type *result) +{ + const auto num_rows = source->get_size()[0]; + auto nnz_per_row = Array(exec, num_rows); + + calculate_nonzeros_per_row(exec, source, &nnz_per_row); + + *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_COUNT_NONZEROS_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::Ell *source, + Array *result) +{ + const auto num_rows = source->get_size()[0]; + const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); + const auto stride = source->get_stride(); + const auto values = source->get_const_values(); + + const auto warp_size = config::warp_size; + const auto grid_dim = ceildiv(num_rows * warp_size, default_block_size); + + kernel::count_nnz_per_row(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, max_nnz_per_row, + stride, values, result->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void 
extract_diagonal(std::shared_ptr exec, + const matrix::Ell *orig, + matrix::Diagonal *diag) +{ + const auto max_nnz_per_row = orig->get_num_stored_elements_per_row(); + const auto orig_stride = orig->get_stride(); + const auto diag_size = diag->get_size()[0]; + const auto num_blocks = + ceildiv(diag_size * max_nnz_per_row, default_block_size); + + const auto orig_values = orig->get_const_values(); + const auto orig_col_idxs = orig->get_const_col_idxs(); + auto diag_values = diag->get_values(); + + kernel::extract_diagonal( + num_blocks, default_block_size, 0, exec->get_queue(), diag_size, + max_nnz_per_row, orig_stride, orig_values, orig_col_idxs, diag_values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_EXTRACT_DIAGONAL_KERNEL); + + +} // namespace ell +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp new file mode 100644 index 00000000000..bc61e5c6985 --- /dev/null +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -0,0 +1,176 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "dpcpp/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_row_ptrs_to_idxs(std::shared_ptr exec, + const IndexType *ptrs, size_type num_rows, + IndexType *idxs) GKO_NOT_IMPLEMENTED; + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + 
Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/hybrid_kernels.dp.cpp b/dpcpp/matrix/hybrid_kernels.dp.cpp new file mode 100644 index 00000000000..13fa02a3331 --- /dev/null +++ b/dpcpp/matrix/hybrid_kernels.dp.cpp @@ -0,0 +1,320 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/hybrid_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/coo_kernels.hpp" +#include "core/matrix/ell_kernels.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/format_conversion.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/segment_scan.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Hybrid matrix format namespace. + * + * @ingroup hybrid + */ +namespace hybrid { + + +constexpr int default_block_size = 256; +constexpr int warps_in_block = 4; + + +namespace kernel { + + +/** + * The global function for counting the number of nonzeros per row of COO. + * It is almost like COO spmv routine. 
+ * It performs is_nonzeros(Coo) times the vector whose values are one + * + * @param nnz the number of nonzeros in the matrix + * @param num_line the maximum round of each warp + * @param val the value array of the matrix + * @param row the row index array of the matrix + * @param nnz_per_row the output nonzeros per row + */ +template +void count_coo_row_nnz(const size_type nnz, const size_type num_lines, + const ValueType *__restrict__ val, + const IndexType *__restrict__ row, + IndexType *__restrict__ nnz_per_row, + sycl::nd_item<3> item_ct1) +{ + IndexType temp_val = 0; + const auto start = + static_cast(item_ct1.get_local_range().get(2)) * + item_ct1.get_group(2) * item_ct1.get_local_range().get(1) * + num_lines + + item_ct1.get_local_id(1) * item_ct1.get_local_range().get(2) * + num_lines; + size_type num = (nnz > start) * ceildiv(nnz - start, subgroup_size); + num = min(num, num_lines); + const IndexType ind_start = start + item_ct1.get_local_id(2); + const IndexType ind_end = ind_start + (num - 1) * subgroup_size; + IndexType ind = ind_start; + IndexType curr_row = (ind < nnz) ? row[ind] : 0; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + for (; ind < ind_end; ind += subgroup_size) { + temp_val += ind < nnz && val[ind] != zero(); + auto next_row = (ind + subgroup_size < nnz) ? 
row[ind + subgroup_size] + : row[nnz - 1]; + // segmented scan + if (tile_block.any(curr_row != next_row)) { + bool is_first_in_segment = + segment_scan(tile_block, curr_row, &temp_val); + if (is_first_in_segment) { + atomic_add(&(nnz_per_row[curr_row]), temp_val); + } + temp_val = 0; + } + curr_row = next_row; + } + if (num > 0) { + ind = ind_end; + temp_val += ind < nnz && val[ind] != zero(); + // segmented scan + + bool is_first_in_segment = + segment_scan(tile_block, curr_row, &temp_val); + if (is_first_in_segment) { + atomic_add(&(nnz_per_row[curr_row]), temp_val); + } + } +} + +template +void count_coo_row_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, const size_type nnz, + const size_type num_lines, const ValueType *val, + const IndexType *row, IndexType *nnz_per_row) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + count_coo_row_nnz(nnz, num_lines, val, row, + nnz_per_row, item_ct1); + }); + }); +} + + +template +void fill_in_csr(size_type num_rows, size_type max_nnz_per_row, + size_type stride, const ValueType *__restrict__ ell_val, + const IndexType *__restrict__ ell_col, + const ValueType *__restrict__ coo_val, + const IndexType *__restrict__ coo_col, + const IndexType *__restrict__ coo_offset, + IndexType *__restrict__ result_row_ptrs, + IndexType *__restrict__ result_col_idxs, + ValueType *__restrict__ result_values, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + + if (tidx < num_rows) { + auto write_to = result_row_ptrs[tidx]; + for (size_type i = 0; i < max_nnz_per_row; i++) { + const auto source_idx = tidx + stride * i; + if (ell_val[source_idx] != zero()) { + result_values[write_to] = ell_val[source_idx]; + result_col_idxs[write_to] = ell_col[source_idx]; + write_to++; + } + } + for (auto i = coo_offset[tidx]; i < coo_offset[tidx + 1]; i++) { + if (coo_val[i] != zero()) { + 
result_values[write_to] = coo_val[i]; + result_col_idxs[write_to] = coo_col[i]; + write_to++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); + + +template +void add(size_type num, ValueType1 *__restrict__ val1, + const ValueType2 *__restrict__ val2, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < num) { + val1[tidx] += val2[tidx]; + } +} + +GKO_ENABLE_DEFAULT_HOST(add, add); + + +} // namespace kernel + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Hybrid *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_HYBRID_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(std::shared_ptr exec, + const matrix::Hybrid *source, + matrix::Csr *result) +{ + const auto num_rows = source->get_size()[0]; + auto coo_offset = Array(exec, num_rows + 1); + auto coo_val = source->get_const_coo_values(); + auto coo_col = source->get_const_coo_col_idxs(); + auto coo_row = source->get_const_coo_row_idxs(); + auto ell_val = source->get_const_ell_values(); + auto ell_col = source->get_const_ell_col_idxs(); + const auto stride = source->get_ell_stride(); + const auto max_nnz_per_row = source->get_ell_num_stored_elements_per_row(); + const auto coo_num_stored_elements = source->get_coo_num_stored_elements(); + + // Compute the row offset of Coo without zeros + size_type grid_num = ceildiv(coo_num_stored_elements, default_block_size); + coo::kernel::convert_row_idxs_to_ptrs( + grid_num, default_block_size, 0, exec->get_queue(), coo_row, + coo_num_stored_elements, coo_offset.get_data(), num_rows + 1); + + // Compute the row ptrs of Csr + auto row_ptrs = result->get_row_ptrs(); + auto coo_row_ptrs = Array(exec, num_rows); + + components::fill_array(exec, row_ptrs, num_rows + 1, zero()); + grid_num = ceildiv(num_rows, warps_in_block); + ell::kernel::count_nnz_per_row(grid_num, default_block_size, 0, + exec->get_queue(), 
num_rows, max_nnz_per_row, + stride, ell_val, row_ptrs); + + components::fill_array(exec, coo_row_ptrs.get_data(), num_rows, + zero()); + + auto nwarps = + coo::host_kernel::calculate_nwarps(exec, coo_num_stored_elements); + if (nwarps > 0) { + int num_lines = + ceildiv(coo_num_stored_elements, nwarps * config::warp_size); + const dim3 coo_block(config::warp_size, warps_in_block, 1); + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), 1); + + kernel::count_coo_row_nnz(coo_grid, coo_block, 0, exec->get_queue(), + coo_num_stored_elements, num_lines, coo_val, + coo_row, coo_row_ptrs.get_data()); + } + + kernel::add(grid_num, default_block_size, 0, exec->get_queue(), num_rows, + row_ptrs, coo_row_ptrs.get_const_data()); + + components::prefix_sum(exec, row_ptrs, num_rows + 1); + + // Fill the value + grid_num = ceildiv(num_rows, default_block_size); + kernel::fill_in_csr(grid_num, default_block_size, 0, exec->get_queue(), + num_rows, max_nnz_per_row, stride, ell_val, ell_col, + coo_val, coo_col, coo_offset.get_const_data(), row_ptrs, + result->get_col_idxs(), result->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::Hybrid *source, + size_type *result) +{ + size_type ell_nnz = 0; + size_type coo_nnz = 0; + ell::count_nonzeros(exec, source->get_ell(), &ell_nnz); + + auto nnz = source->get_coo_num_stored_elements(); + auto nwarps = coo::host_kernel::calculate_nwarps(exec, nnz); + if (nwarps > 0) { + int num_lines = ceildiv(nnz, nwarps * config::warp_size); + const dim3 coo_block(config::warp_size, warps_in_block, 1); + const dim3 coo_grid(ceildiv(nwarps, warps_in_block), 1); + const auto num_rows = source->get_size()[0]; + auto nnz_per_row = Array(exec, num_rows); + components::fill_array(exec, nnz_per_row.get_data(), num_rows, + zero()); + kernel::count_coo_row_nnz( + coo_grid, coo_block, 0, exec->get_queue(), nnz, num_lines, + 
source->get_coo()->get_const_values(), + source->get_coo()->get_const_row_idxs(), nnz_per_row.get_data()); + + coo_nnz = + reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); + } + + *result = ell_nnz + coo_nnz; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_HYBRID_COUNT_NONZEROS_KERNEL); + + +} // namespace hybrid +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/sellp_kernels.dp.cpp b/dpcpp/matrix/sellp_kernels.dp.cpp new file mode 100644 index 00000000000..cd20591f48d --- /dev/null +++ b/dpcpp/matrix/sellp_kernels.dp.cpp @@ -0,0 +1,503 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/sellp_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/components/prefix_sum.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The SELL-P matrix format namespace. 
+ * + * @ingroup sellp + */ +namespace sellp { + + +constexpr int default_block_size = 256; + + +namespace { + + +template +void spmv_kernel(size_type num_rows, size_type num_right_hand_sides, + size_type b_stride, size_type c_stride, + const size_type *__restrict__ slice_lengths, + const size_type *__restrict__ slice_sets, + const ValueType *__restrict__ a, + const IndexType *__restrict__ col, + const ValueType *__restrict__ b, ValueType *__restrict__ c, + sycl::nd_item<3> item_ct1) +{ + const auto slice_id = item_ct1.get_group(2); + const auto slice_size = item_ct1.get_local_range().get(2); + const auto row_in_slice = item_ct1.get_local_id(2); + const auto global_row = + static_cast(slice_size) * slice_id + row_in_slice; + const auto column_id = item_ct1.get_group(1); + ValueType val = 0; + IndexType ind = 0; + if (global_row < num_rows && column_id < num_right_hand_sides) { + for (size_type i = 0; i < slice_lengths[slice_id]; i++) { + ind = row_in_slice + (slice_sets[slice_id] + i) * slice_size; + val += a[ind] * b[col[ind] * b_stride + column_id]; + } + c[global_row * c_stride + column_id] = val; + } +} + +GKO_ENABLE_DEFAULT_HOST(spmv_kernel, spmv_kernel); + + +template +void advanced_spmv_kernel(size_type num_rows, size_type num_right_hand_sides, + size_type b_stride, size_type c_stride, + const size_type *__restrict__ slice_lengths, + const size_type *__restrict__ slice_sets, + const ValueType *__restrict__ alpha, + const ValueType *__restrict__ a, + const IndexType *__restrict__ col, + const ValueType *__restrict__ b, + const ValueType *__restrict__ beta, + ValueType *__restrict__ c, sycl::nd_item<3> item_ct1) +{ + const auto slice_id = item_ct1.get_group(2); + const auto slice_size = item_ct1.get_local_range().get(2); + const auto row_in_slice = item_ct1.get_local_id(2); + const auto global_row = + static_cast(slice_size) * slice_id + row_in_slice; + const auto column_id = item_ct1.get_group(1); + ValueType val = 0; + IndexType ind = 0; + if (global_row < 
num_rows && column_id < num_right_hand_sides) { + for (size_type i = 0; i < slice_lengths[slice_id]; i++) { + ind = row_in_slice + (slice_sets[slice_id] + i) * slice_size; + val += alpha[0] * a[ind] * b[col[ind] * b_stride + column_id]; + } + c[global_row * c_stride + column_id] = + beta[0] * c[global_row * c_stride + column_id] + val; + } +} + +GKO_ENABLE_DEFAULT_HOST(advanced_spmv_kernel, advanced_spmv_kernel); + + +} // namespace + + +namespace kernel { + + +template +void initialize_zero_dense(size_type num_rows, size_type num_cols, + size_type stride, ValueType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + const auto tidx_x = + item_ct1.get_local_id(2) + + item_ct1.get_local_range().get(2) * item_ct1.get_group(2); + const auto tidx_y = + item_ct1.get_local_id(1) + + item_ct1.get_local_range().get(1) * item_ct1.get_group(1); + if (tidx_x < num_cols && tidx_y < num_rows) { + result[tidx_y * stride + tidx_x] = zero(); + } +} + +GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); + + +template +void fill_in_dense(size_type num_rows, size_type num_cols, size_type stride, + size_type slice_size, + const size_type *__restrict__ slice_lengths, + const size_type *__restrict__ slice_sets, + const IndexType *__restrict__ col_idxs, + const ValueType *__restrict__ values, + ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) +{ + const auto global_row = + thread::get_subwarp_id_flat(item_ct1); + const auto row = global_row % slice_size; + const auto slice = global_row / slice_size; + const auto start_index = item_ct1.get_local_id(2) % threads_per_row; + + if (global_row < num_rows) { + for (auto i = start_index; i < slice_lengths[slice]; + i += threads_per_row) { + if (values[(slice_sets[slice] + i) * slice_size + row] != + zero()) { + result[global_row * stride + + col_idxs[(slice_sets[slice] + i) * slice_size + row]] = + values[(slice_sets[slice] + i) * slice_size + row]; + } + } + } +} + +template +void fill_in_dense(dim3 grid, dim3 
block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, size_type num_cols, + size_type stride, size_type slice_size, + const size_type *slice_lengths, const size_type *slice_sets, + const IndexType *col_idxs, const ValueType *values, + ValueType *result) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + fill_in_dense( + num_rows, num_cols, stride, slice_size, slice_lengths, + slice_sets, col_idxs, values, result, item_ct1); + }); + }); +} + + +template +void count_nnz_per_row(size_type num_rows, size_type slice_size, + const size_type *__restrict__ slice_sets, + const ValueType *__restrict__ values, + IndexType *__restrict__ result, + sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto row_idx = thread::get_subwarp_id_flat(item_ct1); + const auto slice_id = row_idx / slice_size; + const auto tid_in_warp = warp_tile.thread_rank(); + const auto row_in_slice = row_idx % slice_size; + + if (row_idx < num_rows) { + IndexType part_result{}; + for (size_type sellp_ind = + (slice_sets[slice_id] + tid_in_warp) * slice_size + + row_in_slice; + sellp_ind < slice_sets[slice_id + 1] * slice_size; + sellp_ind += warp_size * slice_size) { + if (values[sellp_ind] != zero()) { + part_result += 1; + } + } + result[row_idx] = ::gko::kernels::dpcpp::reduce( + warp_tile, part_result, + [](const size_type &a, const size_type &b) { return a + b; }); + } +} + +GKO_ENABLE_DEFAULT_HOST(count_nnz_per_row, count_nnz_per_row); + + +template +void fill_in_csr(size_type num_rows, size_type slice_size, + const size_type *__restrict__ source_slice_sets, + const IndexType *__restrict__ source_col_idxs, + const ValueType *__restrict__ source_values, + IndexType *__restrict__ result_row_ptrs, + IndexType *__restrict__ result_col_idxs, + ValueType *__restrict__ result_values, + 
sycl::nd_item<3> item_ct1) +{ + const auto row = thread::get_thread_id_flat(item_ct1); + const auto slice_id = row / slice_size; + const auto row_in_slice = row % slice_size; + + if (row < num_rows) { + size_type csr_ind = result_row_ptrs[row]; + for (size_type sellp_ind = + source_slice_sets[slice_id] * slice_size + row_in_slice; + sellp_ind < source_slice_sets[slice_id + 1] * slice_size; + sellp_ind += slice_size) { + if (source_values[sellp_ind] != zero()) { + result_values[csr_ind] = source_values[sellp_ind]; + result_col_idxs[csr_ind] = source_col_idxs[sellp_ind]; + csr_ind++; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); + + +template +void extract_diagonal(size_type diag_size, size_type slice_size, + const size_type *__restrict__ orig_slice_sets, + const ValueType *__restrict__ orig_values, + const IndexType *__restrict__ orig_col_idxs, + ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) +{ + constexpr auto warp_size = config::warp_size; + auto warp_tile = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto slice_id = thread::get_subwarp_id_flat(item_ct1); + const auto tid_in_warp = warp_tile.thread_rank(); + const auto slice_num = ceildiv(diag_size, slice_size); + + if (slice_id >= slice_num) { + return; + } + + const auto start_ind = orig_slice_sets[slice_id] * slice_size + tid_in_warp; + const auto end_ind = orig_slice_sets[slice_id + 1] * slice_size; + + for (auto sellp_ind = start_ind; sellp_ind < end_ind; + sellp_ind += warp_size) { + auto global_row = slice_id * slice_size + sellp_ind % slice_size; + if (global_row < diag_size) { + if (orig_col_idxs[sellp_ind] == global_row && + orig_values[sellp_ind] != zero()) { + diag[global_row] = orig_values[sellp_ind]; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); + + +} // namespace kernel + + +template +void spmv(std::shared_ptr exec, + const matrix::Sellp *a, + const matrix::Dense *b, matrix::Dense *c) +{ + const dim3 
blockSize(matrix::default_slice_size); + const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), + b->get_size()[1]); + + spmv_kernel(gridSize, blockSize, 0, exec->get_queue(), a->get_size()[0], + b->get_size()[1], b->get_stride(), c->get_stride(), + a->get_const_slice_lengths(), a->get_const_slice_sets(), + a->get_const_values(), a->get_const_col_idxs(), + b->get_const_values(), c->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Sellp *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) +{ + const dim3 blockSize(matrix::default_slice_size); + const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), + b->get_size()[1]); + + advanced_spmv_kernel(gridSize, blockSize, 0, exec->get_queue(), + a->get_size()[0], b->get_size()[1], b->get_stride(), + c->get_stride(), a->get_const_slice_lengths(), + a->get_const_slice_sets(), alpha->get_const_values(), + a->get_const_values(), a->get_const_col_idxs(), + b->get_const_values(), beta->get_const_values(), + c->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SELLP_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Sellp *source, + matrix::Dense *result) +{ + const auto num_rows = source->get_size()[0]; + const auto num_cols = source->get_size()[1]; + const auto vals = source->get_const_values(); + const auto col_idxs = source->get_const_col_idxs(); + const auto slice_lengths = source->get_const_slice_lengths(); + const auto slice_sets = source->get_const_slice_sets(); + const auto slice_size = source->get_slice_size(); + + const auto slice_num = ceildiv(num_rows, slice_size); + + const dim3 block_size(config::warp_size, + config::max_block_size / config::warp_size, 1); + const dim3 init_grid_dim(ceildiv(num_cols, block_size.x), + 
ceildiv(num_rows, block_size.y), 1); + + if (num_rows > 0 && result->get_stride() > 0) { + kernel::initialize_zero_dense( + init_grid_dim, block_size, 0, exec->get_queue(), num_rows, num_cols, + result->get_stride(), result->get_values()); + } + + constexpr auto threads_per_row = config::warp_size; + const auto grid_dim = + ceildiv(slice_size * slice_num * threads_per_row, default_block_size); + + if (grid_dim > 0) { + kernel::fill_in_dense( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + num_cols, result->get_stride(), slice_size, slice_lengths, + slice_sets, col_idxs, vals, result->get_values()); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SELLP_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(std::shared_ptr exec, + const matrix::Sellp *source, + matrix::Csr *result) +{ + const auto num_rows = source->get_size()[0]; + const auto slice_size = source->get_slice_size(); + const auto slice_num = ceildiv(num_rows, slice_size); + + const auto source_values = source->get_const_values(); + const auto source_slice_lengths = source->get_const_slice_lengths(); + const auto source_slice_sets = source->get_const_slice_sets(); + const auto source_col_idxs = source->get_const_col_idxs(); + + auto result_values = result->get_values(); + auto result_col_idxs = result->get_col_idxs(); + auto result_row_ptrs = result->get_row_ptrs(); + + auto grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); + + if (grid_dim > 0) { + kernel::count_nnz_per_row( + grid_dim, default_block_size, 0, exec->get_queue(), num_rows, + slice_size, source_slice_sets, source_values, result_row_ptrs); + } + + grid_dim = ceildiv(num_rows + 1, default_block_size); + auto add_values = Array(exec, grid_dim); + + components::prefix_sum(exec, result_row_ptrs, num_rows + 1); + + grid_dim = ceildiv(num_rows, default_block_size); + + if (grid_dim > 0) { + kernel::fill_in_csr(grid_dim, default_block_size, 0, exec->get_queue(), + num_rows, 
slice_size, source_slice_sets, + source_col_idxs, source_values, result_row_ptrs, + result_col_idxs, result_values); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SELLP_CONVERT_TO_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::Sellp *source, + size_type *result) +{ + const auto num_rows = source->get_size()[0]; + + if (num_rows <= 0) { + *result = 0; + return; + } + + const auto slice_size = source->get_slice_size(); + const auto slice_sets = source->get_const_slice_sets(); + const auto values = source->get_const_values(); + + auto nnz_per_row = Array(exec, num_rows); + + auto grid_dim = ceildiv(num_rows * config::warp_size, default_block_size); + + kernel::count_nnz_per_row(grid_dim, default_block_size, 0, + exec->get_queue(), num_rows, slice_size, + slice_sets, values, nnz_per_row.get_data()); + + *result = reduce_add_array(exec, num_rows, nnz_per_row.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SELLP_COUNT_NONZEROS_KERNEL); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Sellp *orig, + matrix::Diagonal *diag) +{ + const auto diag_size = diag->get_size()[0]; + const auto slice_size = orig->get_slice_size(); + const auto slice_num = ceildiv(diag_size, slice_size); + const auto num_blocks = + ceildiv(slice_num * config::warp_size, default_block_size); + + const auto orig_slice_sets = orig->get_const_slice_sets(); + const auto orig_values = orig->get_const_values(); + const auto orig_col_idxs = orig->get_const_col_idxs(); + auto diag_values = diag->get_values(); + + kernel::extract_diagonal( + num_blocks, default_block_size, 0, exec->get_queue(), diag_size, + slice_size, orig_slice_sets, orig_values, orig_col_idxs, diag_values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SELLP_EXTRACT_DIAGONAL_KERNEL); + + +} // namespace sellp +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git 
a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp new file mode 100644 index 00000000000..7dc935e83a5 --- /dev/null +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -0,0 +1,124 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/sparsity_csr_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Compressed sparse row matrix format namespace. + * + * @ingroup sparsity + */ +namespace sparsity_csr { + + +template +void spmv(std::shared_ptr exec, + const matrix::SparsityCsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::SparsityCsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); + + +template +void count_num_diagonal_elements( + std::shared_ptr exec, + const matrix::SparsityCsr *matrix, + size_type *num_diagonal_elements) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_COUNT_NUM_DIAGONAL_ELEMENTS_KERNEL); + + +template +void remove_diagonal_elements( + std::shared_ptr exec, const IndexType *row_ptrs, + const IndexType *col_idxs, + matrix::SparsityCsr *matrix) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::SparsityCsr *orig, + matrix::SparsityCsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_TRANSPOSE_KERNEL); + + +template +void sort_by_column_index(std::shared_ptr exec, + matrix::SparsityCsr *to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::SparsityCsr *to_check, 
+ bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); + + +} // namespace sparsity_csr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp new file mode 100644 index 00000000000..14baa306bca --- /dev/null +++ b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp @@ -0,0 +1,106 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/multigrid/amgx_pgm_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The AMGX_PGM solver namespace. + * + * @ingroup amgx_pgm + */ +namespace amgx_pgm { + + +template +void match_edge(std::shared_ptr exec, + const Array &strongest_neighbor, + Array &agg) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); + + +template +void count_unagg(std::shared_ptr exec, + const Array &agg, + IndexType *num_unagg) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); + + +template +void renumber(std::shared_ptr exec, Array &agg, + IndexType *num_agg) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); + + +template +void find_strongest_neighbor( + std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, Array &agg, + Array &strongest_neighbor) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); + + +template +void assign_to_exist_agg( + std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, Array &agg, + Array &intermediate_agg) GKO_NOT_IMPLEMENTED; + 
+GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); + + +} // namespace amgx_pgm +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/preconditioner/isai_kernels.dp.cpp b/dpcpp/preconditioner/isai_kernels.dp.cpp new file mode 100644 index 00000000000..33cbe044fc9 --- /dev/null +++ b/dpcpp/preconditioner/isai_kernels.dp.cpp @@ -0,0 +1,142 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/preconditioner/isai_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Isai preconditioner namespace. + * + * @ingroup isai + */ +namespace isai { + + +template +void forall_matching(const IndexType *fst, IndexType fst_size, + const IndexType *snd, IndexType snd_size, + Callback cb) GKO_NOT_IMPLEMENTED; + + +template +void generic_generate(std::shared_ptr exec, + const matrix::Csr *mtx, + matrix::Csr *inverse_mtx, + IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + Callable trs_solve) GKO_NOT_IMPLEMENTED; + + +template +void generate_tri_inverse(std::shared_ptr exec, + const matrix::Csr *mtx, + matrix::Csr *inverse_mtx, + IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + bool lower) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL); + + +template +void generate_general_inverse(std::shared_ptr exec, + const matrix::Csr *input, + matrix::Csr *inverse, + IndexType *excess_rhs_ptrs, + IndexType *excess_nz_ptrs, + bool spd) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL); + + +template +void 
generate_excess_system(std::shared_ptr, + const matrix::Csr *input, + const matrix::Csr *inverse, + const IndexType *excess_rhs_ptrs, + const IndexType *excess_nz_ptrs, + matrix::Csr *excess_system, + matrix::Dense *excess_rhs, + size_type e_start, + size_type e_end) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL); + + +template +void scale_excess_solution(std::shared_ptr, + const IndexType *excess_block_ptrs, + matrix::Dense *excess_solution, + size_type e_start, + size_type e_end) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL); + + +template +void scatter_excess_solution(std::shared_ptr, + const IndexType *excess_block_ptrs, + const matrix::Dense *excess_solution, + matrix::Csr *inverse, + size_type e_start, + size_type e_end) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL); + + +} // namespace isai +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/preconditioner/jacobi_kernels.dp.cpp new file mode 100644 index 00000000000..623e2c121ec --- /dev/null +++ b/dpcpp/preconditioner/jacobi_kernels.dp.cpp @@ -0,0 +1,286 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/preconditioner/jacobi_kernels.hpp" + + +#include +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/allocator.hpp" +#include "core/base/extended_float.hpp" +#include "core/preconditioner/jacobi_utils.hpp" +#include "dpcpp/components/matrix_operations.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Jacobi preconditioner namespace. 
+ * + * @ingroup jacobi + */ +namespace jacobi { + + +void initialize_precisions(std::shared_ptr exec, + const Array &source, + Array &precisions) + GKO_NOT_IMPLEMENTED; + + +namespace { + + +template +inline bool has_same_nonzero_pattern( + const IndexType *prev_row_ptr, const IndexType *curr_row_ptr, + const IndexType *next_row_ptr) GKO_NOT_IMPLEMENTED; + + +template +size_type find_natural_blocks(const matrix::Csr *mtx, + uint32 max_block_size, + IndexType *block_ptrs) GKO_NOT_IMPLEMENTED; + + +template +inline size_type agglomerate_supervariables( + uint32 max_block_size, size_type num_natural_blocks, + IndexType *block_ptrs) GKO_NOT_IMPLEMENTED; + + +} // namespace + + +template +void find_blocks(std::shared_ptr exec, + const matrix::Csr *system_matrix, + uint32 max_block_size, size_type &num_blocks, + Array &block_pointers) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL); + + +namespace { + + +template +inline void extract_block(const matrix::Csr *mtx, + IndexType block_size, IndexType block_start, + ValueType *block, + size_type stride) GKO_NOT_IMPLEMENTED; + + +template +inline IndexType choose_pivot(IndexType block_size, const ValueType *block, + size_type stride) GKO_NOT_IMPLEMENTED; + + +template +inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, + ValueType *block, size_type stride) GKO_NOT_IMPLEMENTED; + + +template +inline bool apply_gauss_jordan_transform(IndexType row, IndexType col, + IndexType block_size, ValueType *block, + size_type stride) GKO_NOT_IMPLEMENTED; + + +template > +inline void transpose_block( + IndexType block_size, const SourceValueType *from, size_type from_stride, + ResultValueType *to, size_type to_stride, + ValueConverter converter = {}) noexcept GKO_NOT_IMPLEMENTED; + + +template > +inline void conj_transpose_block( + IndexType block_size, const SourceValueType *from, size_type from_stride, + ResultValueType *to, size_type to_stride, + 
ValueConverter converter = {}) noexcept GKO_NOT_IMPLEMENTED; + + +template > +inline void permute_and_transpose_block( + IndexType block_size, const IndexType *col_perm, + const SourceValueType *source, size_type source_stride, + ResultValueType *result, size_type result_stride, + ValueConverter converter = {}) GKO_NOT_IMPLEMENTED; + + +template +inline bool invert_block(IndexType block_size, IndexType *perm, + ValueType *block, + size_type stride) GKO_NOT_IMPLEMENTED; + + +template +inline bool validate_precision_reduction_feasibility( + std::shared_ptr exec, IndexType block_size, + const ValueType *block, size_type stride) GKO_NOT_IMPLEMENTED; + + +} // namespace + + +template +void generate(std::shared_ptr exec, + const matrix::Csr *system_matrix, + size_type num_blocks, uint32 max_block_size, + remove_complex accuracy, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + Array> &conditioning, + Array &block_precisions, + const Array &block_pointers, + Array &blocks) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_GENERATE_KERNEL); + + +namespace { + + +template < + typename ValueType, typename BlockValueType, + typename ValueConverter = default_converter> +inline void apply_block(size_type block_size, size_type num_rhs, + const BlockValueType *block, size_type stride, + ValueType alpha, const ValueType *b, size_type stride_b, + ValueType beta, ValueType *x, size_type stride_x, + ValueConverter converter = {}) GKO_NOT_IMPLEMENTED; + + +} // namespace + + +template +void apply(std::shared_ptr exec, size_type num_blocks, + uint32 max_block_size, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + const Array &block_precisions, + const Array &block_pointers, + const Array &blocks, + const matrix::Dense *alpha, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *x) GKO_NOT_IMPLEMENTED; + 
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_APPLY_KERNEL); + + +template +void simple_apply( + std::shared_ptr exec, size_type num_blocks, + uint32 max_block_size, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + const Array &block_precisions, + const Array &block_pointers, const Array &blocks, + const matrix::Dense *b, + matrix::Dense *x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL); + + +template +void transpose_jacobi( + std::shared_ptr exec, size_type num_blocks, + uint32 max_block_size, const Array &block_precisions, + const Array &block_pointers, const Array &blocks, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + Array &out_blocks) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL); + + +template +void conj_transpose_jacobi( + std::shared_ptr exec, size_type num_blocks, + uint32 max_block_size, const Array &block_precisions, + const Array &block_pointers, const Array &blocks, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + Array &out_blocks) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); + + +template +void convert_to_dense( + std::shared_ptr exec, size_type num_blocks, + const Array &block_precisions, + const Array &block_pointers, const Array &blocks, + const preconditioner::block_interleaved_storage_scheme + &storage_scheme, + ValueType *result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); + + +} // namespace jacobi +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/reorder/rcm_kernels.dp.cpp b/dpcpp/reorder/rcm_kernels.dp.cpp new file mode 100644 index 00000000000..baf60231bf5 --- /dev/null +++ 
b/dpcpp/reorder/rcm_kernels.dp.cpp @@ -0,0 +1,81 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/reorder/rcm_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The reordering namespace. 
+ * + * @ingroup reorder + */ +namespace rcm { + + +template +void get_degree_of_nodes(std::shared_ptr exec, + const IndexType num_vertices, + const IndexType *const row_ptrs, + IndexType *const degrees) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); + + +template +void get_permutation( + std::shared_ptr exec, const IndexType num_vertices, + const IndexType *const row_ptrs, const IndexType *const col_idxs, + const IndexType *const degrees, IndexType *const permutation, + IndexType *const inv_permutation, + const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); + + +} // namespace rcm +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/solver/cb_gmres_kernels.dp.cpp b/dpcpp/solver/cb_gmres_kernels.dp.cpp new file mode 100644 index 00000000000..2dc9d8b84b5 --- /dev/null +++ b/dpcpp/solver/cb_gmres_kernels.dp.cpp @@ -0,0 +1,1363 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/cb_gmres_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/scaled_reduced_row_major.hpp" +#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/solver/cb_gmres_accessor.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The CB_GMRES solver namespace. 
+ * + * @ingroup cb_gmres + */ +namespace cb_gmres { + + +constexpr int default_block_size = 256; +constexpr int default_dot_dim = 16; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "dpcpp/solver/common_gmres_kernels.dp.inc" + + +template +void zero_matrix_kernel(size_type m, size_type n, size_type stride, + ValueType *__restrict__ array, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + if (tidx < n) { + auto pos = tidx; + for (size_type k = 0; k < m; ++k) { + array[pos] = zero(); + pos += stride; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(zero_matrix_kernel, zero_matrix_kernel); + + +// Must be called with at least `num_rows * stride_krylov` threads in total. +template +void initialize_2_1_kernel(size_type num_rows, size_type num_rhs, + size_type krylov_dim, Accessor3d krylov_bases, + ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_nc, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + // krylov indices + const auto krylov_idx = global_id / krylov_stride[0]; + const auto reminder = global_id % krylov_stride[0]; + const auto krylov_row = reminder / krylov_stride[1]; + const auto rhs = reminder % krylov_stride[1]; + + // residual_norm indices (separated for better coalesced access) + const auto residual_row = global_id / stride_residual_nc; + const auto residual_col = global_id % stride_residual_nc; + + if (krylov_idx < krylov_dim + 1 && krylov_row < num_rows && rhs < num_rhs) { + krylov_bases(krylov_idx, krylov_row, rhs) = zero(); + } + + if (residual_row < krylov_dim + 1 && residual_col < num_rhs) { + residual_norm_collection[residual_row * stride_residual_nc + + residual_col] = zero(); + } +} + +template +void initialize_2_1_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + 
size_type num_rows, size_type num_rhs, + size_type krylov_dim, Accessor3d krylov_bases, + ValueType *residual_norm_collection, + size_type stride_residual_nc) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + initialize_2_1_kernel( + num_rows, num_rhs, krylov_dim, krylov_bases, + residual_norm_collection, stride_residual_nc, item_ct1); + }); + }); +} + + +// Must be called with at least `num_rows * num_rhs` threads in total. +template +void initialize_2_2_kernel( + size_type num_rows, size_type num_rhs, + const ValueType *__restrict__ residual, size_type stride_residual, + const remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ residual_norm_collection, Accessor3d krylov_bases, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const auto row_idx = global_id / krylov_stride[1]; + const auto col_idx = global_id % krylov_stride[1]; + + if (global_id < num_rhs) { + residual_norm_collection[global_id] = residual_norm[global_id]; + final_iter_nums[global_id] = 0; + } + + if (row_idx < num_rows && col_idx < num_rhs) { + auto value = residual[row_idx * stride_residual + col_idx] / + residual_norm[col_idx]; + krylov_bases(0, row_idx, col_idx) = value; + next_krylov_basis[row_idx * stride_next_krylov + col_idx] = value; + } +} + +template +void initialize_2_2_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, size_type num_rhs, const ValueType *residual, + size_type stride_residual, const remove_complex *residual_norm, + ValueType *residual_norm_collection, Accessor3d krylov_bases, + ValueType *next_krylov_basis, size_type stride_next_krylov, + size_type 
*final_iter_nums) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + initialize_2_2_kernel( + num_rows, num_rhs, residual, stride_residual, residual_norm, + residual_norm_collection, krylov_bases, next_krylov_basis, + stride_next_krylov, final_iter_nums, item_ct1); + }); + }); +} + + +void increase_final_iteration_numbers_kernel( + size_type *__restrict__ final_iter_nums, + const stopping_status *__restrict__ stop_status, size_type total_number, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + if (global_id < total_number) { + final_iter_nums[global_id] += !stop_status[global_id].has_stopped(); + } +} + +GKO_ENABLE_DEFAULT_HOST(increase_final_iteration_numbers_kernel, + increase_final_iteration_numbers_kernel); + + +template +void multinorm2_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex *__restrict__ norms, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray, + default_dot_dim *(default_dot_dim + 1)> + *reduction_helper_array) +{ + using rc_vtype = remove_complex; + const auto tidx = item_ct1.get_local_id(2); + const auto tidy = item_ct1.get_local_id(1); + const auto col_idx = item_ct1.get_group(2) * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, item_ct1.get_group_range(1)); + const auto start_row = item_ct1.get_group(1) * num; + const auto end_row = ((item_ct1.get_group(1) + 1) * num > num_rows) + ? 
num_rows + : (item_ct1.get_group(1) + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + + rc_vtype *__restrict__ reduction_helper = (*reduction_helper_array); + rc_vtype local_res = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + local_res += squared_norm(next_krylov_basis[next_krylov_idx]); + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; + group::this_thread_block(item_ct1).sync(); + local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto sum = ::gko::kernels::dpcpp::reduce( + tile_block, local_res, + [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto norms_idx = new_col_idx; + atomic_add(norms + norms_idx, sum); + } +} + +template +void multinorm2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type num_cols, const ValueType *next_krylov_basis, + size_type stride_next_krylov, + remove_complex *norms, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor< + UninitializedArray, + default_dot_dim *(default_dot_dim + 1)>, + 0, sycl::access_mode::read_write, sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multinorm2_kernel(num_rows, num_cols, next_krylov_basis, + stride_next_krylov, norms, stop_status, + item_ct1, + 
reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +template +void multinorminf_without_stop_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex *__restrict__ norms, + size_type stride_norms, sycl::nd_item<3> item_ct1, + UninitializedArray, + default_dot_dim *(default_dot_dim + 1)> + *reduction_helper_array) +{ + using rc_vtype = remove_complex; + const auto tidx = item_ct1.get_local_id(2); + const auto tidy = item_ct1.get_local_id(1); + const auto col_idx = item_ct1.get_group(2) * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, item_ct1.get_group_range(1)); + const auto start_row = item_ct1.get_group(1) * num; + const auto end_row = ((item_ct1.get_group(1) + 1) * num > num_rows) + ? num_rows + : (item_ct1.get_group(1) + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + + rc_vtype *__restrict__ reduction_helper = (*reduction_helper_array); + rc_vtype local_max = zero(); + if (col_idx < num_cols) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + local_max = + (local_max >= std::abs(next_krylov_basis[next_krylov_idx])) + ? local_max + : std::abs(next_krylov_basis[next_krylov_idx]); + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_max; + group::this_thread_block(item_ct1).sync(); + local_max = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto value = ::gko::kernels::dpcpp::reduce( + tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + return ((a >= b) ? 
a : b); + }); + const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols) { + const auto norms_idx = new_col_idx; + atomic_max(norms + norms_idx, value); + } +} + +template +void multinorminf_without_stop_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, size_type num_cols, const ValueType *next_krylov_basis, + size_type stride_next_krylov, remove_complex *norms, + size_type stride_norms) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor< + UninitializedArray, + default_dot_dim *(default_dot_dim + 1)>, + 0, sycl::access_mode::read_write, sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multinorminf_without_stop_kernel( + num_rows, num_cols, next_krylov_basis, stride_next_krylov, + norms, stride_norms, item_ct1, + reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +// ONLY computes the inf-norm (into norms2) when compute_inf is true +template +void multinorm2_inf_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, + remove_complex *__restrict__ norms1, + remove_complex *__restrict__ norms2, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray, + (1 + compute_inf) * + default_dot_dim *(default_dot_dim + 1)> + *reduction_helper_array) +{ + using rc_vtype = remove_complex; + const auto tidx = item_ct1.get_local_id(2); + const auto tidy = item_ct1.get_local_id(1); + const auto col_idx = item_ct1.get_group(2) * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, item_ct1.get_group_range(1)); + const auto start_row = item_ct1.get_group(1) * num; + const auto end_row = ((item_ct1.get_group(1) + 1) * num > num_rows) + ? 
num_rows + : (item_ct1.get_group(1) + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + + rc_vtype *__restrict__ reduction_helper_add = (*reduction_helper_array); + rc_vtype *__restrict__ reduction_helper_max = + static_cast((*reduction_helper_array)) + + default_dot_dim * (default_dot_dim + 1); + rc_vtype local_res = zero(); + rc_vtype local_max = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + const auto num = next_krylov_basis[next_krylov_idx]; + local_res += squared_norm(num); + if (compute_inf) { + local_max = + ((local_max >= std::abs(num)) ? local_max : std::abs(num)); + } + } + } + // Add reduction + reduction_helper_add[tidx * (default_dot_dim + 1) + tidy] = local_res; + if (compute_inf) { + reduction_helper_max[tidx * (default_dot_dim + 1) + tidy] = local_max; + } + group::this_thread_block(item_ct1).sync(); + local_res = reduction_helper_add[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto sum = ::gko::kernels::dpcpp::reduce( + tile_block, local_res, + [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + rc_vtype reduced_max{}; + if (compute_inf) { + local_max = reduction_helper_max[tidy * (default_dot_dim + 1) + tidx]; + reduced_max = ::gko::kernels::dpcpp::reduce( + tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + return ((a >= b) ? 
a : b); + }); + } + const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto norms_idx = new_col_idx; + atomic_add(norms1 + norms_idx, sum); + if (compute_inf) { + atomic_max(norms2 + norms_idx, reduced_max); + } + } +} + +template +void multinorm2_inf_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, size_type num_cols, const ValueType *next_krylov_basis, + size_type stride_next_krylov, remove_complex *norms1, + remove_complex *norms2, const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor< + UninitializedArray, + (1 + compute_inf) * + default_dot_dim *(default_dot_dim + 1)>, + 0, sycl::access_mode::read_write, sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multinorm2_inf_kernel( + num_rows, num_cols, next_krylov_basis, stride_next_krylov, + norms1, norms2, stop_status, item_ct1, + reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +template +void multidot_kernel( + size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray &reduction_helper_array) +{ + /* + * In general in this kernel: + * grid_dim + * x: for col_idx (^= which right hand side) + * y: for row_idx + * z: for num_iters (number of krylov vectors) + * block_dim + * x: for col_idx (must be < dot_dim) + * y: for row_idx (must be < dot_dim) + * (z not used, must be set to 1 in dim) + */ + const size_type tidx = item_ct1.get_local_id(2); + const size_type tidy = item_ct1.get_local_id(1); + const size_type 
col_idx = + item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + + item_ct1.get_local_id(2); + const size_type num_rows_per_thread = + ceildiv(num_rows, item_ct1.get_group_range(1)); + const size_type start_row = + item_ct1.get_group(1) * num_rows_per_thread + item_ct1.get_local_id(1); + const auto end_row = + min((item_ct1.get_group(1) + 1) * num_rows_per_thread, num_rows); + const size_type k = item_ct1.get_group(0); + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row; i < end_row; + i += item_ct1.get_local_range().get(1)) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + ValueType other_basis = krylov_bases(k, i, col_idx); + local_res += next_krylov_basis[next_krylov_idx] * conj(other_basis); + } + } + // Transpose local_res, so each warp contains a local_res from the same + // right hand side + reduction_helper[tidx * dot_dim + tidy] = local_res; + auto thread_block = group::this_thread_block(item_ct1); + thread_block.sync(); + local_res = reduction_helper[tidy * dot_dim + tidx]; + const auto new_col_idx = + item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + tidy; + const auto tile_block = group::tiled_partition(thread_block); + const auto sum = ::gko::kernels::dpcpp::reduce( + tile_block, local_res, + [](const ValueType &a, const ValueType &b) { return a + b; }); + if (tidx == 0 && new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto hessenberg_idx = k * stride_hessenberg + new_col_idx; + atomic_add(hessenberg_iter + hessenberg_idx, sum); + } +} + +template +void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, size_type num_cols, + const 
ValueType *next_krylov_basis, + size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType *hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multidot_kernel( + num_rows, num_cols, next_krylov_basis, stride_next_krylov, + krylov_bases, hessenberg_iter, stride_hessenberg, + stop_status, item_ct1, + *reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +template +void singledot_kernel( + size_type num_rows, const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray &reduction_helper_array) +{ + /* + * In general in this kernel: + * grid_dim + * x: for row_idx + * y: for num_iters (number of krylov vectors) + * block_dim + * x: for row_idx (must be block_size) + * (y and z not used, must be set to 1 in dim) + */ + const size_type tidx = item_ct1.get_local_id(2); + constexpr size_type col_idx{0}; + const size_type k = item_ct1.get_group(1); + const size_type num_rows_per_thread = + ceildiv(num_rows, item_ct1.get_group_range(2)); + const size_type start_row = + item_ct1.get_group(2) * num_rows_per_thread + item_ct1.get_local_id(2); + const auto end_row = + min((item_ct1.get_group(2) + 1) * num_rows_per_thread, num_rows); + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + if (!stop_status[col_idx].has_stopped()) { + for (size_type i = 
start_row; i < end_row; i += block_size) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + ValueType other_basis = krylov_bases(k, i, col_idx); + local_res += next_krylov_basis[next_krylov_idx] * conj(other_basis); + } + } + // Transpose local_res, so each warp contains a local_res from the same + // right hand side + reduction_helper[tidx] = local_res; + auto thread_block = group::this_thread_block(item_ct1); + thread_block.sync(); + ::gko::kernels::dpcpp::reduce( + thread_block, reduction_helper, + [](const ValueType &a, const ValueType &b) { return a + b; }); + if (tidx == 0 && !stop_status[col_idx].has_stopped()) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + atomic_add(hessenberg_iter + hessenberg_idx, reduction_helper[0]); + } +} + +template +void singledot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + const ValueType *next_krylov_basis, + size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType *hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + singledot_kernel( + num_rows, next_krylov_basis, stride_next_krylov, + krylov_bases, hessenberg_iter, stride_hessenberg, + stop_status, item_ct1, + *reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. 
+template +void update_next_krylov_kernel( + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, + const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + auto local_res = next_krylov_basis[next_krylov_idx]; + for (size_type k = 0; k < num_iters; ++k) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + + local_res -= hessenberg_iter[hessenberg_idx] * + krylov_bases(k, row_idx, col_idx); + } + next_krylov_basis[next_krylov_idx] = local_res; + } +} + +template +void update_next_krylov_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, const ValueType *hessenberg_iter, + size_type stride_hessenberg, const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_next_krylov_kernel( + num_iters, num_rows, num_cols, next_krylov_basis, + stride_next_krylov, krylov_bases, hessenberg_iter, + stride_hessenberg, stop_status, item_ct1); + }); + }); +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. 
+template +void update_next_krylov_and_add_kernel( + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, const ValueType *__restrict__ buffer_iter, + size_type stride_buffer, const stopping_status *__restrict__ stop_status, + const stopping_status *__restrict__ reorth_status, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped() && + !reorth_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + auto local_res = next_krylov_basis[next_krylov_idx]; + for (size_type k = 0; k < num_iters; ++k) { + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + const auto buffer_idx = k * stride_buffer + col_idx; + local_res -= + buffer_iter[buffer_idx] * krylov_bases(k, row_idx, col_idx); + if ((row_idx == 0) && !reorth_status[col_idx].has_stopped()) { + hessenberg_iter[hessenberg_idx] += buffer_iter[buffer_idx]; + } + } + next_krylov_basis[next_krylov_idx] = local_res; + } +} + +template +void update_next_krylov_and_add_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_iters, size_type num_rows, size_type num_cols, + ValueType *next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType *hessenberg_iter, + size_type stride_hessenberg, const ValueType *buffer_iter, + size_type stride_buffer, const stopping_status *stop_status, + const stopping_status *reorth_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + 
update_next_krylov_and_add_kernel( + num_iters, num_rows, num_cols, next_krylov_basis, + stride_next_krylov, krylov_bases, hessenberg_iter, + stride_hessenberg, buffer_iter, stride_buffer, stop_status, + reorth_status, item_ct1); + }); + }); +} + + +// Must be called with at least `num_rhs` threads +template +void check_arnoldi_norms( + size_type num_rhs, remove_complex *__restrict__ arnoldi_norm, + size_type stride_norm, ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, + const stopping_status *__restrict__ stop_status, + stopping_status *__restrict__ reorth_status, + size_type *__restrict__ num_reorth, sycl::nd_item<3> item_ct1) +{ + const remove_complex eta_squared = 1.0 / 2.0; + const auto col_idx = thread::get_thread_id_flat(item_ct1); + constexpr bool has_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + + if (col_idx < num_rhs && !stop_status[col_idx].has_stopped()) { + const auto num0 = (std::sqrt(eta_squared * arnoldi_norm[col_idx])); + const auto num11 = std::sqrt(arnoldi_norm[col_idx + stride_norm]); + const auto num2 = has_scalar ? 
(arnoldi_norm[col_idx + 2 * stride_norm]) + : remove_complex{}; + if (num11 < num0) { + reorth_status[col_idx].reset(); + atomic_add(num_reorth, one()); + } else { + reorth_status[col_idx].stop(1); + } + arnoldi_norm[col_idx] = num0; + arnoldi_norm[col_idx + stride_norm] = num11; + hessenberg_iter[iter * stride_hessenberg + col_idx] = num11; + gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, iter, col_idx, num2 / num11); + } +} + +template +void check_arnoldi_norms(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rhs, + remove_complex *arnoldi_norm, + size_type stride_norm, ValueType *hessenberg_iter, + size_type stride_hessenberg, size_type iter, + Accessor3d krylov_bases, + const stopping_status *stop_status, + stopping_status *reorth_status, size_type *num_reorth) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + check_arnoldi_norms( + num_rhs, arnoldi_norm, stride_norm, hessenberg_iter, + stride_hessenberg, iter, krylov_bases, stop_status, + reorth_status, num_reorth, item_ct1); + }); + }); +} + + +template +void set_scalar_kernel(size_type num_rhs, size_type num_blocks, + const RealValueType *__restrict__ residual_norm, + size_type stride_residual, + const RealValueType *__restrict__ arnoldi_inf, + size_type stride_inf, Accessor3d krylov_bases, + sycl::nd_item<3> item_ct1) +{ + static_assert(!is_complex_s::value, + "ValueType must not be complex!"); + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const auto blk_idx = global_id / krylov_stride[1]; + const auto col_idx = global_id % krylov_stride[1]; + + if (blk_idx < num_blocks && col_idx < num_rhs) { + if (blk_idx == 0) { + const auto num1 = residual_norm[col_idx]; + const auto num2 = arnoldi_inf[col_idx]; + 
gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, {0}, col_idx, num2 / num1); + } else { + const auto num = one(); + gko::cb_gmres::helper_functions_accessor::write_scalar( + krylov_bases, blk_idx, col_idx, num); + } + } +} + +template +void set_scalar_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rhs, + size_type num_blocks, const RealValueType *residual_norm, + size_type stride_residual, + const RealValueType *arnoldi_inf, size_type stride_inf, + Accessor3d krylov_bases) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + set_scalar_kernel( + num_rhs, num_blocks, residual_norm, stride_residual, + arnoldi_inf, stride_inf, krylov_bases, item_ct1); + }); + }); +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. +template +void update_krylov_next_krylov_kernel( + size_type iter, size_type num_rows, size_type num_cols, + ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / stride_next_krylov; + const auto col_idx = global_id % stride_next_krylov; + const auto hessenberg = + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx]; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_next_krylov + col_idx; + + const auto next_krylov_value = + next_krylov_basis[next_krylov_idx] / hessenberg; + + next_krylov_basis[next_krylov_idx] = next_krylov_value; + krylov_bases(iter + 1, row_idx, col_idx) = next_krylov_value; + } +} + +template +void update_krylov_next_krylov_kernel( + dim3 grid, dim3 
block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type iter, size_type num_rows, size_type num_cols, + ValueType *next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType *hessenberg_iter, + size_type stride_hessenberg, const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_krylov_next_krylov_kernel( + iter, num_rows, num_cols, next_krylov_basis, + stride_next_krylov, krylov_bases, hessenberg_iter, + stride_hessenberg, stop_status, item_ct1); + }); + }); +} + + +// Must be called with at least `stride_preconditioner * num_rows` threads +// in total. +template +void calculate_Qy_kernel(size_type num_rows, size_type num_cols, + const Accessor3d krylov_bases, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ before_preconditioner, + size_type stride_preconditioner, + const size_type *__restrict__ final_iter_nums, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_id = global_id / stride_preconditioner; + const auto col_id = global_id % stride_preconditioner; + + if (row_id < num_rows && col_id < num_cols) { + ValueType temp = zero(); + for (size_type j = 0; j < final_iter_nums[col_id]; ++j) { + temp += krylov_bases(j, row_id, col_id) * y[j * stride_y + col_id]; + } + before_preconditioner[global_id] = temp; + } +} + +template +void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type num_cols, const Accessor3d krylov_bases, + const ValueType *y, size_type stride_y, + ValueType *before_preconditioner, + size_type stride_preconditioner, + const size_type *final_iter_nums) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + calculate_Qy_kernel( + num_rows, 
num_cols, krylov_bases, y, stride_y, + before_preconditioner, stride_preconditioner, + final_iter_nums, item_ct1); + }); + }); +} + + +// Specialization, so the Accessor can use the same function as regular pointers +template +GKO_INLINE auto as_dpcpp_accessor( + const acc::range> &acc) +{ + return acc::range>( + acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), + acc.get_accessor().get_stride()); +} + +template +GKO_INLINE auto as_dpcpp_accessor( + const acc::range> + &acc) +{ + return acc::range>( + acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), + acc.get_accessor().get_storage_stride(), + acc.get_accessor().get_scalar(), + acc.get_accessor().get_scalar_stride()); +} + + +template +void zero_matrix(std::shared_ptr exec, size_type m, + size_type n, size_type stride, ValueType *array) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + zero_matrix_kernel(grid_size, block_size, 0, exec->get_queue(), m, n, + stride, array); +} + + +template +void initialize_1(std::shared_ptr exec, + const matrix::Dense *b, + matrix::Dense *residual, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + Array *stop_status, size_type krylov_dim) +{ + const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), + krylov_dim * b->get_size()[1]); + const dim3 grid_dim(ceildiv(num_threads, default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + + initialize_1_kernel( + grid_dim, block_dim, 0, exec->get_queue(), b->get_size()[0], + b->get_size()[1], krylov_dim, b->get_const_values(), b->get_stride(), + residual->get_values(), residual->get_stride(), + givens_sin->get_values(), givens_sin->get_stride(), + givens_cos->get_values(), givens_cos->get_stride(), + stop_status->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); + + +template +void 
initialize_2(std::shared_ptr exec, + const matrix::Dense *residual, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + matrix::Dense> *arnoldi_norm, + Accessor3d krylov_bases, + matrix::Dense *next_krylov_basis, + Array *final_iter_nums, size_type krylov_dim) +{ + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto num_rows = residual->get_size()[0]; + const auto num_rhs = residual->get_size()[1]; + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const dim3 grid_dim_1( + ceildiv((krylov_dim + 1) * krylov_stride[0], default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + const auto stride_arnoldi = arnoldi_norm->get_stride(); + + initialize_2_1_kernel( + grid_dim_1, block_dim, 0, exec->get_queue(), residual->get_size()[0], + residual->get_size()[1], krylov_dim, as_dpcpp_accessor(krylov_bases), + residual_norm_collection->get_values(), + residual_norm_collection->get_stride()); + kernels::dpcpp::dense::compute_norm2(exec, residual, residual_norm); + + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + num_rhs, zero>()); + const dim3 grid_size_nrm(ceildiv(num_rhs, default_dot_dim), + exec->get_num_computing_units() * 2); + const dim3 block_size_nrm(default_dot_dim, default_dot_dim); + multinorminf_without_stop_kernel( + grid_size_nrm, block_size_nrm, 0, exec->get_queue(), num_rows, + num_rhs, residual->get_const_values(), residual->get_stride(), + arnoldi_norm->get_values() + 2 * stride_arnoldi, 0); + } + + if (gko::cb_gmres::detail::has_3d_scaled_accessor::value) { + set_scalar_kernel( + ceildiv(num_rhs * (krylov_dim + 1), default_block_size), + default_block_size, 0, exec->get_queue(), num_rhs, krylov_dim + 1, + residual_norm->get_const_values(), residual_norm->get_stride(), + arnoldi_norm->get_const_values() + 2 * 
stride_arnoldi, + stride_arnoldi, as_dpcpp_accessor(krylov_bases)); + } + + const dim3 grid_dim_2( + ceildiv(num_rows * krylov_stride[1], default_block_size), 1, 1); + initialize_2_2_kernel( + grid_dim_2, block_dim, 0, exec->get_queue(), residual->get_size()[0], + residual->get_size()[1], residual->get_const_values(), + residual->get_stride(), residual_norm->get_const_values(), + residual_norm_collection->get_values(), as_dpcpp_accessor(krylov_bases), + next_krylov_basis->get_values(), next_krylov_basis->get_stride(), + final_iter_nums->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( + GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL); + + +template +void finish_arnoldi_CGS(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + Accessor3dim krylov_bases, + matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, const stopping_status *stop_status, + stopping_status *reorth_status, + Array *num_reorth) +{ + using non_complex = remove_complex; + // optimization parameter + constexpr int singledot_block_size = default_dot_dim; + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto stride_next_krylov = next_krylov_basis->get_stride(); + const auto stride_hessenberg = hessenberg_iter->get_stride(); + const auto stride_buffer = buffer_iter->get_stride(); + const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto dim_size = next_krylov_basis->get_size(); + const dim3 grid_size(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_computing_units() * 2); + const dim3 grid_size_num_iters(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_computing_units() * 2, + iter + 1); + const dim3 block_size(default_dot_dim, default_dot_dim); + // Note: having iter first (instead of row_idx information) is likely + // beneficial for avoiding atomic_add conflicts, but that needs + // further investigation. 
+ const dim3 grid_size_iters_single(exec->get_num_computing_units() * 2, + iter + 1); + const dim3 block_size_iters_single(singledot_block_size); + size_type num_reorth_host; + + components::fill_array(exec, arnoldi_norm->get_values(), dim_size[1], + zero()); + multinorm2_kernel(grid_size, block_size, 0, exec->get_queue(), dim_size[0], + dim_size[1], next_krylov_basis->get_const_values(), + stride_next_krylov, arnoldi_norm->get_values(), + stop_status); + zero_matrix(exec, iter + 1, dim_size[1], stride_hessenberg, + hessenberg_iter->get_values()); + if (dim_size[1] > 1) { + multidot_kernel( + grid_size_num_iters, block_size, 0, exec->get_queue(), dim_size[0], + dim_size[1], next_krylov_basis->get_const_values(), + stride_next_krylov, as_dpcpp_accessor(krylov_bases), + hessenberg_iter->get_values(), stride_hessenberg, stop_status); + } else { + singledot_kernel( + grid_size_iters_single, block_size_iters_single, 0, + exec->get_queue(), dim_size[0], + next_krylov_basis->get_const_values(), stride_next_krylov, + as_dpcpp_accessor(krylov_bases), hessenberg_iter->get_values(), + stride_hessenberg, stop_status); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + update_next_krylov_kernel( + ceildiv(dim_size[0] * stride_next_krylov, default_block_size), + default_block_size, 0, exec->get_queue(), iter + 1, dim_size[0], + dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, + as_dpcpp_accessor(krylov_bases), hessenberg_iter->get_const_values(), + stride_hessenberg, stop_status); + + // for i in 1:iter + // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + multinorm2_inf_kernel( + grid_size, block_size, 0, exec->get_queue(), dim_size[0], dim_size[1], + 
next_krylov_basis->get_const_values(), stride_next_krylov, + arnoldi_norm->get_values() + stride_arnoldi, + arnoldi_norm->get_values() + 2 * stride_arnoldi, stop_status); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, zero()); + check_arnoldi_norms( + ceildiv(dim_size[1], default_block_size), default_block_size, 0, + exec->get_queue(), dim_size[1], arnoldi_norm->get_values(), + stride_arnoldi, hessenberg_iter->get_values(), stride_hessenberg, + iter + 1, as_dpcpp_accessor(krylov_bases), stop_status, reorth_status, + num_reorth->get_data()); + num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + // num_reorth_host := number of next_krylov vector to be reorthogonalization + for (size_type l = 1; (num_reorth_host > 0) && (l < 3); l++) { + zero_matrix(exec, iter + 1, dim_size[1], stride_buffer, + buffer_iter->get_values()); + if (dim_size[1] > 1) { + multidot_kernel( + grid_size_num_iters, block_size, 0, exec->get_queue(), + dim_size[0], dim_size[1], next_krylov_basis->get_const_values(), + stride_next_krylov, as_dpcpp_accessor(krylov_bases), + buffer_iter->get_values(), stride_buffer, stop_status); + } else { + singledot_kernel( + grid_size_iters_single, block_size_iters_single, 0, + exec->get_queue(), dim_size[0], + next_krylov_basis->get_const_values(), stride_next_krylov, + as_dpcpp_accessor(krylov_bases), buffer_iter->get_values(), + stride_buffer, stop_status); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + update_next_krylov_and_add_kernel( + ceildiv(dim_size[0] * stride_next_krylov, default_block_size), + default_block_size, 0, exec->get_queue(), iter + 1, dim_size[0], + dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, + as_dpcpp_accessor(krylov_bases), hessenberg_iter->get_values(), + stride_hessenberg, buffer_iter->get_const_values(), stride_buffer, + stop_status, reorth_status); + // for i in 1:iter + // 
next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, + arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array( + exec, arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + multinorm2_inf_kernel( + grid_size, block_size, 0, exec->get_queue(), dim_size[0], + dim_size[1], next_krylov_basis->get_const_values(), + stride_next_krylov, arnoldi_norm->get_values() + stride_arnoldi, + arnoldi_norm->get_values() + 2 * stride_arnoldi, stop_status); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, + zero()); + check_arnoldi_norms( + ceildiv(dim_size[1], default_block_size), default_block_size, 0, + exec->get_queue(), dim_size[1], arnoldi_norm->get_values(), + stride_arnoldi, hessenberg_iter->get_values(), stride_hessenberg, + iter + 1, as_dpcpp_accessor(krylov_bases), stop_status, + reorth_status, num_reorth->get_data()); + num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + } + + update_krylov_next_krylov_kernel( + ceildiv(dim_size[0] * stride_next_krylov, default_block_size), + default_block_size, 0, exec->get_queue(), iter, dim_size[0], + dim_size[1], next_krylov_basis->get_values(), stride_next_krylov, + as_dpcpp_accessor(krylov_bases), hessenberg_iter->get_const_values(), + stride_hessenberg, stop_status); + // next_krylov_basis /= hessenberg(iter, iter + 1) + // krylov_bases(:, iter + 1) = next_krylov_basis + // End of arnoldi +} + +template +void givens_rotation(std::shared_ptr exec, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense *hessenberg_iter, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + size_type iter, const Array *stop_status) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_cols = hessenberg_iter->get_size()[1]; + const dim3 
block_dim{block_size, 1, 1}; + const dim3 grid_dim{ + static_cast(ceildiv(num_cols, block_size)), 1, 1}; + + givens_rotation_kernel( + grid_dim, block_dim, 0, exec->get_queue(), + hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], iter, + hessenberg_iter->get_values(), hessenberg_iter->get_stride(), + givens_sin->get_values(), givens_sin->get_stride(), + givens_cos->get_values(), givens_cos->get_stride(), + residual_norm->get_values(), residual_norm_collection->get_values(), + residual_norm_collection->get_stride(), stop_status->get_const_data()); +} + + +template +void step_1(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, Array *final_iter_nums, + const Array *stop_status, + Array *reorth_status, Array *num_reorth) +{ + increase_final_iteration_numbers_kernel( + static_cast( + ceildiv(final_iter_nums->get_num_elems(), default_block_size)), + default_block_size, 0, exec->get_queue(), final_iter_nums->get_data(), + stop_status->get_const_data(), final_iter_nums->get_num_elems()); + finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, + buffer_iter, arnoldi_norm, iter, + stop_status->get_const_data(), reorth_status->get_data(), + num_reorth); + givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, + residual_norm, residual_norm_collection, iter, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); + + +template +void solve_upper_triangular( + std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + const matrix::Dense *hessenberg, matrix::Dense *y, + const Array *final_iter_nums) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + 
const auto num_rhs = residual_norm_collection->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{static_cast(ceildiv(num_rhs, block_size)), + 1, 1}; + + solve_upper_triangular_kernel( + grid_dim, block_dim, 0, exec->get_queue(), hessenberg->get_size()[1], + num_rhs, residual_norm_collection->get_const_values(), + residual_norm_collection->get_stride(), hessenberg->get_const_values(), + hessenberg->get_stride(), y->get_values(), y->get_stride(), + final_iter_nums->get_const_data()); +} + + +template +void calculate_qy(std::shared_ptr exec, + ConstAccessor3d krylov_bases, size_type num_krylov_bases, + const matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + const auto num_rows = before_preconditioner->get_size()[0]; + const auto num_cols = before_preconditioner->get_size()[1]; + const auto stride_before_preconditioner = + before_preconditioner->get_stride(); + + constexpr auto block_size = default_block_size; + const dim3 grid_dim{ + static_cast( + ceildiv(num_rows * stride_before_preconditioner, block_size)), + 1, 1}; + const dim3 block_dim{block_size, 1, 1}; + + + calculate_Qy_kernel( + grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, + as_dpcpp_accessor(krylov_bases), y->get_const_values(), y->get_stride(), + before_preconditioner->get_values(), stride_before_preconditioner, + final_iter_nums->get_const_data()); + // Calculate qy + // before_preconditioner = krylov_bases * y +} + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + ConstAccessor3d krylov_bases, + const matrix::Dense *hessenberg, + matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + // since hessenberg has dims: iters x iters * num_rhs + // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs + const auto iters = + hessenberg->get_size()[1] / before_preconditioner->get_size()[1]; + const auto num_krylov_bases = iters + 1; 
+ solve_upper_triangular(exec, residual_norm_collection, hessenberg, y, + final_iter_nums); + calculate_qy(exec, krylov_bases, num_krylov_bases, y, before_preconditioner, + final_iter_nums); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( + GKO_DECLARE_CB_GMRES_STEP_2_KERNEL); + + +} // namespace cb_gmres +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/solver/common_gmres_kernels.dp.inc b/dpcpp/solver/common_gmres_kernels.dp.inc new file mode 100644 index 00000000000..6c2cf483ef2 --- /dev/null +++ b/dpcpp/solver/common_gmres_kernels.dp.inc @@ -0,0 +1,272 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +// Must be called with at least `max(stride_b * num_rows, krylov_dim * +// num_cols)` threads in total. +template +void initialize_1_kernel( + size_type num_rows, size_type num_cols, size_type krylov_dim, + const ValueType *__restrict__ b, size_type stride_b, + ValueType *__restrict__ residual, size_type stride_residual, + ValueType *__restrict__ givens_sin, size_type stride_sin, + ValueType *__restrict__ givens_cos, size_type stride_cos, + stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + + const auto row_idx = global_id / stride_b; + const auto col_idx = global_id % stride_b; + + if (global_id < num_cols) { + stop_status[global_id].reset(); + } + + if (row_idx < num_rows && col_idx < num_cols) { + residual[row_idx * stride_residual + col_idx] = + b[row_idx * stride_b + col_idx]; + } + + if (global_id < krylov_dim * num_cols) { + const auto row_givens = global_id / num_cols; + const auto col_givens = global_id % num_cols; + + givens_sin[row_givens * stride_sin + col_givens] = zero(); + givens_cos[row_givens * stride_cos + col_givens] = zero(); + } +} + +template +void initialize_1_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type num_cols, size_type krylov_dim, + const ValueType *b, size_type stride_b, + ValueType *residual, 
size_type stride_residual, + ValueType *givens_sin, size_type stride_sin, + ValueType *givens_cos, size_type stride_cos, + stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + initialize_1_kernel( + num_rows, num_cols, krylov_dim, b, stride_b, residual, + stride_residual, givens_sin, stride_sin, givens_cos, + stride_cos, stop_status, item_ct1); + }); + }); +} + + +template +void calculate_sin_and_cos_kernel(size_type col_idx, size_type num_cols, + size_type iter, const ValueType &this_hess, + const ValueType &next_hess, + ValueType *givens_sin, size_type stride_sin, + ValueType *givens_cos, size_type stride_cos, + ValueType ®ister_sin, + ValueType ®ister_cos) +{ + if (this_hess == zero()) { + register_cos = zero(); + register_sin = one(); + } else { + const auto scale = std::abs(this_hess) + std::abs(next_hess); + const auto hypotenuse = + scale * + std::sqrt( + std::abs(this_hess / scale) * std::abs(this_hess / scale) + + std::abs(next_hess / scale) * std::abs(next_hess / scale)); + register_cos = conj(this_hess) / hypotenuse; + register_sin = conj(next_hess) / hypotenuse; + } + givens_cos[iter * stride_cos + col_idx] = register_cos; + givens_sin[iter * stride_sin + col_idx] = register_sin; +} + + +template +void calculate_residual_norm_kernel(size_type col_idx, size_type num_cols, + size_type iter, + const ValueType ®ister_sin, + const ValueType ®ister_cos, + remove_complex *residual_norm, + ValueType *residual_norm_collection, + size_type stride_residual_norm_collection) +{ + const auto this_rnc = + residual_norm_collection[iter * stride_residual_norm_collection + + col_idx]; + + const auto next_rnc = -conj(register_sin) * this_rnc; + residual_norm_collection[iter * stride_residual_norm_collection + col_idx] = + register_cos * this_rnc; + residual_norm[col_idx] = std::abs(next_rnc); + residual_norm_collection[(iter + 1) * stride_residual_norm_collection + + 
col_idx] = next_rnc; +} + + +// Must be called with at least `num_cols` threads in total. +template +void givens_rotation_kernel( + size_type num_rows, size_type num_cols, size_type iter, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + ValueType *__restrict__ givens_sin, size_type stride_sin, + ValueType *__restrict__ givens_cos, size_type stride_cos, + remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_norm_collection, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto col_idx = thread::get_thread_id_flat(item_ct1); + + if (col_idx >= num_cols || stop_status[col_idx].has_stopped()) { + return; + } + + auto this_hess = hessenberg_iter[col_idx]; + auto next_hess = hessenberg_iter[stride_hessenberg + col_idx]; + for (size_type i = 0; i < iter; ++i) { + const auto cos = givens_cos[i * stride_cos + col_idx]; + const auto sin = givens_sin[i * stride_sin + col_idx]; + hessenberg_iter[i * stride_hessenberg + col_idx] = + cos * this_hess + sin * next_hess; + this_hess = conj(cos) * next_hess - conj(sin) * this_hess; + next_hess = hessenberg_iter[(i + 2) * stride_hessenberg + col_idx]; + } + // for j in 0:iter - 1 + // temp = cos(j)*hessenberg(j) + + // sin(j)*hessenberg(j+1) + // hessenberg(j+1) = -sin(j)*hessenberg(j) + + // cos(j)*hessenberg(j+1) + // hessenberg(j) = temp; + // end + + ValueType register_sin; + ValueType register_cos; + calculate_sin_and_cos_kernel(col_idx, num_cols, iter, this_hess, next_hess, + givens_sin, stride_sin, givens_cos, stride_cos, + register_sin, register_cos); + // Calculate sin and cos on hessenberg(iter) and hessenberg(iter+1) + + hessenberg_iter[iter * stride_hessenberg + col_idx] = + register_cos * this_hess + register_sin * next_hess; + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = + zero(); + // hessenberg(iter) = cos(iter)*hessenberg(iter) + + // sin(iter)*hessenberg(iter+1) + // 
hessenberg(iter+1) = 0 + + calculate_residual_norm_kernel( + col_idx, num_cols, iter, register_sin, register_cos, residual_norm, + residual_norm_collection, stride_residual_norm_collection); + // Calculate residual norm +} + +template +void givens_rotation_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, size_type num_cols, + size_type iter, ValueType *hessenberg_iter, + size_type stride_hessenberg, ValueType *givens_sin, + size_type stride_sin, ValueType *givens_cos, + size_type stride_cos, + remove_complex *residual_norm, + ValueType *residual_norm_collection, + size_type stride_residual_norm_collection, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + givens_rotation_kernel( + num_rows, num_cols, iter, hessenberg_iter, + stride_hessenberg, givens_sin, stride_sin, givens_cos, + stride_cos, residual_norm, residual_norm_collection, + stride_residual_norm_collection, stop_status, item_ct1); + }); + }); +} + + +// Must be called with at least `num_rhs` threads in total. 
+template +void solve_upper_triangular_kernel( + size_type num_cols, size_type num_rhs, + const ValueType *__restrict__ residual_norm_collection, + size_type stride_residual_norm_collection, + const ValueType *__restrict__ hessenberg, size_type stride_hessenberg, + ValueType *__restrict__ y, size_type stride_y, + const size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) +{ + const auto col_idx = thread::get_thread_id_flat(item_ct1); + + if (col_idx >= num_rhs) { + return; + } + + for (int i = final_iter_nums[col_idx] - 1; i >= 0; --i) { + auto temp = + residual_norm_collection[i * stride_residual_norm_collection + + col_idx]; + for (size_type j = i + 1; j < final_iter_nums[col_idx]; ++j) { + temp -= hessenberg[i * stride_hessenberg + j * num_rhs + col_idx] * + y[j * stride_y + col_idx]; + } + + y[i * stride_y + col_idx] = + temp / hessenberg[i * stride_hessenberg + i * num_rhs + col_idx]; + } + // Solve upper triangular. + // y = hessenberg \ residual_norm_collection +} + +template +void solve_upper_triangular_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_cols, size_type num_rhs, + const ValueType *residual_norm_collection, + size_type stride_residual_norm_collection, const ValueType *hessenberg, + size_type stride_hessenberg, ValueType *y, size_type stride_y, + const size_type *final_iter_nums) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + solve_upper_triangular_kernel( + num_cols, num_rhs, residual_norm_collection, + stride_residual_norm_collection, hessenberg, + stride_hessenberg, y, stride_y, final_iter_nums, item_ct1); + }); + }); +} diff --git a/dpcpp/solver/gmres_kernels.dp.cpp b/dpcpp/solver/gmres_kernels.dp.cpp new file mode 100644 index 00000000000..55369b09e9a --- /dev/null +++ b/dpcpp/solver/gmres_kernels.dp.cpp @@ -0,0 +1,678 @@ +/************************************************************* 
+Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/solver/gmres_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/onemkl_bindings.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/components/uninitialized_array.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The GMRES solver namespace. + * + * @ingroup gmres + */ +namespace gmres { + + +constexpr int default_block_size = 256; +constexpr int default_dot_dim = 16; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "dpcpp/solver/common_gmres_kernels.dp.inc" + + +// Must be called with at least `num_rows * num_rhs` threads in total. 
+template +void initialize_2_2_kernel( + size_type num_rows, size_type num_rhs, + const ValueType *__restrict__ residual, size_type stride_residual, + const remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ residual_norm_collection, + ValueType *__restrict__ krylov_bases, size_type stride_krylov, + size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / num_rhs; + const auto col_idx = global_id % num_rhs; + + if (global_id < num_rhs) { + residual_norm_collection[global_id] = residual_norm[global_id]; + final_iter_nums[global_id] = 0; + } + + if (row_idx < num_rows && col_idx < num_rhs) { + auto value = residual[row_idx * stride_residual + col_idx] / + residual_norm[col_idx]; + krylov_bases[row_idx * stride_krylov + col_idx] = value; + } +} + +template +void initialize_2_2_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + size_type num_rows, size_type num_rhs, + const ValueType *residual, size_type stride_residual, + const remove_complex *residual_norm, + ValueType *residual_norm_collection, + ValueType *krylov_bases, size_type stride_krylov, + size_type *final_iter_nums) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + initialize_2_2_kernel( + num_rows, num_rhs, residual, stride_residual, residual_norm, + residual_norm_collection, krylov_bases, stride_krylov, + final_iter_nums, item_ct1); + }); + }); +} + + +void increase_final_iteration_numbers_kernel( + size_type *__restrict__ final_iter_nums, + const stopping_status *__restrict__ stop_status, size_type total_number, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + if (global_id < total_number) { + final_iter_nums[global_id] += !stop_status[global_id].has_stopped(); + } +} + 
+GKO_ENABLE_DEFAULT_HOST(increase_final_iteration_numbers_kernel, + increase_final_iteration_numbers_kernel); + + +template +void multidot_kernel( + size_type k, size_type num_rows, size_type num_cols, + const ValueType *__restrict__ krylov_bases, + const ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, + ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray + *reduction_helper_array) +{ + const auto tidx = item_ct1.get_local_id(2); + const auto tidy = item_ct1.get_local_id(1); + const auto col_idx = item_ct1.get_group(2) * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, item_ct1.get_group_range(1)); + const auto start_row = item_ct1.get_group(1) * num; + const auto end_row = ((item_ct1.get_group(1) + 1) * num > num_rows) + ? num_rows + : (item_ct1.get_group(1) + 1) * num; + + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + ValueType *__restrict__ reduction_helper = (*reduction_helper_array); + + ValueType local_res = zero(); + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto krylov_idx = i * stride_krylov + col_idx; + local_res += + conj(krylov_bases[krylov_idx]) * next_krylov_basis[krylov_idx]; + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; + item_ct1.barrier(sycl::access::fence_space::local_space); + local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto sum = ::gko::kernels::dpcpp::reduce( + tile_block, local_res, + [](const ValueType &a, const ValueType &b) { return a + b; }); + const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; + if (tidx == 0 && 
new_col_idx < num_cols && + !stop_status[new_col_idx].has_stopped()) { + const auto hessenberg_idx = k * stride_hessenberg + new_col_idx; + atomic_add(hessenberg_iter + hessenberg_idx, sum); + } +} + +template +void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type k, size_type num_rows, + size_type num_cols, const ValueType *krylov_bases, + const ValueType *next_krylov_basis, + size_type stride_krylov, ValueType *hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, + 0, sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multidot_kernel( + k, num_rows, num_cols, krylov_bases, next_krylov_basis, + stride_krylov, hessenberg_iter, stride_hessenberg, + stop_status, item_ct1, + (UninitializedArray *) + reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +// Must be called with at least `num_rows * stride_next_krylov` threads in +// total. 
+template +void update_next_krylov_kernel( + size_type k, size_type num_rows, size_type num_cols, + const ValueType *__restrict__ krylov_bases, + ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, + const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / stride_krylov; + const auto col_idx = global_id % stride_krylov; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto next_krylov_idx = row_idx * stride_krylov + col_idx; + const auto krylov_idx = row_idx * stride_krylov + col_idx; + const auto hessenberg_idx = k * stride_hessenberg + col_idx; + + next_krylov_basis[next_krylov_idx] -= + hessenberg_iter[hessenberg_idx] * krylov_bases[krylov_idx]; + } +} + +template +void update_next_krylov_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type k, size_type num_rows, size_type num_cols, + const ValueType *krylov_bases, ValueType *next_krylov_basis, + size_type stride_krylov, const ValueType *hessenberg_iter, + size_type stride_hessenberg, const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_next_krylov_kernel( + k, num_rows, num_cols, krylov_bases, next_krylov_basis, + stride_krylov, hessenberg_iter, stride_hessenberg, + stop_status, item_ct1); + }); + }); +} + + +// Must be called with at least `num_cols` blocks, each with `block_size` +// threads. `block_size` must be a power of 2. 
+template +void update_hessenberg_2_kernel( + size_type iter, size_type num_rows, size_type num_cols, + const ValueType *__restrict__ next_krylov_basis, + size_type stride_next_krylov, ValueType *__restrict__ hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray &reduction_helper_array) +{ + const auto tidx = item_ct1.get_local_id(2); + const auto col_idx = item_ct1.get_group(2); + + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { + ValueType local_res{}; + for (size_type i = tidx; i < num_rows; i += block_size) { + const auto next_krylov_idx = i * stride_next_krylov + col_idx; + const auto next_krylov_value = next_krylov_basis[next_krylov_idx]; + + local_res += next_krylov_value * next_krylov_value; + } + + reduction_helper[tidx] = local_res; + + // Perform thread block reduction. 
Result is in reduction_helper[0] + reduce(group::this_thread_block(item_ct1), reduction_helper, + [](const ValueType &a, const ValueType &b) { return a + b; }); + + if (tidx == 0) { + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = + std::sqrt(reduction_helper[0]); + } + } +} + +template +void update_hessenberg_2_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + size_type iter, size_type num_rows, size_type num_cols, + const ValueType *next_krylov_basis, size_type stride_next_krylov, + ValueType *hessenberg_iter, size_type stride_hessenberg, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + update_hessenberg_2_kernel( + iter, num_rows, num_cols, next_krylov_basis, + stride_next_krylov, hessenberg_iter, + stride_hessenberg, stop_status, item_ct1, + *reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +// Must be called with at least `num_rows * stride_krylov` threads in +// total. 
+template +void update_krylov_kernel( + size_type iter, size_type num_rows, size_type num_cols, + ValueType *__restrict__ krylov_bases, size_type stride_krylov, + const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_idx = global_id / stride_krylov; + const auto col_idx = global_id % stride_krylov; + const auto hessenberg = + hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx]; + + if (row_idx < num_rows && col_idx < num_cols && + !stop_status[col_idx].has_stopped()) { + const auto krylov_idx = row_idx * stride_krylov + col_idx; + + krylov_bases[krylov_idx] /= hessenberg; + } +} + +template +void update_krylov_kernel(dim3 grid, dim3 block, + size_type dynamic_shared_memory, sycl::queue *queue, + size_type iter, size_type num_rows, + size_type num_cols, ValueType *krylov_bases, + size_type stride_krylov, + const ValueType *hessenberg_iter, + size_type stride_hessenberg, + const stopping_status *stop_status) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_krylov_kernel( + iter, num_rows, num_cols, krylov_bases, stride_krylov, + hessenberg_iter, stride_hessenberg, stop_status, item_ct1); + }); + }); +} + + +// Must be called with at least `stride_preconditioner * num_rows` threads in +// total. 
+template +void calculate_Qy_kernel(size_type num_rows, size_type num_cols, + size_type num_rhs, + const ValueType *__restrict__ krylov_bases, + size_type stride_krylov, + const ValueType *__restrict__ y, size_type stride_y, + ValueType *__restrict__ before_preconditioner, + size_type stride_preconditioner, + const size_type *__restrict__ final_iter_nums, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row_id = global_id / stride_preconditioner; + const auto col_id = global_id % stride_preconditioner; + + if (row_id < num_rows && col_id < num_cols) { + ValueType temp = zero(); + + for (size_type j = 0; j < final_iter_nums[col_id]; ++j) { + temp += + krylov_bases[(row_id + j * num_rows) * stride_krylov + col_id] * + y[j * stride_y + col_id]; + } + before_preconditioner[global_id] = temp; + } +} + +template +void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, + sycl::queue *queue, size_type num_rows, + size_type num_cols, size_type num_rhs, + const ValueType *krylov_bases, size_type stride_krylov, + const ValueType *y, size_type stride_y, + ValueType *before_preconditioner, + size_type stride_preconditioner, + const size_type *final_iter_nums) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + calculate_Qy_kernel( + num_rows, num_cols, num_rhs, krylov_bases, stride_krylov, y, + stride_y, before_preconditioner, stride_preconditioner, + final_iter_nums, item_ct1); + }); + }); +} + + +template +void initialize_1(std::shared_ptr exec, + const matrix::Dense *b, + matrix::Dense *residual, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + Array *stop_status, size_type krylov_dim) +{ + const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), + krylov_dim * b->get_size()[1]); + const dim3 grid_dim(ceildiv(num_threads, default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 
1, 1); + constexpr auto block_size = default_block_size; + + initialize_1_kernel( + grid_dim, block_dim, 0, exec->get_queue(), b->get_size()[0], + b->get_size()[1], krylov_dim, b->get_const_values(), b->get_stride(), + residual->get_values(), residual->get_stride(), + givens_sin->get_values(), givens_sin->get_stride(), + givens_cos->get_values(), givens_cos->get_stride(), + stop_status->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); + + +template +void initialize_2(std::shared_ptr exec, + const matrix::Dense *residual, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + matrix::Dense *krylov_bases, + Array *final_iter_nums, size_type krylov_dim) +{ + const auto num_rows = residual->get_size()[0]; + const auto num_rhs = residual->get_size()[1]; + const dim3 grid_dim_1( + ceildiv(krylov_bases->get_size()[0] * krylov_bases->get_stride(), + default_block_size), + 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + + kernels::dpcpp::dense::compute_norm2(exec, residual, residual_norm); + + const dim3 grid_dim_2(ceildiv(num_rows * num_rhs, default_block_size), 1, + 1); + initialize_2_2_kernel( + grid_dim_2, block_dim, 0, exec->get_queue(), residual->get_size()[0], + residual->get_size()[1], residual->get_const_values(), + residual->get_stride(), residual_norm->get_const_values(), + residual_norm_collection->get_values(), krylov_bases->get_values(), + krylov_bases->get_stride(), final_iter_nums->get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); + + +template +void finish_arnoldi(std::shared_ptr exec, + size_type num_rows, matrix::Dense *krylov_bases, + matrix::Dense *hessenberg_iter, size_type iter, + const stopping_status *stop_status) +{ + const auto stride_krylov = krylov_bases->get_stride(); + const auto stride_hessenberg = hessenberg_iter->get_stride(); + // auto cublas_handle = 
exec->get_cublas_handle(); + const dim3 grid_size( + ceildiv(hessenberg_iter->get_size()[1], default_dot_dim), + exec->get_num_computing_units() * 2); + const dim3 block_size(default_dot_dim, default_dot_dim); + auto next_krylov_basis = + krylov_bases->get_values() + + (iter + 1) * num_rows * hessenberg_iter->get_size()[1]; + for (size_type k = 0; k < iter + 1; ++k) { + const auto k_krylov_bases = + krylov_bases->get_const_values() + + k * num_rows * hessenberg_iter->get_size()[1]; + if (hessenberg_iter->get_size()[1] > 1) { + // TODO: this condition should be tuned + // single rhs will use vendor's dot, otherwise, use our own + // multidot_kernel which parallelize multiple rhs. + components::fill_array( + exec, hessenberg_iter->get_values() + k * stride_hessenberg, + hessenberg_iter->get_size()[1], zero()); + multidot_kernel(grid_size, block_size, 0, exec->get_queue(), k, + num_rows, hessenberg_iter->get_size()[1], + k_krylov_bases, next_krylov_basis, stride_krylov, + hessenberg_iter->get_values(), stride_hessenberg, + stop_status); + } else { + onemkl::dot(*exec->get_queue(), num_rows, k_krylov_bases, + stride_krylov, next_krylov_basis, stride_krylov, + hessenberg_iter->get_values() + k * stride_hessenberg); + } + update_next_krylov_kernel( + ceildiv(num_rows * stride_krylov, default_block_size), + default_block_size, 0, exec->get_queue(), k, num_rows, + hessenberg_iter->get_size()[1], k_krylov_bases, next_krylov_basis, + stride_krylov, hessenberg_iter->get_const_values(), + stride_hessenberg, stop_status); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + + + update_hessenberg_2_kernel( + hessenberg_iter->get_size()[1], default_block_size, 0, + exec->get_queue(), iter, num_rows, hessenberg_iter->get_size()[1], + next_krylov_basis, stride_krylov, hessenberg_iter->get_values(), + stride_hessenberg, stop_status); + + update_krylov_kernel( + 
ceildiv(num_rows * stride_krylov, default_block_size), + default_block_size, 0, exec->get_queue(), iter, num_rows, + hessenberg_iter->get_size()[1], next_krylov_basis, stride_krylov, + hessenberg_iter->get_const_values(), stride_hessenberg, stop_status); + // next_krylov_basis /= hessenberg(iter, iter + 1) + // End of arnoldi +} + + +template +void givens_rotation(std::shared_ptr exec, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense *hessenberg_iter, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + size_type iter, const Array *stop_status) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_cols = hessenberg_iter->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{ + static_cast(ceildiv(num_cols, block_size)), 1, 1}; + + givens_rotation_kernel( + grid_dim, block_dim, 0, exec->get_queue(), + hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], iter, + hessenberg_iter->get_values(), hessenberg_iter->get_stride(), + givens_sin->get_values(), givens_sin->get_stride(), + givens_cos->get_values(), givens_cos->get_stride(), + residual_norm->get_values(), residual_norm_collection->get_values(), + residual_norm_collection->get_stride(), stop_status->get_const_data()); +} + + +template +void step_1(std::shared_ptr exec, size_type num_rows, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + matrix::Dense *krylov_bases, + matrix::Dense *hessenberg_iter, size_type iter, + Array *final_iter_nums, + const Array *stop_status) +{ + increase_final_iteration_numbers_kernel( + static_cast( + ceildiv(final_iter_nums->get_num_elems(), default_block_size)), + default_block_size, 0, exec->get_queue(), final_iter_nums->get_data(), + stop_status->get_const_data(), final_iter_nums->get_num_elems()); + finish_arnoldi(exec, num_rows, krylov_bases, 
hessenberg_iter, iter, + stop_status->get_const_data()); + givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, + residual_norm, residual_norm_collection, iter, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); + + +template +void solve_upper_triangular( + std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + const matrix::Dense *hessenberg, matrix::Dense *y, + const Array *final_iter_nums) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_rhs = residual_norm_collection->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{static_cast(ceildiv(num_rhs, block_size)), + 1, 1}; + + solve_upper_triangular_kernel( + grid_dim, block_dim, 0, exec->get_queue(), hessenberg->get_size()[1], + num_rhs, residual_norm_collection->get_const_values(), + residual_norm_collection->get_stride(), hessenberg->get_const_values(), + hessenberg->get_stride(), y->get_values(), y->get_stride(), + final_iter_nums->get_const_data()); +} + + +template +void calculate_qy(std::shared_ptr exec, + const matrix::Dense *krylov_bases, + const matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + const auto num_rows = before_preconditioner->get_size()[0]; + const auto num_cols = krylov_bases->get_size()[1]; + const auto num_rhs = before_preconditioner->get_size()[1]; + const auto stride_before_preconditioner = + before_preconditioner->get_stride(); + + constexpr auto block_size = default_block_size; + const dim3 grid_dim{ + static_cast( + ceildiv(num_rows * stride_before_preconditioner, block_size)), + 1, 1}; + const dim3 block_dim{block_size, 1, 1}; + + + calculate_Qy_kernel( + grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, num_rhs, + krylov_bases->get_const_values(), krylov_bases->get_stride(), + y->get_const_values(), y->get_stride(), + before_preconditioner->get_values(), 
stride_before_preconditioner, + final_iter_nums->get_const_data()); + // Calculate qy + // before_preconditioner = krylov_bases * y +} + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + const matrix::Dense *krylov_bases, + const matrix::Dense *hessenberg, + matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + solve_upper_triangular(exec, residual_norm_collection, hessenberg, y, + final_iter_nums); + calculate_qy(exec, krylov_bases, y, before_preconditioner, final_iter_nums); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_2_KERNEL); + + +} // namespace gmres +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/solver/idr_kernels.dp.cpp b/dpcpp/solver/idr_kernels.dp.cpp new file mode 100644 index 00000000000..a4f18019128 --- /dev/null +++ b/dpcpp/solver/idr_kernels.dp.cpp @@ -0,0 +1,871 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/idr_kernels.hpp" + + +#include +#include + + +#include +#include + + +#include +#include + + +#include "core/components/fill_array.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/onemkl_bindings.hpp" +#include "dpcpp/components/atomic.dp.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The IDR solver namespace. 
+ * + * @ingroup idr + */ +namespace idr { + + +constexpr int default_block_size = 256; +constexpr int default_dot_dim = 16; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +template +void initialize_m_kernel(size_type subspace_dim, size_type nrhs, + ValueType *__restrict__ m_values, size_type m_stride, + stopping_status *__restrict__ stop_status, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row = global_id / m_stride; + const auto col = global_id % m_stride; + + if (global_id < nrhs) { + stop_status[global_id].reset(); + } + + if (row < subspace_dim && col < nrhs * subspace_dim) { + m_values[row * m_stride + col] = + (row == col / nrhs) ? one() : zero(); + } +} + +template +void initialize_m_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type subspace_dim, + size_type nrhs, ValueType *m_values, + size_type m_stride, stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + initialize_m_kernel(subspace_dim, nrhs, m_values, m_stride, + stop_status, item_ct1); + }); + }); +} + + +template +void orthonormalize_subspace_vectors_kernel( + size_type num_rows, size_type num_cols, ValueType *__restrict__ values, + size_type stride, sycl::nd_item<3> item_ct1, + UninitializedArray &reduction_helper_array) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + + // they are not be used in the same time. 
+ ValueType *reduction_helper = reduction_helper_array; + auto reduction_helper_real = + reinterpret_cast *>(reduction_helper); + + for (size_type row = 0; row < num_rows; row++) { + for (size_type i = 0; i < row; i++) { + auto dot = zero(); + for (size_type j = tidx; j < num_cols; j += block_size) { + dot += values[row * stride + j] * conj(values[i * stride + j]); + } + + // Ensure already finish reading this shared memory + item_ct1.barrier(sycl::access::fence_space::local_space); + reduction_helper[tidx] = dot; + ::gko::kernels::dpcpp::reduce( + group::this_thread_block(item_ct1), reduction_helper, + [](const ValueType &a, const ValueType &b) { return a + b; }); + item_ct1.barrier(sycl::access::fence_space::local_space); + + dot = reduction_helper[0]; + for (size_type j = tidx; j < num_cols; j += block_size) { + values[row * stride + j] -= dot * values[i * stride + j]; + } + } + + auto norm = zero>(); + for (size_type j = tidx; j < num_cols; j += block_size) { + norm += squared_norm(values[row * stride + j]); + } + + // Ensure already finish reading this shared memory + item_ct1.barrier(sycl::access::fence_space::local_space); + reduction_helper_real[tidx] = norm; + ::gko::kernels::dpcpp::reduce( + group::this_thread_block(item_ct1), reduction_helper_real, + [](const remove_complex &a, + const remove_complex &b) { return a + b; }); + item_ct1.barrier(sycl::access::fence_space::local_space); + + norm = std::sqrt(reduction_helper_real[0]); + for (size_type j = tidx; j < num_cols; j += block_size) { + values[row * stride + j] /= norm; + } + } +} + +template +void orthonormalize_subspace_vectors_kernel( + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue *stream, + size_type num_rows, size_type num_cols, ValueType *values, size_type stride) +{ + stream->submit([&](sycl::handler &cgh) { + sycl::accessor, 0, + sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for(sycl_nd_range(grid, 
block), + [=](sycl::nd_item<3> item_ct1) { + orthonormalize_subspace_vectors_kernel( + num_rows, num_cols, values, stride, item_ct1, + *reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +template +void solve_lower_triangular_kernel( + size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ m_values, size_type m_stride, + const ValueType *__restrict__ f_values, size_type f_stride, + ValueType *__restrict__ c_values, size_type c_stride, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + + if (global_id >= nrhs) { + return; + } + + if (!stop_status[global_id].has_stopped()) { + for (size_type row = 0; row < subspace_dim; row++) { + auto temp = f_values[row * f_stride + global_id]; + for (size_type col = 0; col < row; col++) { + temp -= m_values[row * m_stride + col * nrhs + global_id] * + c_values[col * c_stride + global_id]; + } + c_values[row * c_stride + global_id] = + temp / m_values[row * m_stride + row * nrhs + global_id]; + } + } +} + +template +void solve_lower_triangular_kernel( + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue *stream, + size_type subspace_dim, size_type nrhs, const ValueType *m_values, + size_type m_stride, const ValueType *f_values, size_type f_stride, + ValueType *c_values, size_type c_stride, const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + solve_lower_triangular_kernel( + subspace_dim, nrhs, m_values, m_stride, f_values, f_stride, + c_values, c_stride, stop_status, item_ct1); + }); + }); +} + + +template +void step_1_kernel(size_type k, size_type num_rows, size_type subspace_dim, + size_type nrhs, + const ValueType *__restrict__ residual_values, + size_type residual_stride, + const ValueType *__restrict__ c_values, size_type c_stride, + const ValueType *__restrict__ g_values, 
size_type g_stride, + ValueType *__restrict__ v_values, size_type v_stride, + const stopping_status *__restrict__ stop_status, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row = global_id / nrhs; + const auto col = global_id % nrhs; + + if (row >= num_rows) { + return; + } + + if (!stop_status[col].has_stopped()) { + auto temp = residual_values[row * residual_stride + col]; + for (size_type j = k; j < subspace_dim; j++) { + temp -= c_values[j * c_stride + col] * + g_values[row * g_stride + j * nrhs + col]; + } + v_values[row * v_stride + col] = temp; + } +} + +template +void step_1_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type k, size_type num_rows, + size_type subspace_dim, size_type nrhs, + const ValueType *residual_values, size_type residual_stride, + const ValueType *c_values, size_type c_stride, + const ValueType *g_values, size_type g_stride, + ValueType *v_values, size_type v_stride, + const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + step_1_kernel(k, num_rows, subspace_dim, nrhs, residual_values, + residual_stride, c_values, c_stride, g_values, + g_stride, v_values, v_stride, stop_status, + item_ct1); + }); + }); +} + + +template +void step_2_kernel(size_type k, size_type num_rows, size_type subspace_dim, + size_type nrhs, const ValueType *__restrict__ omega_values, + const ValueType *__restrict__ v_values, size_type v_stride, + const ValueType *__restrict__ c_values, size_type c_stride, + ValueType *__restrict__ u_values, size_type u_stride, + const stopping_status *__restrict__ stop_status, + sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row = global_id / nrhs; + const auto col = global_id % nrhs; + + if (row >= num_rows) { + return; + } + + if 
(!stop_status[col].has_stopped()) { + auto temp = omega_values[col] * v_values[row * v_stride + col]; + for (size_type j = k; j < subspace_dim; j++) { + temp += c_values[j * c_stride + col] * + u_values[row * u_stride + j * nrhs + col]; + } + u_values[row * u_stride + k * nrhs + col] = temp; + } +} + +template +void step_2_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type k, size_type num_rows, + size_type subspace_dim, size_type nrhs, + const ValueType *omega_values, const ValueType *v_values, + size_type v_stride, const ValueType *c_values, + size_type c_stride, ValueType *u_values, size_type u_stride, + const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + step_2_kernel(k, num_rows, subspace_dim, nrhs, omega_values, + v_values, v_stride, c_values, c_stride, u_values, + u_stride, stop_status, item_ct1); + }); + }); +} + + +template +void multidot_kernel( + size_type num_rows, size_type nrhs, const ValueType *__restrict__ p_i, + const ValueType *__restrict__ g_k, size_type g_k_stride, + ValueType *__restrict__ alpha, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray + &reduction_helper_array) +{ + const auto tidx = item_ct1.get_local_id(2); + const auto tidy = item_ct1.get_local_id(1); + const auto rhs = item_ct1.get_group(2) * default_dot_dim + tidx; + const auto num = ceildiv(num_rows, item_ct1.get_group_range(1)); + const auto start_row = item_ct1.get_group(1) * num; + const auto end_row = ((item_ct1.get_group(1) + 1) * num > num_rows) + ? 
num_rows + : (item_ct1.get_group(1) + 1) * num; + // Used that way to get around dynamic initialization warning and + // template error when using `reduction_helper_array` directly in `reduce` + ValueType *__restrict__ reduction_helper = reduction_helper_array; + + ValueType local_res = zero(); + if (rhs < nrhs && !stop_status[rhs].has_stopped()) { + for (size_type i = start_row + tidy; i < end_row; + i += default_dot_dim) { + const auto g_idx = i * g_k_stride + rhs; + local_res += p_i[i] * g_k[g_idx]; + } + } + reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; + item_ct1.barrier(sycl::access::fence_space::local_space); + local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; + const auto tile_block = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto sum = ::gko::kernels::dpcpp::reduce( + tile_block, local_res, + [](const ValueType &a, const ValueType &b) { return a + b; }); + const auto new_rhs = item_ct1.get_group(2) * default_dot_dim + tidy; + if (tidx == 0 && new_rhs < nrhs && !stop_status[new_rhs].has_stopped()) { + atomic_add(alpha + new_rhs, sum); + } +} + +template +void multidot_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type num_rows, size_type nrhs, + const ValueType *p_i, const ValueType *g_k, + size_type g_k_stride, ValueType *alpha, + const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + sycl::accessor, + 0, sycl::access_mode::read_write, + sycl::access::target::local> + reduction_helper_array_acc_ct1(cgh); + + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + multidot_kernel(num_rows, nrhs, p_i, g_k, g_k_stride, alpha, + stop_status, item_ct1, + *reduction_helper_array_acc_ct1.get_pointer()); + }); + }); +} + + +template +void update_g_k_and_u_kernel( + size_type k, size_type i, size_type size, size_type nrhs, + const ValueType *__restrict__ alpha, const ValueType *__restrict__ m_values, + 
size_type m_stride, const ValueType *__restrict__ g_values, + size_type g_stride, ValueType *__restrict__ g_k_values, + size_type g_k_stride, ValueType *__restrict__ u_values, size_type u_stride, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + const auto row = tidx / g_k_stride; + const auto rhs = tidx % g_k_stride; + + if (row >= size || rhs >= nrhs) { + return; + } + + if (!stop_status[rhs].has_stopped()) { + const auto fact = alpha[rhs] / m_values[i * m_stride + i * nrhs + rhs]; + g_k_values[row * g_k_stride + rhs] -= + fact * g_values[row * g_stride + i * nrhs + rhs]; + u_values[row * u_stride + k * nrhs + rhs] -= + fact * u_values[row * u_stride + i * nrhs + rhs]; + } +} + +template +void update_g_k_and_u_kernel(dim3 grid, dim3 block, + size_t dynamic_shared_memory, sycl::queue *stream, + size_type k, size_type i, size_type size, + size_type nrhs, const ValueType *alpha, + const ValueType *m_values, size_type m_stride, + const ValueType *g_values, size_type g_stride, + ValueType *g_k_values, size_type g_k_stride, + ValueType *u_values, size_type u_stride, + const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + update_g_k_and_u_kernel( + k, i, size, nrhs, alpha, m_values, m_stride, + g_values, g_stride, g_k_values, g_k_stride, + u_values, u_stride, stop_status, item_ct1); + }); + }); +} + + +template +void update_g_kernel(size_type k, size_type size, size_type nrhs, + const ValueType *__restrict__ g_k_values, + size_type g_k_stride, ValueType *__restrict__ g_values, + size_type g_stride, + const stopping_status *__restrict__ stop_status, + sycl::nd_item<3> item_ct1) +{ + const auto tidx = thread::get_thread_id_flat(item_ct1); + const auto row = tidx / g_k_stride; + const auto rhs = tidx % nrhs; + + if (row >= size || rhs >= nrhs) { + return; + } + + if 
(!stop_status[rhs].has_stopped()) { + g_values[row * g_stride + k * nrhs + rhs] = + g_k_values[row * g_k_stride + rhs]; + } +} + +template +void update_g_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type k, size_type size, + size_type nrhs, const ValueType *g_k_values, + size_type g_k_stride, ValueType *g_values, + size_type g_stride, const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_g_kernel(k, size, nrhs, g_k_values, + g_k_stride, g_values, g_stride, + stop_status, item_ct1); + }); + }); +} + + +template +void update_x_r_and_f_kernel( + size_type k, size_type size, size_type subspace_dim, size_type nrhs, + const ValueType *__restrict__ m_values, size_type m_stride, + const ValueType *__restrict__ g_values, size_type g_stride, + const ValueType *__restrict__ u_values, size_type u_stride, + ValueType *__restrict__ f_values, size_type f_stride, + ValueType *__restrict__ r_values, size_type r_stride, + ValueType *__restrict__ x_values, size_type x_stride, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + const auto row = global_id / x_stride; + const auto col = global_id % x_stride; + + if (row >= size || col >= nrhs) { + return; + } + + if (!stop_status[col].has_stopped()) { + const auto beta = f_values[k * f_stride + col] / + m_values[k * m_stride + k * nrhs + col]; + r_values[row * r_stride + col] -= + beta * g_values[row * g_stride + k * nrhs + col]; + x_values[row * x_stride + col] += + beta * u_values[row * u_stride + k * nrhs + col]; + + if (k < row && k + 1 < subspace_dim && row < subspace_dim) { + f_values[row * f_stride + col] -= + beta * m_values[row * m_stride + k * nrhs + col]; + } + } +} + +template +void update_x_r_and_f_kernel( + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue 
*stream, + size_type k, size_type size, size_type subspace_dim, size_type nrhs, + const ValueType *m_values, size_type m_stride, const ValueType *g_values, + size_type g_stride, const ValueType *u_values, size_type u_stride, + ValueType *f_values, size_type f_stride, ValueType *r_values, + size_type r_stride, ValueType *x_values, size_type x_stride, + const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + update_x_r_and_f_kernel( + k, size, subspace_dim, nrhs, m_values, m_stride, g_values, + g_stride, u_values, u_stride, f_values, f_stride, r_values, + r_stride, x_values, x_stride, stop_status, item_ct1); + }); + }); +} + + +template +void compute_omega_kernel( + size_type nrhs, const remove_complex kappa, + const ValueType *__restrict__ tht, + const remove_complex *__restrict__ residual_norm, + ValueType *__restrict__ omega, + const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) +{ + const auto global_id = thread::get_thread_id_flat(item_ct1); + + if (global_id >= nrhs) { + return; + } + + if (!stop_status[global_id].has_stopped()) { + auto thr = omega[global_id]; + omega[global_id] /= tht[global_id]; + auto absrho = std::abs( + thr / (std::sqrt(real(tht[global_id])) * residual_norm[global_id])); + + if (absrho < kappa) { + omega[global_id] *= kappa / absrho; + } + } +} + +template +void compute_omega_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, + sycl::queue *stream, size_type nrhs, + const remove_complex kappa, + const ValueType *tht, + const remove_complex *residual_norm, + ValueType *omega, const stopping_status *stop_status) +{ + stream->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + compute_omega_kernel(nrhs, kappa, tht, residual_norm, omega, + stop_status, item_ct1); + }); + }); +} + + +namespace { + + +template +void 
initialize_m(std::shared_ptr exec, + const size_type nrhs, matrix::Dense *m, + Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + const auto m_stride = m->get_stride(); + + const auto grid_dim = ceildiv(m_stride * subspace_dim, default_block_size); + initialize_m_kernel(grid_dim, default_block_size, 0, exec->get_queue(), + subspace_dim, nrhs, m->get_values(), m_stride, + stop_status->get_data()); +} + + +template +void initialize_subspace_vectors(std::shared_ptr exec, + matrix::Dense *subspace_vectors, + bool deterministic) +{ + if (deterministic) { + auto subspace_vectors_data = matrix_data( + subspace_vectors->get_size(), + std::normal_distribution>(0.0, 1.0), + std::ranlux48(15)); + subspace_vectors->read(subspace_vectors_data); + } else { + auto seed = time(NULL); + auto work = reinterpret_cast *>( + subspace_vectors->get_values()); + auto n = + subspace_vectors->get_size()[0] * subspace_vectors->get_stride(); + n = is_complex() ? 2 * n : n; + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> idx) { + std::uint64_t offset = idx.get_linear_id(); + oneapi::dpl::minstd_rand engine(seed, offset); + oneapi::dpl::normal_distribution> + distr(0, 1); + auto res = distr(engine); + + work[idx] = res; + }); + }); + } +} + + +template +void orthonormalize_subspace_vectors(std::shared_ptr exec, + matrix::Dense *subspace_vectors) +{ + orthonormalize_subspace_vectors_kernel( + 1, default_block_size, 0, exec->get_queue(), + subspace_vectors->get_size()[0], subspace_vectors->get_size()[1], + subspace_vectors->get_values(), subspace_vectors->get_stride()); +} + + +template +void solve_lower_triangular(std::shared_ptr exec, + const size_type nrhs, + const matrix::Dense *m, + const matrix::Dense *f, + matrix::Dense *c, + const Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(nrhs, default_block_size); + solve_lower_triangular_kernel( + grid_dim, 
default_block_size, 0, exec->get_queue(), subspace_dim, nrhs, + m->get_const_values(), m->get_stride(), f->get_const_values(), + f->get_stride(), c->get_values(), c->get_stride(), + stop_status->get_const_data()); +} + + +template +void update_g_and_u(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *p, + const matrix::Dense *m, + matrix::Dense *alpha, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, + const Array *stop_status) +{ + const auto size = g->get_size()[0]; + const auto p_stride = p->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_computing_units() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = 0; i < k; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, alpha->get_values(), nrhs, + zero()); + multidot_kernel(grid_dim, block_dim, 0, exec->get_queue(), size, + nrhs, p_i, g_k->get_values(), g_k->get_stride(), + alpha->get_values(), stop_status->get_const_data()); + } else { + onemkl::dot(*exec->get_queue(), size, p_i, 1, g_k->get_values(), + g_k->get_stride(), alpha->get_values()); + } + update_g_k_and_u_kernel( + ceildiv(size * g_k->get_stride(), default_block_size), + default_block_size, 0, exec->get_queue(), k, i, size, nrhs, + alpha->get_const_values(), m->get_const_values(), m->get_stride(), + g->get_const_values(), g->get_stride(), g_k->get_values(), + g_k->get_stride(), u->get_values(), u->get_stride(), + stop_status->get_const_data()); + } + update_g_kernel( + ceildiv(size * g_k->get_stride(), default_block_size), + default_block_size, 0, exec->get_queue(), k, size, nrhs, + g_k->get_const_values(), g_k->get_stride(), g->get_values(), + g->get_stride(), stop_status->get_const_data()); +} + + +template +void update_m(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *p, + const matrix::Dense *g_k, matrix::Dense *m, 
+ const Array *stop_status) +{ + const auto size = g_k->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + const auto p_stride = p->get_stride(); + const auto m_stride = m->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_computing_units() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = k; i < subspace_dim; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + auto m_i = m->get_values() + i * m_stride + k * nrhs; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, m_i, nrhs, zero()); + multidot_kernel(grid_dim, block_dim, 0, exec->get_queue(), size, + nrhs, p_i, g_k->get_const_values(), + g_k->get_stride(), m_i, + stop_status->get_const_data()); + } else { + onemkl::dot(*exec->get_queue(), size, p_i, 1, + g_k->get_const_values(), g_k->get_stride(), m_i); + } + } +} + + +template +void update_x_r_and_f(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *m, + const matrix::Dense *g, + const matrix::Dense *u, + matrix::Dense *f, matrix::Dense *r, + matrix::Dense *x, + const Array *stop_status) +{ + const auto size = x->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(size * x->get_stride(), default_block_size); + update_x_r_and_f_kernel(grid_dim, default_block_size, 0, exec->get_queue(), + k, size, subspace_dim, nrhs, m->get_const_values(), + m->get_stride(), g->get_const_values(), + g->get_stride(), u->get_const_values(), + u->get_stride(), f->get_values(), f->get_stride(), + r->get_values(), r->get_stride(), x->get_values(), + x->get_stride(), stop_status->get_const_data()); + components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs, + zero()); +} + + +} // namespace + + +template +void initialize(std::shared_ptr exec, const size_type nrhs, + matrix::Dense *m, + matrix::Dense *subspace_vectors, bool deterministic, + Array *stop_status) +{ + initialize_m(exec, 
nrhs, m, stop_status); + initialize_subspace_vectors(exec, subspace_vectors, deterministic); + orthonormalize_subspace_vectors(exec, subspace_vectors); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); + + +template +void step_1(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *m, + const matrix::Dense *f, + const matrix::Dense *residual, + const matrix::Dense *g, matrix::Dense *c, + matrix::Dense *v, + const Array *stop_status) +{ + solve_lower_triangular(exec, nrhs, m, f, c, stop_status); + + const auto num_rows = v->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); + step_1_kernel(grid_dim, default_block_size, 0, exec->get_queue(), k, + num_rows, subspace_dim, nrhs, residual->get_const_values(), + residual->get_stride(), c->get_const_values(), + c->get_stride(), g->get_const_values(), g->get_stride(), + v->get_values(), v->get_stride(), + stop_status->get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); + + +template +void step_2(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *omega, + const matrix::Dense *preconditioned_vector, + const matrix::Dense *c, matrix::Dense *u, + const Array *stop_status) +{ + const auto num_rows = preconditioned_vector->get_size()[0]; + const auto subspace_dim = u->get_size()[1] / nrhs; + + const auto grid_dim = ceildiv(nrhs * num_rows, default_block_size); + step_2_kernel(grid_dim, default_block_size, 0, exec->get_queue(), k, + num_rows, subspace_dim, nrhs, omega->get_const_values(), + preconditioned_vector->get_const_values(), + preconditioned_vector->get_stride(), c->get_const_values(), + c->get_stride(), u->get_values(), u->get_stride(), + stop_status->get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); + + +template +void step_3(std::shared_ptr exec, const size_type 
nrhs, + const size_type k, const matrix::Dense *p, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, matrix::Dense *m, + matrix::Dense *f, matrix::Dense *alpha, + matrix::Dense *residual, matrix::Dense *x, + const Array *stop_status) +{ + update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); + update_m(exec, nrhs, k, p, g_k, m, stop_status); + update_x_r_and_f(exec, nrhs, k, m, g, u, f, residual, x, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); + + +template +void compute_omega( + std::shared_ptr exec, const size_type nrhs, + const remove_complex kappa, const matrix::Dense *tht, + const matrix::Dense> *residual_norm, + matrix::Dense *omega, const Array *stop_status) +{ + const auto grid_dim = ceildiv(nrhs, config::warp_size); + compute_omega_kernel(grid_dim, config::warp_size, 0, exec->get_queue(), + nrhs, kappa, tht->get_const_values(), + residual_norm->get_const_values(), omega->get_values(), + stop_status->get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); + + +} // namespace idr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/solver/lower_trs_kernels.dp.cpp b/dpcpp/solver/lower_trs_kernels.dp.cpp new file mode 100644 index 00000000000..7144108593f --- /dev/null +++ b/dpcpp/solver/lower_trs_kernels.dp.cpp @@ -0,0 +1,107 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/lower_trs_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The LOWER_TRS solver namespace. + * + * @ingroup lower_trs + */ +namespace lower_trs { + + +void should_perform_transpose(std::shared_ptr exec, + bool &do_transpose) GKO_NOT_IMPLEMENTED; + + +void init_struct(std::shared_ptr exec, + std::shared_ptr &solve_struct) +{ + // This init kernel is here to allow initialization of the solve struct for + // a more sophisticated implementation as for other executors. 
+} + + +template +void generate(std::shared_ptr exec, + const matrix::Csr *matrix, + solver::SolveStruct *solve_struct, const gko::size_type num_rhs) +{ + // This generate kernel is here to allow for a more sophisticated + // implementation as for other executors. This kernel would perform the + // "analysis" phase for the triangular matrix. +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL); + + +/** + * The parameters trans_x and trans_b are used only in the CUDA executor for + * versions <=9.1 due to a limitation in the cssrsm_solve algorithm + */ +template +void solve(std::shared_ptr exec, + const matrix::Csr *matrix, + const solver::SolveStruct *solve_struct, + matrix::Dense *trans_b, matrix::Dense *trans_x, + const matrix::Dense *b, + matrix::Dense *x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL); + + +} // namespace lower_trs +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/solver/upper_trs_kernels.dp.cpp b/dpcpp/solver/upper_trs_kernels.dp.cpp new file mode 100644 index 00000000000..cc1d40f711d --- /dev/null +++ b/dpcpp/solver/upper_trs_kernels.dp.cpp @@ -0,0 +1,110 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/upper_trs_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The UPPER_TRS solver namespace. + * + * @ingroup upper_trs + */ +namespace upper_trs { + + +void should_perform_transpose(std::shared_ptr exec, + bool &do_transpose) +{ + do_transpose = false; +} + + +void init_struct(std::shared_ptr exec, + std::shared_ptr &solve_struct) +{ + // This init kernel is here to allow initialization of the solve struct for + // a more sophisticated implementation as for other executors. +} + + +template +void generate(std::shared_ptr exec, + const matrix::Csr *matrix, + solver::SolveStruct *solve_struct, const gko::size_type num_rhs) +{ + // This generate kernel is here to allow for a more sophisticated + // implementation as for other executors. 
This kernel would perform the + // "analysis" phase for the triangular matrix. +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL); + + +/** + * The parameters trans_x and trans_b are used only in the CUDA executor for + * versions <=9.1 due to a limitation in the cssrsm_solve algorithm + */ +template +void solve(std::shared_ptr exec, + const matrix::Csr *matrix, + const solver::SolveStruct *solve_struct, + matrix::Dense *trans_b, matrix::Dense *trans_x, + const matrix::Dense *b, + matrix::Dense *x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL); + + +} // namespace upper_trs +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/stop/criterion_kernels.dp.cpp b/dpcpp/stop/criterion_kernels.dp.cpp new file mode 100644 index 00000000000..46dc6243ad8 --- /dev/null +++ b/dpcpp/stop/criterion_kernels.dp.cpp @@ -0,0 +1,71 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/stop/criterion_kernels.hpp" + + +#include + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Setting of all statuses namespace. 
+ * @ref set_status + * @ingroup set_all_statuses + */ +namespace set_all_statuses { + + +void set_all_statuses(std::shared_ptr exec, + uint8 stoppingId, bool setFinalized, + Array *stop_status) +{ + auto size = stop_status->get_num_elems(); + stopping_status *__restrict__ stop_status_ptr = stop_status->get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { + const auto idx = idx_id[0]; + stop_status_ptr[idx].stop(stoppingId, setFinalized); + }); + }); +} + + +} // namespace set_all_statuses +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/stop/residual_norm_kernels.dp.cpp b/dpcpp/stop/residual_norm_kernels.dp.cpp new file mode 100644 index 00000000000..a527ec4c564 --- /dev/null +++ b/dpcpp/stop/residual_norm_kernels.dp.cpp @@ -0,0 +1,165 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/stop/residual_norm_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Residual norm stopping criterion namespace. + * @ref resnorm + * @ingroup resnorm + */ +namespace residual_norm { + + +template +void residual_norm(std::shared_ptr exec, + const matrix::Dense *tau, + const matrix::Dense *orig_tau, + ValueType rel_residual_goal, uint8 stoppingId, + bool setFinalized, Array *stop_status, + Array *device_storage, bool *all_converged, + bool *one_changed) +{ + static_assert(is_complex_s::value == false, + "ValueType must not be complex in this function!"); + auto device_storage_val = device_storage->get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { + device_storage_val[0] = true; + device_storage_val[1] = false; + }); + }); + + auto orig_tau_val = orig_tau->get_const_values(); + auto tau_val = tau->get_const_values(); + auto stop_status_val = stop_status->get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { + const auto tidx = idx_id[0]; + if (tau_val[tidx] < rel_residual_goal * orig_tau_val[tidx]) { + 
stop_status_val[tidx].converge(stoppingId, setFinalized); + device_storage_val[1] = true; + } + // because only false is written to all_converged, write + // conflicts should not cause any problem + else if (!stop_status_val[tidx].has_stopped()) { + device_storage_val[0] = false; + } + }); + }); + + /* Represents all_converged, one_changed */ + *all_converged = exec->copy_val_to_host(device_storage->get_const_data()); + *one_changed = exec->copy_val_to_host(device_storage->get_const_data() + 1); +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( + GKO_DECLARE_RESIDUAL_NORM_KERNEL); + + +} // namespace residual_norm + + +/** + * @brief The Implicit Residual norm stopping criterion. + * @ref implicit_resnorm + * @ingroup resnorm + */ +namespace implicit_residual_norm { + + +template +void implicit_residual_norm( + std::shared_ptr exec, + const matrix::Dense *tau, + const matrix::Dense> *orig_tau, + remove_complex rel_residual_goal, uint8 stoppingId, + bool setFinalized, Array *stop_status, + Array *device_storage, bool *all_converged, bool *one_changed) +{ + auto device_storage_val = device_storage->get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { + device_storage_val[0] = true; + device_storage_val[1] = false; + }); + }); + + auto orig_tau_val = orig_tau->get_const_values(); + auto tau_val = tau->get_const_values(); + auto stop_status_val = stop_status->get_data(); + exec->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { + const auto tidx = idx_id[0]; + if (std::sqrt(std::abs(tau_val[tidx])) < + rel_residual_goal * orig_tau_val[tidx]) { + stop_status_val[tidx].converge(stoppingId, setFinalized); + device_storage_val[1] = true; + } + // because only false is written to all_converged, write + // conflicts should not cause any problem + else if (!stop_status_val[tidx].has_stopped()) { + device_storage_val[0] = 
false; + } + }); + }); + + /* Represents all_converged, one_changed */ + *all_converged = exec->copy_val_to_host(device_storage->get_const_data()); + *one_changed = exec->copy_val_to_host(device_storage->get_const_data() + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); + + +} // namespace implicit_residual_norm +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/test/CMakeLists.txt b/dpcpp/test/CMakeLists.txt new file mode 100644 index 00000000000..8ccd05b0518 --- /dev/null +++ b/dpcpp/test/CMakeLists.txt @@ -0,0 +1,8 @@ +include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) +set(GINKGO_COMPILING_DPCPP_TEST ON) + +add_subdirectory(base) +add_subdirectory(components) +add_subdirectory(matrix) +add_subdirectory(solver) +add_subdirectory(stop) diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt new file mode 100644 index 00000000000..bb9c8a75050 --- /dev/null +++ b/dpcpp/test/base/CMakeLists.txt @@ -0,0 +1,5 @@ +ginkgo_create_dpcpp_test(executor) +ginkgo_create_dpcpp_test(dim3) +ginkgo_create_dpcpp_test(kernel_launch) +# set correct flags for kernel_launch.hpp +target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP) diff --git a/dpcpp/test/base/dim3.dp.cpp b/dpcpp/test/base/dim3.dp.cpp new file mode 100644 index 00000000000..8d5485749b8 --- /dev/null +++ b/dpcpp/test/base/dim3.dp.cpp @@ -0,0 +1,108 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "dpcpp/base/dim3.dp.hpp" + + +#include + + +#include + + +namespace { + + +using namespace gko::kernels::dpcpp; + + +TEST(DpcppDim3, CanGenerate1DRange) +{ + dim3 block(3); + auto sycl_block = block.get_range(); + + ASSERT_EQ(block.x, 3); + ASSERT_EQ(block.y, 1); + ASSERT_EQ(block.z, 1); + ASSERT_EQ(sycl_block.get(0), 1); + ASSERT_EQ(sycl_block.get(1), 1); + ASSERT_EQ(sycl_block.get(2), 3); +} + + +TEST(DpcppDim3, CanGenerate2DRange) +{ + dim3 block(3, 5); + auto sycl_block = block.get_range(); + + ASSERT_EQ(block.x, 3); + ASSERT_EQ(block.y, 5); + ASSERT_EQ(block.z, 1); + ASSERT_EQ(sycl_block.get(0), 1); + ASSERT_EQ(sycl_block.get(1), 5); + ASSERT_EQ(sycl_block.get(2), 3); +} + + +TEST(DpcppDim3, CanGenerate3DRange) +{ + dim3 block(3, 5, 7); + auto sycl_block = block.get_range(); + + ASSERT_EQ(block.x, 3); + ASSERT_EQ(block.y, 5); + ASSERT_EQ(block.z, 7); + ASSERT_EQ(sycl_block.get(0), 7); + ASSERT_EQ(sycl_block.get(1), 5); + ASSERT_EQ(sycl_block.get(2), 3); +} + + +TEST(DpcppDim3, CanGenerateNDRange) +{ + dim3 block(3, 5, 7); + dim3 grid(17, 13, 11); + + auto ndrange = sycl_nd_range(grid, block); + auto global_size = ndrange.get_global_range(); + auto local_size = ndrange.get_local_range(); + + ASSERT_EQ(local_size.get(0), 7); + ASSERT_EQ(local_size.get(1), 5); + ASSERT_EQ(local_size.get(2), 3); + ASSERT_EQ(global_size.get(0), 7 * 11); + ASSERT_EQ(global_size.get(1), 5 * 13); + ASSERT_EQ(global_size.get(2), 3 * 17); +} + + +} // namespace diff --git a/dpcpp/test/base/executor.dp.cpp b/dpcpp/test/base/executor.dp.cpp new file mode 100644 index 00000000000..93f52a7d1f1 --- /dev/null +++ b/dpcpp/test/base/executor.dp.cpp @@ -0,0 +1,292 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include + + +#include +#include +#include + + +namespace { + + +class DpcppExecutor : public ::testing::Test { +protected: + DpcppExecutor() + : ref(gko::ReferenceExecutor::create()), dpcpp(nullptr), dpcpp2(nullptr) + {} + + void SetUp() + { + if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { + dpcpp = gko::DpcppExecutor::create(0, ref, "gpu"); + if (gko::DpcppExecutor::get_num_devices("gpu") > 1) { + dpcpp2 = gko::DpcppExecutor::create(1, ref, "gpu"); + } + } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { + dpcpp = gko::DpcppExecutor::create(0, ref, "cpu"); + if (gko::DpcppExecutor::get_num_devices("cpu") > 1) { + dpcpp2 = gko::DpcppExecutor::create(1, ref, "cpu"); + } + } else { + GKO_NOT_IMPLEMENTED; + } + } + + void TearDown() + { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(dpcpp->synchronize()); + if (dpcpp2 != nullptr) { + ASSERT_NO_THROW(dpcpp2->synchronize()); + } + } + + std::shared_ptr ref{}; + std::shared_ptr dpcpp{}; + std::shared_ptr dpcpp2{}; +}; + + +TEST_F(DpcppExecutor, CanInstantiateTwoExecutorsOnOneDevice) +{ + auto dpcpp = gko::DpcppExecutor::create(0, ref); + if (dpcpp2 != nullptr) { + auto dpcpp2 = gko::DpcppExecutor::create(0, ref); + } + + // We want automatic deinitialization to not create any error +} + + +TEST_F(DpcppExecutor, CanGetExecInfo) +{ + dpcpp = gko::DpcppExecutor::create(0, ref); + + ASSERT_TRUE(dpcpp->get_num_computing_units() > 0); + ASSERT_TRUE(dpcpp->get_subgroup_sizes().size() > 0); + ASSERT_TRUE(dpcpp->get_max_workitem_sizes().size() > 0); + ASSERT_TRUE(dpcpp->get_max_workgroup_size() > 0); + ASSERT_TRUE(dpcpp->get_max_subgroup_size() > 0); +} + + +TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeAll) +{ + auto count = sycl::device::get_devices(sycl::info::device_type::all).size(); + + auto num_devices = 
gko::DpcppExecutor::get_num_devices("all"); + + ASSERT_EQ(count, num_devices); +} + + +TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeCPU) +{ + auto count = sycl::device::get_devices(sycl::info::device_type::cpu).size(); + + auto num_devices = gko::DpcppExecutor::get_num_devices("cpu"); + + ASSERT_EQ(count, num_devices); +} + + +TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeGPU) +{ + auto count = sycl::device::get_devices(sycl::info::device_type::gpu).size(); + + auto num_devices = gko::DpcppExecutor::get_num_devices("gpu"); + + ASSERT_EQ(count, num_devices); +} + + +TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeAccelerator) +{ + auto count = + sycl::device::get_devices(sycl::info::device_type::accelerator).size(); + + auto num_devices = gko::DpcppExecutor::get_num_devices("accelerator"); + + ASSERT_EQ(count, num_devices); +} + + +TEST_F(DpcppExecutor, AllocatesAndFreesMemory) +{ + int *ptr = nullptr; + + ASSERT_NO_THROW(ptr = dpcpp->alloc(2)); + ASSERT_NO_THROW(dpcpp->free(ptr)); +} + + +TEST_F(DpcppExecutor, FailsWhenOverallocating) +{ + const gko::size_type num_elems = 1ll << 50; // 4PB of integers + int *ptr = nullptr; + + ASSERT_THROW( + { + ptr = dpcpp->alloc(num_elems); + dpcpp->synchronize(); + }, + gko::AllocationError); + + dpcpp->free(ptr); +} + + +void check_data(int *data, bool *result) +{ + *result = false; + if (data[0] == 3 && data[1] == 8) { + *result = true; + } +} + +TEST_F(DpcppExecutor, CopiesDataToCPU) +{ + int orig[] = {3, 8}; + auto *copy = dpcpp->alloc(2); + gko::Array is_set(ref, 1); + + dpcpp->copy_from(ref.get(), 2, orig, copy); + + is_set.set_executor(dpcpp); + ASSERT_NO_THROW(dpcpp->synchronize()); + ASSERT_NO_THROW(dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + auto *is_set_ptr = is_set.get_data(); + cgh.single_task([=]() { check_data(copy, is_set_ptr); }); + })); + is_set.set_executor(ref); + ASSERT_EQ(*is_set.get_data(), true); + ASSERT_NO_THROW(dpcpp->synchronize()); + dpcpp->free(copy); +} + +void init_data(int *data) 
+{ + data[0] = 3; + data[1] = 8; +} + +TEST_F(DpcppExecutor, CopiesDataFromCPU) +{ + int copy[2]; + auto orig = dpcpp->alloc(2); + dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { init_data(orig); }); + }); + + ref->copy_from(dpcpp.get(), 2, orig, copy); + + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + dpcpp->free(orig); +} + + +TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp) +{ + if (dpcpp2 == nullptr) { + GTEST_SKIP(); + } + + int copy[2]; + gko::Array is_set(ref, 1); + auto orig = dpcpp->alloc(2); + dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + cgh.single_task([=]() { init_data(orig); }); + }); + + auto copy_dpcpp2 = dpcpp2->alloc(2); + dpcpp2->copy_from(dpcpp.get(), 2, orig, copy_dpcpp2); + // Check that the data is really on GPU + is_set.set_executor(dpcpp2); + ASSERT_NO_THROW(dpcpp2->get_queue()->submit([&](sycl::handler &cgh) { + auto *is_set_ptr = is_set.get_data(); + cgh.single_task([=]() { check_data(copy_dpcpp2, is_set_ptr); }); + })); + is_set.set_executor(ref); + ASSERT_EQ(*is_set.get_data(), true); + + // Copy the results back through ref and run CPU-side assertions + ref->copy_from(dpcpp2.get(), 2, copy_dpcpp2, copy); + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + dpcpp2->free(copy_dpcpp2); + dpcpp->free(orig); +} + + +TEST_F(DpcppExecutor, Synchronizes) +{ + // TODO: design a proper unit test once we support streams + ASSERT_NO_THROW(dpcpp->synchronize()); +} + + +TEST_F(DpcppExecutor, FreeAfterKernel) +{ + size_t length = 10000; + auto dpcpp = + gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create()); + { + gko::Array x(dpcpp, length); + gko::Array y(dpcpp, length); + auto x_val = x.get_data(); + auto y_val = y.get_data(); + dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<1>{length}, + [=](sycl::id<1> i) { y_val[i] += x_val[i]; }); + }); + } + // to ensure everything on queue is finished. 
+ dpcpp->synchronize(); +} + + +} // namespace diff --git a/dpcpp/test/base/kernel_launch.dp.cpp b/dpcpp/test/base/kernel_launch.dp.cpp new file mode 100644 index 00000000000..25a36d3a29a --- /dev/null +++ b/dpcpp/test/base/kernel_launch.dp.cpp @@ -0,0 +1,260 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "common/unified/base/kernel_launch.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "common/unified/base/kernel_launch_solver.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using gko::dim; +using gko::size_type; +using std::is_same; + + +class KernelLaunch : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using value_type = float; +#else + using value_type = double; +#endif + using Mtx = gko::matrix::Dense; + + KernelLaunch() + : exec(gko::DpcppExecutor::create( + 0, gko::ReferenceExecutor::create(), + gko::DpcppExecutor::get_num_devices("gpu") > 0 ? "gpu" : "cpu")), + zero_array(exec->get_master(), 16), + iota_array(exec->get_master(), 16), + iota_transp_array(exec->get_master(), 16), + iota_dense(Mtx::create(exec, dim<2>{4, 4})), + zero_dense(Mtx::create(exec, dim<2>{4, 4}, 6)), + zero_dense2(Mtx::create(exec, dim<2>{4, 4}, 5)), + vec_dense(Mtx::create(exec, dim<2>{1, 4})) + { + auto ref_iota_dense = Mtx::create(exec->get_master(), dim<2>{4, 4}); + for (int i = 0; i < 16; i++) { + zero_array.get_data()[i] = 0; + iota_array.get_data()[i] = i; + iota_transp_array.get_data()[i] = (i % 4 * 4) + i / 4; + ref_iota_dense->at(i / 4, i % 4) = i; + } + zero_dense->fill(0.0); + zero_dense2->fill(0.0); + iota_dense->copy_from(ref_iota_dense.get()); + zero_array.set_executor(exec); + iota_array.set_executor(exec); + iota_transp_array.set_executor(exec); + } + + std::shared_ptr exec; + gko::Array zero_array; + gko::Array iota_array; + gko::Array iota_transp_array; + std::unique_ptr iota_dense; + std::unique_ptr zero_dense; + std::unique_ptr zero_dense2; + std::unique_ptr vec_dense; +}; + + +TEST_F(KernelLaunch, Runs1D) +{ + gko::kernels::dpcpp::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i] = i; + }, + 
zero_array.get_num_elems(), zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +TEST_F(KernelLaunch, Runs1DArray) +{ + gko::kernels::dpcpp::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i] = i; + } else { + d[i] = 0; + } + }, + zero_array.get_num_elems(), zero_array, zero_array.get_const_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +TEST_F(KernelLaunch, Runs1DDense) +{ + gko::kernels::dpcpp::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, + "type"); + static_assert( + is_same::value, "type"); + static_assert(is_same::value, + "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool strides_correct = d.stride == 5 && d2.stride == 5; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d.stride; + bool accessors_1d_correct = &d[0] == d_ptr && &d2[0] == d_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i / 4, i % 4) = i; + } else { + d(i / 4, i % 4) = 0; + } + }, + 16, zero_dense2.get(), static_cast(zero_dense2.get()), + zero_dense2->get_const_values()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +TEST_F(KernelLaunch, Runs2D) +{ + gko::kernels::dpcpp::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i + 4 * j] = 4 * i + j; + }, + dim<2>{4, 4}, zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +TEST_F(KernelLaunch, Runs2DArray) +{ + gko::kernels::dpcpp::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto 
d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i + 4 * j] = 4 * i + j; + } else { + d[i + 4 * j] = 0; + } + }, + dim<2>{4, 4}, zero_array, zero_array.get_const_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +TEST_F(KernelLaunch, Runs2DDense) +{ + gko::kernels::dpcpp::run_kernel_solver( + exec, + [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, + auto d4, auto d2_ptr, auto d3_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, + "type"); + static_assert( + is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && + d3.data == d2_ptr && d4 == d3_ptr; + bool strides_correct = + d.stride == 5 && d2.stride == 5 && d3.stride == 6; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d2.stride && + &d3(0, 0) == d2_ptr && &d3(1, 0) == d2_ptr + d3.stride; + bool accessors_1d_correct = + &d[0] == d_ptr && &d2[0] == d_ptr && &d3[0] == d2_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i, j) = 4 * i + j; + } else { + d(i, j) = 0; + } + }, + dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), + static_cast(zero_dense2.get()), + zero_dense2->get_const_values(), + gko::kernels::dpcpp::default_stride(zero_dense.get()), + gko::kernels::dpcpp::row_vector(vec_dense.get()), + zero_dense->get_values(), vec_dense->get_values()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +} // namespace diff --git a/dpcpp/test/components/CMakeLists.txt 
b/dpcpp/test/components/CMakeLists.txt new file mode 100644 index 00000000000..77ad6684840 --- /dev/null +++ b/dpcpp/test/components/CMakeLists.txt @@ -0,0 +1,5 @@ +ginkgo_create_test(absolute_array) +ginkgo_create_dpcpp_test(cooperative_groups_kernels) +ginkgo_create_test(fill_array) +ginkgo_create_test(precision_conversion) +ginkgo_create_test(prefix_sum) diff --git a/dpcpp/test/components/absolute_array.cpp b/dpcpp/test/components/absolute_array.cpp new file mode 100644 index 00000000000..47eeab25980 --- /dev/null +++ b/dpcpp/test/components/absolute_array.cpp @@ -0,0 +1,137 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/absolute_array.hpp" + + +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class AbsoluteArray : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using value_type = float; + using complex_type = std::complex; +#else + using value_type = double; + using complex_type = std::complex; +#endif // GINKGO_DPCPP_SINGLE_MODE + AbsoluteArray() + : ref(gko::ReferenceExecutor::create()), + exec(gko::DpcppExecutor::create(0, ref)), + total_size(6344), + vals(ref, total_size), + dvals(exec, total_size), + complex_vals(ref, total_size), + dcomplex_vals(exec, total_size) + { + std::fill_n(vals.get_data(), total_size, -1234.0); + dvals = vals; + std::fill_n(complex_vals.get_data(), total_size, complex_type{3, 4}); + dcomplex_vals = complex_vals; + } + + std::shared_ptr ref; + std::shared_ptr exec; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; + gko::Array complex_vals; + gko::Array dcomplex_vals; +}; + + +TEST_F(AbsoluteArray, InplaceEqualsReference) +{ + gko::kernels::dpcpp::components::inplace_absolute_array( + exec, dvals.get_data(), total_size); + gko::kernels::reference::components::inplace_absolute_array( + ref, vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(vals, dvals); +} + + +TEST_F(AbsoluteArray, InplaceComplexEqualsReference) +{ + 
gko::kernels::dpcpp::components::inplace_absolute_array( + exec, dcomplex_vals.get_data(), total_size); + gko::kernels::reference::components::inplace_absolute_array( + ref, complex_vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(complex_vals, dcomplex_vals); +} + + +TEST_F(AbsoluteArray, OutplaceEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::dpcpp::components::outplace_absolute_array( + exec, dvals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +TEST_F(AbsoluteArray, OutplaceComplexEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::dpcpp::components::outplace_absolute_array( + exec, dcomplex_vals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, complex_vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +} // namespace diff --git a/dpcpp/test/components/cooperative_groups_kernels.dp.cpp b/dpcpp/test/components/cooperative_groups_kernels.dp.cpp new file mode 100644 index 00000000000..300b6cac8cf --- /dev/null +++ b/dpcpp/test/components/cooperative_groups_kernels.dp.cpp @@ -0,0 +1,242 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "dpcpp/components/cooperative_groups.dp.hpp" + + +#include +#include + + +#include + + +#include + + +#include +#include +#include + + +#include "core/base/types.hpp" +#include "core/synthesizer/implementation_selection.hpp" +#include "core/test/utils/assertions.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/helper.hpp" + + +namespace { + + +using namespace gko::kernels::dpcpp; +using KCfg = gko::ConfigSet<11, 7>; +constexpr auto default_config_list = + ::gko::syn::value_list(); + + +class CooperativeGroups : public testing::TestWithParam { +protected: + CooperativeGroups() + : ref(gko::ReferenceExecutor::create()), + dpcpp(gko::DpcppExecutor::create(0, ref)), + test_case(3), + max_num(test_case * 64), + result(ref, max_num), + dresult(dpcpp) + { + for (int i = 0; i < max_num; i++) { + result.get_data()[i] = false; + } + dresult = result; + } + + template + void test_all_subgroup(Kernel kernel) + { + auto subgroup_size = GetParam(); + auto queue = dpcpp->get_queue(); + if (gko::kernels::dpcpp::validate(queue, subgroup_size, + subgroup_size)) { + const auto cfg = KCfg::encode(subgroup_size, subgroup_size); + for (int i = 0; i < test_case * subgroup_size; i++) { + result.get_data()[i] = true; + } + + kernel(cfg, 1, subgroup_size, 0, dpcpp->get_queue(), + dresult.get_data()); + + // each subgroup-size segment holds the result of one test + GKO_ASSERT_ARRAY_EQ(result, dresult); + } else { + GTEST_SKIP() << "This device does not contain this subgroup size " + << subgroup_size; + } + } + + int test_case; + int max_num; + std::shared_ptr ref; + std::shared_ptr dpcpp; + gko::Array result; + gko::Array dresult; +}; + + +// kernel implementation +template +__WG_BOUND__(KCfg::decode<0>(config)) +void cg_shuffle(bool *s, sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCfg::decode<1>(config); + auto group = + 
group::tiled_partition(group::this_thread_block(item_ct1)); + auto i = int(group.thread_rank()); + + s[i] = group.shfl_up(i, 1) == sycl::max(0, (int)(i - 1)); + s[i + sg_size] = + group.shfl_down(i, 1) == + sycl::min((unsigned int)(i + 1), (unsigned int)(sg_size - 1)); + s[i + sg_size * 2] = group.shfl(i, 0) == 0; +} + +// group all kernel things together +template +void cg_shuffle_host(dim3 grid, dim3 block, + gko::size_type dynamic_shared_memory, sycl::queue *queue, + bool *s) +{ + queue->submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + cg_shuffle(s, item_ct1); + }); + }); +} + +// config selection +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_shuffle_config, cg_shuffle_host) + +// the call +void cg_shuffle_config_call(std::uint32_t desired_cfg, dim3 grid, dim3 block, + gko::size_type dynamic_shared_memory, + sycl::queue *queue, bool *s) +{ + cg_shuffle_config( + default_config_list, + // validate + [&desired_cfg](std::uint32_t cfg) { return cfg == desired_cfg; }, + ::gko::syn::value_list(), ::gko::syn::value_list(), + ::gko::syn::value_list(), ::gko::syn::type_list<>(), + grid, block, dynamic_shared_memory, queue, s); +} + +TEST_P(CooperativeGroups, Shuffle) +{ + test_all_subgroup(cg_shuffle_config_call); +} + + +template +__WG_BOUND__(KCfg::decode<0>(config)) +void cg_all(bool *s, sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCfg::decode<1>(config); + auto group = + group::tiled_partition(group::this_thread_block(item_ct1)); + auto i = int(group.thread_rank()); + + s[i] = group.all(true); + s[i + sg_size] = !group.all(false); + s[i + sg_size * 2] = + group.all(item_ct1.get_local_id(2) < 13) == sg_size < 13; +} + +GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_all, cg_all) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_all, cg_all) +GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_all_call, cg_all, default_config_list) + +TEST_P(CooperativeGroups, All) { test_all_subgroup(cg_all_call); } + + +template 
+__WG_BOUND__(KCfg::decode<0>(config)) +void cg_any(bool *s, sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCfg::decode<1>(config); + auto group = group::tiled_partition(config)>( + group::this_thread_block(item_ct1)); + auto i = int(group.thread_rank()); + + s[i] = group.any(true); + s[i + sg_size] = group.any(item_ct1.get_local_id(2) == 0); + s[i + sg_size * 2] = !group.any(false); +} + +GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_any, cg_any) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_any, cg_any) +GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_any_call, cg_any, default_config_list) + +TEST_P(CooperativeGroups, Any) { test_all_subgroup(cg_any_call); } + + +template +__WG_BOUND__(KCfg::decode<0>(config)) +void cg_ballot(bool *s, sycl::nd_item<3> item_ct1) +{ + constexpr auto sg_size = KCfg::decode<1>(config); + auto group = + group::tiled_partition(group::this_thread_block(item_ct1)); + auto active = gko::detail::mask(); + auto i = int(group.thread_rank()); + + s[i] = group.ballot(false) == 0; + s[i + sg_size] = group.ballot(true) == (~config::lane_mask_type{} & active); + s[i + sg_size * 2] = group.ballot(item_ct1.get_local_id(2) < 4) == 0xf; +} + +GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_ballot, cg_ballot) +GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_ballot, cg_ballot) +GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_ballot_call, cg_ballot, default_config_list) + +TEST_P(CooperativeGroups, Ballot) { test_all_subgroup(cg_ballot_call); } + + +INSTANTIATE_TEST_SUITE_P(DifferentSubgroup, CooperativeGroups, + testing::Values(4, 8, 16, 32, 64), + testing::PrintToStringParamName()); + + +} // namespace diff --git a/dpcpp/test/components/fill_array.cpp b/dpcpp/test/components/fill_array.cpp new file mode 100644 index 00000000000..1871793b83b --- /dev/null +++ b/dpcpp/test/components/fill_array.cpp @@ -0,0 +1,100 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/components/fill_array.hpp" + + +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class FillArray : public ::testing::Test { +protected: + using value_type = T; + FillArray() + : ref(gko::ReferenceExecutor::create()), + exec(gko::DpcppExecutor::create(0, ref)), + total_size(63531), + vals(ref, total_size), + dvals(exec, total_size), + seqs(ref, total_size) + { + std::fill_n(vals.get_data(), total_size, T(1234)); + std::iota(seqs.get_data(), seqs.get_data() + total_size, 0); + } + + std::shared_ptr ref; + std::shared_ptr exec; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; + gko::Array seqs; +}; + +TYPED_TEST_SUITE(FillArray, gko::test::ValueAndIndexTypes); + + +TYPED_TEST(FillArray, EqualsReference) +{ + using T = typename TestFixture::value_type; + gko::kernels::dpcpp::components::fill_array( + this->exec, this->dvals.get_data(), this->total_size, T(1234)); + + GKO_ASSERT_ARRAY_EQ(this->vals, this->dvals); +} + + +TYPED_TEST(FillArray, FillSeqEqualsReference) +{ + using T = typename TestFixture::value_type; + gko::kernels::dpcpp::components::fill_seq_array( + this->exec, this->dvals.get_data(), this->total_size); + + GKO_ASSERT_ARRAY_EQ(this->seqs, this->dvals); +} + + +} // namespace diff --git a/dpcpp/test/components/precision_conversion.cpp b/dpcpp/test/components/precision_conversion.cpp new file mode 100644 index 00000000000..e9584e3b2db --- /dev/null +++ b/dpcpp/test/components/precision_conversion.cpp @@ -0,0 +1,175 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class PrecisionConversion : public ::testing::Test { +protected: + PrecisionConversion() + : ref(gko::ReferenceExecutor::create()), + exec(gko::DpcppExecutor::create(0, ref)), + rand(293), + total_size(42793), + vals(ref, total_size), + cvals(ref, total_size), + vals2(ref, 1), + expected_float(ref, 1), + expected_double(ref, 1), + dvals(exec), + dcvals(exec), + dvals2(exec) + { + auto maxval = 1e10f; + std::uniform_real_distribution dist(-maxval, maxval); + for (gko::size_type i = 0; i < total_size; ++i) { + vals.get_data()[i] = dist(rand); + cvals.get_data()[i] = {dist(rand), dist(rand)}; + } + dvals = vals; + dcvals = cvals; + gko::uint64 rawdouble{0x4218888000889111ULL}; + gko::uint32 rawfloat{0x50c44400UL}; + gko::uint64 rawrounded{0x4218888000000000ULL}; + std::memcpy(vals2.get_data(), &rawdouble, sizeof(double)); + std::memcpy(expected_float.get_data(), &rawfloat, sizeof(float)); + std::memcpy(expected_double.get_data(), &rawrounded, sizeof(double)); + dvals2 = vals2; + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine rand; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; + gko::Array vals2; + gko::Array dvals2; + gko::Array expected_float; + gko::Array expected_double; + gko::Array> cvals; + gko::Array> dcvals; +}; + + +#ifndef GINKGO_DPCPP_SINGLE_MODE +TEST_F(PrecisionConversion, ConvertsReal) +{ + gko::Array dtmp; + gko::Array dout; + + dtmp = dvals; + dout = dtmp; + + GKO_ASSERT_ARRAY_EQ(dvals, dout); +} + + +TEST_F(PrecisionConversion, ConvertsRealViaRef) +{ + gko::Array tmp{ref}; + gko::Array dout; + + tmp = dvals; + dout = tmp; + + GKO_ASSERT_ARRAY_EQ(dvals, dout); +} + + +TEST_F(PrecisionConversion, ConvertsComplex) +{ + gko::Array> dtmp; + gko::Array> dout; + + dtmp = dcvals; + dout = dtmp; + + 
GKO_ASSERT_ARRAY_EQ(dcvals, dout); +} + + +TEST_F(PrecisionConversion, ConversionRounds) +{ + gko::Array dtmp; + gko::Array dout; + + dtmp = dvals2; + dout = dtmp; + + GKO_ASSERT_ARRAY_EQ(dtmp, expected_float); + GKO_ASSERT_ARRAY_EQ(dout, expected_double); +} + + +TEST_F(PrecisionConversion, ConvertsRealFromRef) +{ + gko::Array dtmp; + gko::Array dout; + + dtmp = vals; + dout = dtmp; + + GKO_ASSERT_ARRAY_EQ(dvals, dout); +} + + +TEST_F(PrecisionConversion, ConvertsComplexFromRef) +{ + gko::Array> dtmp; + gko::Array> dout; + + dtmp = cvals; + dout = dtmp; + + GKO_ASSERT_ARRAY_EQ(dcvals, dout); +} +#endif // GINKGO_DPCPP_SINGLE_MODE + + +} // namespace diff --git a/dpcpp/test/components/prefix_sum.cpp b/dpcpp/test/components/prefix_sum.cpp new file mode 100644 index 00000000000..402192d0b77 --- /dev/null +++ b/dpcpp/test/components/prefix_sum.cpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/prefix_sum.hpp" + + +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +class PrefixSum : public ::testing::Test { +protected: + using index_type = gko::int32; + PrefixSum() + : ref(gko::ReferenceExecutor::create()), + exec(gko::DpcppExecutor::create(0, ref)), + rand(293), + total_size(42793), + vals(ref, total_size), + dvals(exec) + { + std::uniform_int_distribution dist(0, 1000); + for (gko::size_type i = 0; i < total_size; ++i) { + vals.get_data()[i] = dist(rand); + } + dvals = vals; + } + + void test(gko::size_type size) + { + gko::kernels::reference::components::prefix_sum(ref, vals.get_data(), + size); + gko::kernels::dpcpp::components::prefix_sum(exec, dvals.get_data(), + size); + + GKO_ASSERT_ARRAY_EQ(vals, dvals); + } + + std::shared_ptr ref; + std::shared_ptr exec; + std::default_random_engine rand; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; +}; + + +TEST_F(PrefixSum, SmallEqualsReference) { test(100); } + + +TEST_F(PrefixSum, BigEqualsReference) { test(total_size); } + + +} // namespace diff --git a/dpcpp/test/matrix/CMakeLists.txt b/dpcpp/test/matrix/CMakeLists.txt new file mode 100644 index 00000000000..5be841b3d00 --- /dev/null +++ b/dpcpp/test/matrix/CMakeLists.txt @@ -0,0 +1,8 @@ +ginkgo_create_test(coo_kernels) +ginkgo_create_test(csr_kernels) 
+ginkgo_create_test(dense_kernels) +ginkgo_create_test(diagonal_kernels) +ginkgo_create_test(ell_kernels) +ginkgo_create_test(fbcsr_kernels) +ginkgo_create_test(hybrid_kernels) +ginkgo_create_test(sellp_kernels) diff --git a/dpcpp/test/matrix/coo_kernels.cpp b/dpcpp/test/matrix/coo_kernels.cpp new file mode 100644 index 00000000000..06073dd56eb --- /dev/null +++ b/dpcpp/test/matrix/coo_kernels.cpp @@ -0,0 +1,410 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/coo_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" + + +namespace { + + +class Coo : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Coo; + using Vec = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; + + Coo() : rand_engine(42) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(int num_vectors = 1) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(532, 231)); + expected = gen_mtx(532, num_vectors); + y = gen_mtx(231, num_vectors); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(dpcpp); + dmtx->copy_from(mtx.get()); + dresult = 
Vec::create(dpcpp); + dresult->copy_from(expected.get()); + dy = Vec::create(dpcpp); + dy->copy_from(y.get()); + dalpha = Vec::create(dpcpp); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(dpcpp); + dbeta->copy_from(beta.get()); + } + + void unsort_mtx() + { + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Coo, SimpleApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, SimpleApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(dpcpp, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + vtype padding_val{1234.0}; + dpcpp->copy_from(dpcpp->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, r::value); + ASSERT_EQ(dpcpp->copy_val_to_host(dresult_padded->get_values() + 1), + 1234.0); +} + + +TEST_F(Coo, SimpleApplyIsEquivalentToRefUnsorted) +{ + set_up_apply_data(); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, AdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, 
AdvancedApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(dpcpp, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + vtype padding_val{1234.0}; + dpcpp->copy_from(dpcpp->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, r::value); + ASSERT_EQ(dpcpp->copy_val_to_host(dresult_padded->get_values() + 1), + 1234.0); +} + + +TEST_F(Coo, SimpleApplyAddIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply2(y.get(), expected.get()); + dmtx->apply2(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, AdvancedApplyAddIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply2(alpha.get(), y.get(), expected.get()); + dmtx->apply2(dalpha.get(), dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, SimpleApplyToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, AdvancedApplyToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, SimpleApplyAddToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply2(y.get(), expected.get()); + dmtx->apply2(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, SimpleApplyAddToLargeDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(33); + + mtx->apply2(y.get(), expected.get()); + dmtx->apply2(dy.get(), dresult.get()); + + 
GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, AdvancedApplyAddToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply2(alpha.get(), y.get(), expected.get()); + dmtx->apply2(dalpha.get(), dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, AdvancedApplyAddToLargeDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(33); + + mtx->apply2(alpha.get(), y.get(), expected.get()); // alpha-scaled + dmtx->apply2(dalpha.get(), dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Coo, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Coo, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Coo, ApplyAddToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply2(alpha.get(), complex_b.get(), complex_x.get()); +
dmtx->apply2(dalpha.get(), dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Coo, ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_data(); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + mtx->convert_to(dense_mtx.get()); + dmtx->convert_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), r::value); +} + + +TEST_F(Coo, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + mtx->convert_to(dense_mtx.get()); + dense_mtx->convert_to(csr_mtx.get()); + dmtx->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), r::value); +} + + +TEST_F(Coo, ExtractDiagonalIsEquivalentToRef) +{ + set_up_apply_data(); + + auto diag = mtx->extract_diagonal(); + auto ddiag = dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Coo, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + +TEST_F(Coo, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); +} + + +} // namespace diff --git a/dpcpp/test/matrix/csr_kernels.cpp b/dpcpp/test/matrix/csr_kernels.cpp new file mode 100644 index 00000000000..e76669e7136 --- /dev/null +++ b/dpcpp/test/matrix/csr_kernels.cpp @@ -0,0 +1,947 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/unsort_matrix.hpp" + + +namespace { + + +class Csr : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Arr = gko::Array; + using Vec = gko::matrix::Dense; + using Mtx = gko::matrix::Csr; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Csr>; + + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(std::shared_ptr strategy, + int num_vectors = 1) + { + mtx = Mtx::create(ref, strategy); + mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[1], 1)); + square_mtx = Mtx::create(ref, strategy); + square_mtx->copy_from(gen_mtx(mtx_size[0], mtx_size[0], 1)); + expected = gen_mtx(mtx_size[0], num_vectors, 1); + y = gen_mtx(mtx_size[1], num_vectors, 1); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(dpcpp, strategy); + dmtx->copy_from(mtx.get()); + square_dmtx = Mtx::create(dpcpp, strategy); + square_dmtx->copy_from(square_mtx.get()); + dresult = Vec::create(dpcpp); + 
dresult->copy_from(expected.get()); + dy = Vec::create(dpcpp); + dy->copy_from(y.get()); + dalpha = Vec::create(dpcpp); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(dpcpp); + dbeta->copy_from(beta.get()); + + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + } + + void set_up_apply_complex_data( + std::shared_ptr strategy) + { + complex_mtx = ComplexMtx::create(ref, strategy); + complex_mtx->copy_from( + gen_mtx(mtx_size[0], mtx_size[1], 1)); + complex_dmtx = ComplexMtx::create(dpcpp, strategy); + complex_dmtx->copy_from(complex_mtx.get()); + } + + void unsort_mtx() + { + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr complex_mtx; + std::unique_ptr square_mtx; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr complex_dmtx; + std::unique_ptr square_dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr cpermute_idxs; +}; + + +TEST_F(Csr, StrategyAfterCopyIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + ASSERT_EQ(mtx->get_strategy()->get_name(), + dmtx->get_strategy()->get_name()); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalance) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + 
GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalanceUnsorted) +{ + set_up_apply_data(std::make_shared(dpcpp)); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithLoadBalance) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithOnemkl) +{ + set_up_apply_data(std::make_shared()); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithOnemklUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithOnemkl) +{ + set_up_apply_data(std::make_shared()); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePath) +{ + set_up_apply_data(std::make_shared()); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePathUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithMergePath) +{ + 
set_up_apply_data(std::make_shared()); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassical) +{ + set_up_apply_data(std::make_shared()); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassicalUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithClassical) +{ + set_up_apply_data(std::make_shared()); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithAutomatical) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +{ + set_up_apply_data(std::make_shared(dpcpp), 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) +{ + set_up_apply_data(std::make_shared(dpcpp), 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyToDenseMatrixIsEquivalentToRefWithClassical) +{ + set_up_apply_data(std::make_shared(), 3); + + 
mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithClassical) +{ + set_up_apply_data(std::make_shared(), 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SimpleApplyToDenseMatrixIsEquivalentToRefWithMergePath) +{ + set_up_apply_data(std::make_shared(), 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyToDenseMatrixIsEquivalentToRefWithMergePath) +{ + set_up_apply_data(std::make_shared(), 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, AdvancedApplyToCsrMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto trans = mtx->transpose(); + auto d_trans = dmtx->transpose(); + + mtx->apply(alpha.get(), trans.get(), beta.get(), square_mtx.get()); + dmtx->apply(dalpha.get(), d_trans.get(), dbeta.get(), square_dmtx.get()); + + GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); + ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +} + + +TEST_F(Csr, SimpleApplyToCsrMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto trans = mtx->transpose(); + auto d_trans = dmtx->transpose(); + + mtx->apply(trans.get(), square_mtx.get()); + dmtx->apply(d_trans.get(), square_dmtx.get()); + + GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); + ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); +} + + +TEST_F(Csr, 
AdvancedApplyToIdentityMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto a = gen_mtx(mtx_size[0], mtx_size[1], 0); + auto b = gen_mtx(mtx_size[0], mtx_size[1], 0); + auto da = Mtx::create(dpcpp); + auto db = Mtx::create(dpcpp); + da->copy_from(a.get()); + db->copy_from(b.get()); + auto id = gko::matrix::Identity::create(ref, mtx_size[1]); + auto did = + gko::matrix::Identity::create(dpcpp, mtx_size[1]); + + a->apply(alpha.get(), id.get(), beta.get(), b.get()); + da->apply(dalpha.get(), did.get(), dbeta.get(), db.get()); + + GKO_ASSERT_MTX_NEAR(b, db, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(b, db); + ASSERT_TRUE(db->is_sorted_by_column_index()); +} + + +TEST_F(Csr, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Csr, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Csr, TransposeIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + auto trans = gko::as(mtx->transpose()); + auto d_trans = 
gko::as(dmtx->transpose()); + + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); +} + + +TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(dpcpp)); + + auto trans = gko::as(complex_mtx->conj_transpose()); + auto d_trans = gko::as(complex_dmtx->conj_transpose()); + + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); +} + + +TEST_F(Csr, ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + mtx->convert_to(dense_mtx.get()); + dmtx->convert_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), r::value); +} + + +TEST_F(Csr, MoveToDenseIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + mtx->move_to(dense_mtx.get()); + dmtx->move_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), r::value); +} + + +TEST_F(Csr, ConvertToEllIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto ell_mtx = gko::matrix::Ell::create(ref); + auto dell_mtx = gko::matrix::Ell::create(dpcpp); + + mtx->convert_to(ell_mtx.get()); + dmtx->convert_to(dell_mtx.get()); + + GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), r::value); +} + + +TEST_F(Csr, MoveToEllIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto ell_mtx = gko::matrix::Ell::create(ref); + auto dell_mtx = gko::matrix::Ell::create(dpcpp); + + mtx->move_to(ell_mtx.get()); + dmtx->move_to(dell_mtx.get()); + + GKO_ASSERT_MTX_NEAR(ell_mtx.get(), dell_mtx.get(), r::value); +} + + +TEST_F(Csr, ConvertToSparsityCsrIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto sparsity_mtx = gko::matrix::SparsityCsr::create(ref); + auto d_sparsity_mtx = 
gko::matrix::SparsityCsr::create(dpcpp); + + mtx->convert_to(sparsity_mtx.get()); + dmtx->convert_to(d_sparsity_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), + r::value); +} + + +TEST_F(Csr, MoveToSparsityCsrIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto sparsity_mtx = gko::matrix::SparsityCsr::create(ref); + auto d_sparsity_mtx = gko::matrix::SparsityCsr::create(dpcpp); + + mtx->move_to(sparsity_mtx.get()); + dmtx->move_to(d_sparsity_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sparsity_mtx.get(), d_sparsity_mtx.get(), + r::value); +} + + +TEST_F(Csr, CalculateMaxNnzPerRowIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + gko::size_type max_nnz_per_row; + gko::size_type dmax_nnz_per_row; + + gko::kernels::reference::csr::calculate_max_nnz_per_row(ref, mtx.get(), + &max_nnz_per_row); + gko::kernels::dpcpp::csr::calculate_max_nnz_per_row(dpcpp, dmtx.get(), + &dmax_nnz_per_row); + + ASSERT_EQ(max_nnz_per_row, dmax_nnz_per_row); +} + + +TEST_F(Csr, ConvertToCooIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto coo_mtx = gko::matrix::Coo::create(ref); + auto dcoo_mtx = gko::matrix::Coo::create(dpcpp); + + mtx->convert_to(coo_mtx.get()); + dmtx->convert_to(dcoo_mtx.get()); + + GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), r::value); +} + + +TEST_F(Csr, MoveToCooIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto coo_mtx = gko::matrix::Coo::create(ref); + auto dcoo_mtx = gko::matrix::Coo::create(dpcpp); + + mtx->move_to(coo_mtx.get()); + dmtx->move_to(dcoo_mtx.get()); + + GKO_ASSERT_MTX_NEAR(coo_mtx.get(), dcoo_mtx.get(), r::value); +} + + +TEST_F(Csr, ConvertToSellpIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto sellp_mtx = gko::matrix::Sellp::create(ref); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + mtx->convert_to(sellp_mtx.get()); + dmtx->convert_to(dsellp_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), 
r::value); +} + + +TEST_F(Csr, MoveToSellpIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + auto sellp_mtx = gko::matrix::Sellp::create(ref); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + mtx->move_to(sellp_mtx.get()); + dmtx->move_to(dsellp_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sellp_mtx.get(), dsellp_mtx.get(), r::value); +} + + +TEST_F(Csr, ConvertsEmptyToSellp) +{ + auto dempty_mtx = Mtx::create(dpcpp); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + dempty_mtx->convert_to(dsellp_mtx.get()); + + ASSERT_EQ(dpcpp->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); + ASSERT_FALSE(dsellp_mtx->get_size()); +} + + +TEST_F(Csr, CalculateTotalColsIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + gko::size_type total_cols; + gko::size_type dtotal_cols; + + gko::kernels::reference::csr::calculate_total_cols( + ref, mtx.get(), &total_cols, 2, gko::matrix::default_slice_size); + gko::kernels::dpcpp::csr::calculate_total_cols( + dpcpp, dmtx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); + + ASSERT_EQ(total_cols, dtotal_cols); +} + + +TEST_F(Csr, CalculatesNonzerosPerRow) +{ + set_up_apply_data(std::make_shared()); + gko::Array row_nnz(ref, mtx->get_size()[0]); + gko::Array drow_nnz(dpcpp, dmtx->get_size()[0]); + + gko::kernels::reference::csr::calculate_nonzeros_per_row(ref, mtx.get(), + &row_nnz); + gko::kernels::dpcpp::csr::calculate_nonzeros_per_row(dpcpp, dmtx.get(), + &drow_nnz); + + GKO_ASSERT_ARRAY_EQ(row_nnz, drow_nnz); +} + + +TEST_F(Csr, ConvertToHybridIsEquivalentToRef) +{ + using Hybrid_type = gko::matrix::Hybrid; + set_up_apply_data(std::make_shared()); + auto hybrid_mtx = Hybrid_type::create( + ref, std::make_shared(2)); + auto dhybrid_mtx = Hybrid_type::create( + dpcpp, std::make_shared(2)); + + mtx->convert_to(hybrid_mtx.get()); + dmtx->convert_to(dhybrid_mtx.get()); + + GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), r::value); +} + + +TEST_F(Csr, MoveToHybridIsEquivalentToRef) 
+{ + using Hybrid_type = gko::matrix::Hybrid; + set_up_apply_data(std::make_shared()); + auto hybrid_mtx = Hybrid_type::create( + ref, std::make_shared(2)); + auto dhybrid_mtx = Hybrid_type::create( + dpcpp, std::make_shared(2)); + + mtx->move_to(hybrid_mtx.get()); + dmtx->move_to(dhybrid_mtx.get()); + + GKO_ASSERT_MTX_NEAR(hybrid_mtx.get(), dhybrid_mtx.get(), r::value); +} + + +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = + gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); +} + + +TEST_F(Csr, IsColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto c_permute = gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); +} + + +TEST_F(Csr, IsInverseRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_r_permute = + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + 
gko::as(dmtx->inverse_row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); +} + + +TEST_F(Csr, IsInverseColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_c_permute = + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); + auto d_inverse_c_permute = + gko::as(dmtx->inverse_column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); +} + + +TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + bool is_sorted_dpcpp{}; + bool is_sorted_ref{}; + + is_sorted_ref = mtx->is_sorted_by_column_index(); + is_sorted_dpcpp = dmtx->is_sorted_by_column_index(); + + ASSERT_EQ(is_sorted_ref, is_sorted_dpcpp); +} + + +TEST_F(Csr, RecognizeUnsortedMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + bool is_sorted_dpcpp{}; + bool is_sorted_ref{}; + + is_sorted_ref = mtx->is_sorted_by_column_index(); + is_sorted_dpcpp = dmtx->is_sorted_by_column_index(); + + ASSERT_EQ(is_sorted_ref, is_sorted_dpcpp); +} + + +TEST_F(Csr, SortSortedMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); + + // Values must be unchanged, therefore, tolerance is `0` + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); +} + + +TEST_F(Csr, SortUnsortedMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); + + // Values must be unchanged, therefore, tolerance is `0` + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); +} + + +TEST_F(Csr, OneAutomaticalWorksWithDifferentMatrices) +{ + auto automatical = std::make_shared(); + auto row_len_limit = 
std::max(automatical->nvidia_row_len_limit, + automatical->amd_row_len_limit); + auto load_balance_mtx = Mtx::create(ref); + auto classical_mtx = Mtx::create(ref); + load_balance_mtx->copy_from( + gen_mtx(1, row_len_limit + 1000, row_len_limit + 1)); + classical_mtx->copy_from(gen_mtx(50, 50, 1)); + auto load_balance_mtx_d = Mtx::create(dpcpp); + auto classical_mtx_d = Mtx::create(dpcpp); + load_balance_mtx_d->copy_from(load_balance_mtx.get()); + classical_mtx_d->copy_from(classical_mtx.get()); + + load_balance_mtx_d->set_strategy(automatical); + classical_mtx_d->set_strategy(automatical); + + EXPECT_EQ("load_balance", load_balance_mtx_d->get_strategy()->get_name()); + EXPECT_EQ("classical", classical_mtx_d->get_strategy()->get_name()); + ASSERT_NE(load_balance_mtx_d->get_strategy().get(), + classical_mtx_d->get_strategy().get()); +} + + +TEST_F(Csr, ExtractDiagonalIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared()); + + auto diag = mtx->extract_diagonal(); + auto ddiag = dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Csr, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + +TEST_F(Csr, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(dpcpp)); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); +} + + +TEST_F(Csr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(dpcpp)); + + complex_mtx->compute_absolute_inplace(); + complex_dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, r::value); +} + + +TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(dpcpp)); + + auto abs_mtx = 
complex_mtx->compute_absolute(); + auto dabs_mtx = complex_dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); +} + + +} // namespace diff --git a/dpcpp/test/matrix/dense_kernels.cpp b/dpcpp/test/matrix/dense_kernels.cpp new file mode 100644 index 00000000000..a21d6fd15fe --- /dev/null +++ b/dpcpp/test/matrix/dense_kernels.cpp @@ -0,0 +1,622 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/test/utils.hpp" +#include "dpcpp/test/utils.hpp" + + +namespace { + + +class Dense : public ::testing::Test { +protected: + using itype = int; +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Dense; + using MixedMtx = gko::matrix::Dense>; + using NormVector = gko::matrix::Dense>; + using Arr = gko::Array; + using ComplexMtx = gko::matrix::Dense>; + using MixedComplexMtx = + gko::matrix::Dense>>; + + Dense() : rand_engine(15) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(0.0, 1.0), rand_engine, ref); + } + + void set_up_vector_data(gko::size_type num_vecs, + bool different_alpha = false) + { + x = gen_mtx(1000, num_vecs); + y = gen_mtx(1000, num_vecs); + if (different_alpha) { + alpha = gen_mtx(1, num_vecs); + } else { + alpha = gko::initialize({2.0}, ref); + } + dx = Mtx::create(dpcpp); + dx->copy_from(x.get()); + dy = Mtx::create(dpcpp); + dy->copy_from(y.get()); + dalpha = Mtx::create(dpcpp); + dalpha->copy_from(alpha.get()); + expected = Mtx::create(ref, gko::dim<2>{1, num_vecs}); + dresult = Mtx::create(dpcpp, gko::dim<2>{1, num_vecs}); + } + + void set_up_apply_data() + { + x = gen_mtx(65, 25); + c_x = gen_mtx(65, 25); + y = gen_mtx(25, 35); + 
expected = gen_mtx(65, 35); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); + dx = Mtx::create(dpcpp); + dx->copy_from(x.get()); + dc_x = ComplexMtx::create(dpcpp); + dc_x->copy_from(c_x.get()); + dy = Mtx::create(dpcpp); + dy->copy_from(y.get()); + dresult = Mtx::create(dpcpp); + dresult->copy_from(expected.get()); + dalpha = Mtx::create(dpcpp); + dalpha->copy_from(alpha.get()); + dbeta = Mtx::create(dpcpp); + dbeta->copy_from(beta.get()); + dsquare = Mtx::create(dpcpp); + dsquare->copy_from(square.get()); + + std::vector tmp(x->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(x->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + std::vector tmp3(x->get_size()[0] / 10); + std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); + for (auto &i : tmp3) { + i = row_dist(rng); + } + rpermute_idxs = + std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); + cpermute_idxs = + std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); + rgather_idxs = + std::unique_ptr(new Arr{ref, tmp3.begin(), tmp3.end()}); + } + + template + std::unique_ptr convert(InputType &&input) + { + auto result = ConvertedType::create(input->get_executor()); + input->convert_to(result.get()); + return result; + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr x; + std::unique_ptr c_x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr square; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dc_x; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; + std::unique_ptr dsquare; + std::unique_ptr rpermute_idxs; + std::unique_ptr cpermute_idxs; + std::unique_ptr rgather_idxs; +}; + 
+ +TEST_F(Dense, SingleVectorDpcppComputeDotIsEquivalentToRef) +{ + set_up_vector_data(1); + + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, MultipleVectorDpcppComputeDotIsEquivalentToRef) +{ + set_up_vector_data(20); + + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, SingleVectorDpcppComputeConjDotIsEquivalentToRef) +{ + set_up_vector_data(1); + + x->compute_conj_dot(y.get(), expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, MultipleVectorDpcppComputeConjDotIsEquivalentToRef) +{ + set_up_vector_data(20); + + x->compute_conj_dot(y.get(), expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, DpcppComputeNorm2IsEquivalentToRef) +{ + set_up_vector_data(20); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->dpcpp, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, r::value); +} + + +TEST_F(Dense, SimpleApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, SimpleApplyMixedIsEquivalentToRef) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + x->apply(convert(y).get(), convert(expected).get()); + dx->apply(convert(dy).get(), convert(dresult).get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); +} + + +TEST_F(Dense, AdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + x->apply(alpha.get(), y.get(), beta.get(), 
expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, AdvancedApplyMixedIsEquivalentToRef) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + x->apply(convert(alpha).get(), convert(y).get(), + convert(beta).get(), convert(expected).get()); + dx->apply(convert(dalpha).get(), convert(dy).get(), + convert(dbeta).get(), convert(dresult).get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); +} + + +TEST_F(Dense, ApplyToComplexIsEquivalentToRef) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Dense, ApplyToMixedComplexIsEquivalentToRef) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = MixedComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); +} + + +TEST_F(Dense, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + x->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), 
dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Dense, AdvancedApplyToMixedComplexIsEquivalentToRef) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = MixedComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + x->apply(convert(alpha).get(), complex_b.get(), + convert(beta).get(), complex_x.get()); + dx->apply(convert(dalpha).get(), dcomplex_b.get(), + convert(dbeta).get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); +} + + +TEST_F(Dense, ComputeDotComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(dpcpp, gko::dim<2>{1, 2}); + + complex_b->compute_dot(complex_x.get(), result.get()); + dcomplex_b->compute_dot(dcomplex_x.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(result, dresult, r::value); +} + + +TEST_F(Dense, ComputeConjDotComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(dpcpp, gko::dim<2>{1, 2}); + + complex_b->compute_conj_dot(complex_x.get(), result.get()); + dcomplex_b->compute_conj_dot(dcomplex_x.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(result, dresult, r::value); +} + + +TEST_F(Dense, 
IsTransposable) +{ + set_up_apply_data(); + + auto trans = x->transpose(); + auto dtrans = dx->transpose(); + + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); +} + + +TEST_F(Dense, IsConjugateTransposable) +{ + set_up_apply_data(); + + auto trans = c_x->conj_transpose(); + auto dtrans = dc_x->conj_transpose(); + + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); +} + + +TEST_F(Dense, ConvertToCooIsEquivalentToRef) +{ + set_up_apply_data(); + auto coo_mtx = gko::matrix::Coo::create(ref); + auto dcoo_mtx = gko::matrix::Coo::create(dpcpp); + + x->convert_to(coo_mtx.get()); + dx->convert_to(dcoo_mtx.get()); + + ASSERT_EQ(dcoo_mtx->get_num_stored_elements(), + coo_mtx->get_num_stored_elements()); + GKO_ASSERT_MTX_NEAR(dcoo_mtx.get(), coo_mtx.get(), r::value); +} + + +TEST_F(Dense, MoveToCooIsEquivalentToRef) +{ + set_up_apply_data(); + auto coo_mtx = gko::matrix::Coo::create(ref); + auto dcoo_mtx = gko::matrix::Coo::create(dpcpp); + + x->move_to(coo_mtx.get()); + dx->move_to(dcoo_mtx.get()); + + ASSERT_EQ(dcoo_mtx->get_num_stored_elements(), + coo_mtx->get_num_stored_elements()); + GKO_ASSERT_MTX_NEAR(dcoo_mtx.get(), coo_mtx.get(), r::value); +} + + +TEST_F(Dense, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + x->convert_to(csr_mtx.get()); + dx->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dcsr_mtx.get(), csr_mtx.get(), r::value); +} + + +TEST_F(Dense, MoveToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + x->move_to(csr_mtx.get()); + dx->move_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dcsr_mtx.get(), csr_mtx.get(), r::value); +} + + +TEST_F(Dense, ConvertToEllIsEquivalentToRef) +{ + set_up_apply_data(); + auto ell_mtx = gko::matrix::Ell::create(ref); + auto dell_mtx = 
gko::matrix::Ell::create(dpcpp); + + x->convert_to(ell_mtx.get()); + dx->convert_to(dell_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dell_mtx.get(), ell_mtx.get(), r::value); +} + + +TEST_F(Dense, MoveToEllIsEquivalentToRef) +{ + set_up_apply_data(); + auto ell_mtx = gko::matrix::Ell::create(ref); + auto dell_mtx = gko::matrix::Ell::create(dpcpp); + + x->move_to(ell_mtx.get()); + dx->move_to(dell_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dell_mtx.get(), ell_mtx.get(), r::value); +} + + +TEST_F(Dense, ConvertToSellpIsEquivalentToRef) +{ + set_up_apply_data(); + auto sellp_mtx = gko::matrix::Sellp::create(ref); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + x->convert_to(sellp_mtx.get()); + dx->convert_to(dsellp_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sellp_mtx, dsellp_mtx, r::value); +} + + +TEST_F(Dense, MoveToSellpIsEquivalentToRef) +{ + set_up_apply_data(); + auto sellp_mtx = gko::matrix::Sellp::create(ref); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + x->move_to(sellp_mtx.get()); + dx->move_to(dsellp_mtx.get()); + + GKO_ASSERT_MTX_NEAR(sellp_mtx, dsellp_mtx, r::value); +} + + +TEST_F(Dense, ConvertsEmptyToSellp) +{ + auto dempty_mtx = Mtx::create(dpcpp); + auto dsellp_mtx = gko::matrix::Sellp::create(dpcpp); + + dempty_mtx->convert_to(dsellp_mtx.get()); + + ASSERT_EQ(dpcpp->copy_val_to_host(dsellp_mtx->get_const_slice_sets()), 0); + ASSERT_FALSE(dsellp_mtx->get_size()); +} + + +TEST_F(Dense, CountNNZIsEquivalentToRef) +{ + set_up_apply_data(); + gko::size_type nnz; + gko::size_type dnnz; + + gko::kernels::reference::dense::count_nonzeros(ref, x.get(), &nnz); + gko::kernels::dpcpp::dense::count_nonzeros(dpcpp, dx.get(), &dnnz); + + ASSERT_EQ(nnz, dnnz); +} + + +TEST_F(Dense, CalculateNNZPerRowIsEquivalentToRef) +{ + set_up_apply_data(); + gko::Array nnz_per_row(ref); + nnz_per_row.resize_and_reset(x->get_size()[0]); + gko::Array dnnz_per_row(dpcpp); + dnnz_per_row.resize_and_reset(dx->get_size()[0]); + + 
gko::kernels::reference::dense::calculate_nonzeros_per_row(ref, x.get(), + &nnz_per_row); + gko::kernels::dpcpp::dense::calculate_nonzeros_per_row(dpcpp, dx.get(), + &dnnz_per_row); + + auto tmp = gko::Array(ref, dnnz_per_row); + for (gko::size_type i = 0; i < nnz_per_row.get_num_elems(); i++) { + ASSERT_EQ(nnz_per_row.get_const_data()[i], tmp.get_const_data()[i]); + } +} + + +TEST_F(Dense, CalculateMaxNNZPerRowIsEquivalentToRef) +{ + set_up_apply_data(); + gko::size_type max_nnz; + gko::size_type dmax_nnz; + + gko::kernels::reference::dense::calculate_max_nnz_per_row(ref, x.get(), + &max_nnz); + gko::kernels::dpcpp::dense::calculate_max_nnz_per_row(dpcpp, dx.get(), + &dmax_nnz); + + ASSERT_EQ(max_nnz, dmax_nnz); +} + + +TEST_F(Dense, CalculateTotalColsIsEquivalentToRef) +{ + set_up_apply_data(); + gko::size_type total_cols; + gko::size_type dtotal_cols; + + gko::kernels::reference::dense::calculate_total_cols( + ref, x.get(), &total_cols, 2, gko::matrix::default_slice_size); + gko::kernels::dpcpp::dense::calculate_total_cols( + dpcpp, dx.get(), &dtotal_cols, 2, gko::matrix::default_slice_size); + + ASSERT_EQ(total_cols, dtotal_cols); +} + + +} // namespace diff --git a/dpcpp/test/matrix/diagonal_kernels.cpp b/dpcpp/test/matrix/diagonal_kernels.cpp new file mode 100644 index 00000000000..352c5e40823 --- /dev/null +++ b/dpcpp/test/matrix/diagonal_kernels.cpp @@ -0,0 +1,283 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include +#include +#include + + +#include + + +#include +#include + + +#include "core/matrix/diagonal_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Diagonal : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using ValueType = vtype; + using ComplexValueType = std::complex; + using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; + using Dense = gko::matrix::Dense; + using ComplexDiag = gko::matrix::Diagonal; + + Diagonal() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution(0.0, 1.0), rand_engine, ref); + } + + std::unique_ptr gen_diag(int size) + { + auto diag = Diag::create(ref, size); + auto vals = diag->get_values(); + auto value_dist = std::normal_distribution(0.0, 1.0); + for (int i = 0; i < size; i++) { + vals[i] = gko::test::detail::get_rand_value(value_dist, + rand_engine); + } + return diag; + } + + std::unique_ptr gen_cdiag(int size) + { + auto cdiag = ComplexDiag::create(ref, size); + auto vals = cdiag->get_values(); + auto value_dist = std::normal_distribution(0.0, 1.0); + for (int i = 0; i < size; i++) { + vals[i] = ComplexValueType{ + gko::test::detail::get_rand_value( + value_dist, rand_engine)}; + } + return cdiag; + } + + void set_up_apply_data() + { + diag = 
gen_diag(mtx_size[0]); + ddiag = Diag::create(dpcpp); + ddiag->copy_from(diag.get()); + dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + ddense1 = Dense::create(dpcpp); + ddense1->copy_from(dense1.get()); + ddense2 = Dense::create(dpcpp); + ddense2->copy_from(dense2.get()); + denseresult1 = Dense::create(dpcpp); + denseresult1->copy_from(denseexpected1.get()); + denseresult2 = Dense::create(dpcpp); + denseresult2->copy_from(denseexpected2.get()); + csr1 = gen_mtx(mtx_size[0], mtx_size[1], 1); + csr2 = gen_mtx(mtx_size[1], mtx_size[0], 1); + csrexpected1 = gen_mtx(mtx_size[0], mtx_size[1], 1); + csrexpected2 = gen_mtx(mtx_size[1], mtx_size[0], 1); + dcsr1 = Csr::create(dpcpp); + dcsr1->copy_from(csr1.get()); + dcsr2 = Csr::create(dpcpp); + dcsr2->copy_from(csr2.get()); + csrresult1 = Csr::create(dpcpp); + csrresult1->copy_from(csrexpected1.get()); + csrresult2 = Csr::create(dpcpp); + csrresult2->copy_from(csrexpected2.get()); + } + + void set_up_complex_data() + { + cdiag = gen_cdiag(mtx_size[0]); + dcdiag = ComplexDiag::create(dpcpp); + dcdiag->copy_from(cdiag.get()); + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + const gko::dim<2> mtx_size; + std::ranlux48 rand_engine; + + std::unique_ptr diag; + std::unique_ptr ddiag; + std::unique_ptr cdiag; + std::unique_ptr dcdiag; + + std::unique_ptr dense1; + std::unique_ptr dense2; + std::unique_ptr denseexpected1; + std::unique_ptr denseexpected2; + std::unique_ptr denseresult1; + std::unique_ptr denseresult2; + std::unique_ptr ddense1; + std::unique_ptr ddense2; + std::unique_ptr csr1; + std::unique_ptr csr2; + std::unique_ptr dcsr1; + std::unique_ptr dcsr2; + std::unique_ptr csrexpected1; + std::unique_ptr csrexpected2; + std::unique_ptr csrresult1; + std::unique_ptr csrresult2; +}; + + +TEST_F(Diagonal, 
ApplyToDenseIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->apply(dense1.get(), denseexpected1.get()); + ddiag->apply(ddense1.get(), denseresult1.get()); + + GKO_ASSERT_MTX_NEAR(denseexpected1, denseresult1, r::value); +} + + +TEST_F(Diagonal, RightApplyToDenseIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->rapply(dense2.get(), denseexpected2.get()); + ddiag->rapply(ddense2.get(), denseresult2.get()); + + GKO_ASSERT_MTX_NEAR(denseexpected2, denseresult2, r::value); +} + + +TEST_F(Diagonal, ApplyToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->apply(csr1.get(), csrexpected1.get()); + ddiag->apply(dcsr1.get(), csrresult1.get()); + + GKO_ASSERT_MTX_NEAR(csrexpected1, csrresult1, r::value); +} + + +TEST_F(Diagonal, RightApplyToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->rapply(csr2.get(), csrexpected2.get()); + ddiag->rapply(dcsr2.get(), csrresult2.get()); + + GKO_ASSERT_MTX_NEAR(csrexpected2, csrresult2, r::value); +} + + +TEST_F(Diagonal, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->convert_to(csr1.get()); + ddiag->convert_to(dcsr1.get()); + + GKO_ASSERT_MTX_NEAR(csr1, dcsr1, 0); +} + + +TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) +{ + set_up_complex_data(); + + auto trans = cdiag->conj_transpose(); + auto trans_diag = static_cast(trans.get()); + auto dtrans = dcdiag->conj_transpose(); + auto dtrans_diag = static_cast(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0); +} + + +TEST_F(Diagonal, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + diag->compute_absolute_inplace(); + ddiag->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(diag, ddiag, r::value); +} + + +TEST_F(Diagonal, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_diag = diag->compute_absolute(); + auto dabs_diag = ddiag->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_diag, dabs_diag, r::value); +} + + +} // namespace diff --git a/dpcpp/test/matrix/ell_kernels.cpp 
b/dpcpp/test/matrix/ell_kernels.cpp new file mode 100644 index 00000000000..3b735aa686e --- /dev/null +++ b/dpcpp/test/matrix/ell_kernels.cpp @@ -0,0 +1,665 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/ell_kernels.hpp" +#include "core/test/utils.hpp" +#include "dpcpp/test/utils.hpp" + + +namespace { + + +class Ell : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Ell; + using Vec = gko::matrix::Dense; + using Vec2 = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; + + Ell() + : rand_engine(42), size{532, 231}, num_els_rowwise{300}, ell_stride{600} + {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(int num_rows = 532, int num_cols = 231, + int num_vectors = 1, + int num_stored_elements_per_row = 0, int stride = 0) + { + mtx = Mtx::create(ref, gko::dim<2>{}, num_stored_elements_per_row, + stride); + mtx->copy_from(gen_mtx(num_rows, num_cols)); + expected = gen_mtx(num_rows, num_vectors); + expected2 = Vec2::create(ref); + expected2->copy_from(expected.get()); + y = gen_mtx(num_cols, num_vectors); + y2 = Vec2::create(ref); + y2->copy_from(y.get()); + alpha = gko::initialize({2.0}, ref); + alpha2 = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + beta2 = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(dpcpp); + dmtx->copy_from(mtx.get()); + dresult = Vec::create(dpcpp); + dresult->copy_from(expected.get()); + 
dresult2 = Vec2::create(dpcpp); + dresult2->copy_from(expected2.get()); + dy = Vec::create(dpcpp); + dy->copy_from(y.get()); + dy2 = Vec2::create(dpcpp); + dy2->copy_from(y2.get()); + dalpha = Vec::create(dpcpp); + dalpha->copy_from(alpha.get()); + dalpha2 = Vec2::create(dpcpp); + dalpha2->copy_from(alpha2.get()); + dbeta = Vec::create(dpcpp); + dbeta->copy_from(beta.get()); + dbeta2 = Vec2::create(dpcpp); + dbeta2->copy_from(beta2.get()); + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + gko::dim<2> size; + gko::size_type num_els_rowwise; + gko::size_type ell_stride; + + std::unique_ptr mtx; + std::unique_ptr expected; + std::unique_ptr expected2; + std::unique_ptr y; + std::unique_ptr y2; + std::unique_ptr alpha; + std::unique_ptr alpha2; + std::unique_ptr beta; + std::unique_ptr beta2; + + std::unique_ptr dmtx; + std::unique_ptr dresult; + std::unique_ptr dresult2; + std::unique_ptr dy; + std::unique_ptr dy2; + std::unique_ptr dalpha; + std::unique_ptr dalpha2; + std::unique_ptr dbeta; + std::unique_ptr dbeta2; +}; + + +TEST_F(Ell, SimpleApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); 
+} + + +TEST_F(Ell, AdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + 
GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, 
expected, r::value); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + 
GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + SKIP_IF_SINGLE_MODE; + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, SimpleApplyByAtomicIsEquivalentToRef) +{ + set_up_apply_data(10, 10000); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, AdvancedByAtomicApplyIsEquivalentToRef) +{ + set_up_apply_data(10, 10000); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, SimpleApplyByAtomicToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(10, 10000, 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, AdvancedByAtomicToDenseMatrixApplyIsEquivalentToRef) +{ + set_up_apply_data(10, 10000, 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, SimpleApplyOnSmallMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 5 * r::value); +} + + +TEST_F(Ell, AdvancedApplyOnSmallMatrixToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10, 3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); 
+} + + +TEST_F(Ell, SimpleApplyOnSmallMatrixToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10, 3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, AdvancedApplyOnSmallMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, 10); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value * 5); +} + + +TEST_F(Ell, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Ell, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Ell, ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_data(); + + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + mtx->convert_to(dense_mtx.get()); + dmtx->convert_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), r::value); +} + + +TEST_F(Ell, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_data(); + + 
auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + mtx->convert_to(csr_mtx.get()); + dmtx->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), r::value); +} + + +TEST_F(Ell, CalculateNNZPerRowIsEquivalentToRef) +{ + set_up_apply_data(); + + gko::Array nnz_per_row; + nnz_per_row.set_executor(ref); + nnz_per_row.resize_and_reset(mtx->get_size()[0]); + + gko::Array dnnz_per_row; + dnnz_per_row.set_executor(dpcpp); + dnnz_per_row.resize_and_reset(dmtx->get_size()[0]); + + gko::kernels::reference::ell::calculate_nonzeros_per_row(ref, mtx.get(), + &nnz_per_row); + gko::kernels::dpcpp::ell::calculate_nonzeros_per_row(dpcpp, dmtx.get(), + &dnnz_per_row); + + auto tmp = gko::Array(ref, dnnz_per_row); + for (auto i = 0; i < nnz_per_row.get_num_elems(); i++) { + ASSERT_EQ(nnz_per_row.get_const_data()[i], tmp.get_const_data()[i]); + } +} + + +TEST_F(Ell, CountNNZIsEquivalentToRef) +{ + set_up_apply_data(); + + gko::size_type nnz; + gko::size_type dnnz; + + gko::kernels::reference::ell::count_nonzeros(ref, mtx.get(), &nnz); + gko::kernels::dpcpp::ell::count_nonzeros(dpcpp, dmtx.get(), &dnnz); + + ASSERT_EQ(nnz, dnnz); +} + + +TEST_F(Ell, ExtractDiagonalIsEquivalentToRef) +{ + set_up_apply_data(); + + auto diag = mtx->extract_diagonal(); + auto ddiag = dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Ell, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + +TEST_F(Ell, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); +} + + +} // namespace diff --git a/dpcpp/test/matrix/fbcsr_kernels.cpp b/dpcpp/test/matrix/fbcsr_kernels.cpp new file mode 100644 index 
00000000000..5b2dcf19628 --- /dev/null +++ b/dpcpp/test/matrix/fbcsr_kernels.cpp @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include "core/test/matrix/fbcsr_sample.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Fbcsr; + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::unique_ptr mtx; +}; + + +TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) +{ + using value_type = Mtx::value_type; + using index_type = Mtx::index_type; + using MatData = gko::matrix_data; + gko::testing::FbcsrSample sample(ref); + auto refmat = sample.generate_fbcsr(); + auto dpcppmat = Mtx::create(dpcpp); + dpcppmat->copy_from(gko::lend(refmat)); + MatData refdata; + MatData dpcppdata; + + refmat->write(refdata); + dpcppmat->write(dpcppdata); + + ASSERT_TRUE(refdata.nonzeros == dpcppdata.nonzeros); +} + + +} // namespace diff --git a/dpcpp/test/matrix/hybrid_kernels.cpp b/dpcpp/test/matrix/hybrid_kernels.cpp new file mode 100644 index 00000000000..016c9c9e608 --- /dev/null +++ b/dpcpp/test/matrix/hybrid_kernels.cpp @@ -0,0 +1,302 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/hybrid_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Hybrid : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Hybrid; + using Vec = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; + + Hybrid() : rand_engine(42) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(min_nnz_row, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(int num_vectors = 1, + std::shared_ptr strategy = + std::make_shared()) + { + mtx = Mtx::create(ref, strategy); + mtx->copy_from(gen_mtx(532, 231, 1)); + expected = gen_mtx(532, num_vectors, 1); + y = gen_mtx(231, num_vectors, 1); + alpha = gko::initialize({2.0}, ref); + beta = gko::initialize({-1.0}, ref); + dmtx = Mtx::create(dpcpp, strategy); + dmtx->copy_from(mtx.get()); + dresult = Vec::create(dpcpp); + dresult->copy_from(expected.get()); + dy = Vec::create(dpcpp); + dy->copy_from(y.get()); + dalpha = Vec::create(dpcpp); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(dpcpp); + dbeta->copy_from(beta.get()); + } + + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr 
dmtx; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Hybrid, SubMatrixExecutorAfterCopyIsEquivalentToExcutor) +{ + set_up_apply_data(); + + auto coo_mtx = dmtx->get_coo(); + auto ell_mtx = dmtx->get_ell(); + + ASSERT_EQ(coo_mtx->get_executor(), dpcpp); + ASSERT_EQ(ell_mtx->get_executor(), dpcpp); + ASSERT_EQ(dmtx->get_executor(), dpcpp); +} + + +TEST_F(Hybrid, SimpleApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Hybrid, AdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Hybrid, SimpleApplyToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Hybrid, AdvancedApplyToDenseMatrixIsEquivalentToRef) +{ + set_up_apply_data(3); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Hybrid, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Hybrid, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto 
complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Hybrid, CountNonzerosIsEquivalentToRef) +{ + set_up_apply_data(); + gko::size_type nonzeros; + gko::size_type dnonzeros; + + gko::kernels::reference::hybrid::count_nonzeros(ref, mtx.get(), &nonzeros); + gko::kernels::dpcpp::hybrid::count_nonzeros(dpcpp, dmtx.get(), &dnonzeros); + + ASSERT_EQ(nonzeros, dnonzeros); +} + + +TEST_F(Hybrid, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_data(1, std::make_shared(2)); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + mtx->convert_to(csr_mtx.get()); + dmtx->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), r::value); +} + + +TEST_F(Hybrid, MoveToCsrIsEquivalentToRef) +{ + set_up_apply_data(1, std::make_shared(2)); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + mtx->move_to(csr_mtx.get()); + dmtx->move_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), r::value); +} + + +TEST_F(Hybrid, ExtractDiagonalIsEquivalentToRef) +{ + set_up_apply_data(); + + auto diag = mtx->extract_diagonal(); + auto ddiag = dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Hybrid, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + +TEST_F(Hybrid, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, std::make_shared(2)); + 
using AbsMtx = gko::remove_complex; + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + auto abs_strategy = gko::as(abs_mtx->get_strategy()); + auto dabs_strategy = + gko::as(dabs_mtx->get_strategy()); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), + dabs_strategy->get_num_columns()); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), 2); +} + + +} // namespace diff --git a/dpcpp/test/matrix/sellp_kernels.cpp b/dpcpp/test/matrix/sellp_kernels.cpp new file mode 100644 index 00000000000..4fd072227bd --- /dev/null +++ b/dpcpp/test/matrix/sellp_kernels.cpp @@ -0,0 +1,367 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/sellp_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Sellp : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using vtype = float; +#else + using vtype = double; +#endif // GINKGO_DPCPP_SINGLE_MODE + using Mtx = gko::matrix::Sellp; + using Vec = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; + + Sellp() : rand_engine(42) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_matrix( + int total_cols = 1, int slice_size = gko::matrix::default_slice_size, + int stride_factor = gko::matrix::default_stride_factor) + { + mtx = Mtx::create(ref); + mtx->copy_from(gen_mtx(532, 231)); + empty = Mtx::create(ref); + expected = gen_mtx(532, total_cols); + y = gen_mtx(231, total_cols); + alpha = gko::initialize({2.0}, ref); + beta = 
gko::initialize({-1.0}, ref); + dmtx = Mtx::create(dpcpp); + dmtx->copy_from(mtx.get()); + dempty = Mtx::create(dpcpp); + dresult = Vec::create(dpcpp); + dresult->copy_from(expected.get()); + dy = Vec::create(dpcpp); + dy->copy_from(y.get()); + dalpha = Vec::create(dpcpp); + dalpha->copy_from(alpha.get()); + dbeta = Vec::create(dpcpp); + dbeta->copy_from(beta.get()); + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr mtx; + std::unique_ptr empty; + std::unique_ptr expected; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + + std::unique_ptr dmtx; + std::unique_ptr dempty; + std::unique_ptr dresult; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Sellp, SimpleApplyIsEquivalentToRef) +{ + set_up_apply_matrix(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, AdvancedApplyIsEquivalentToRef) +{ + set_up_apply_matrix(); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, SimpleApplyWithSliceSizeAndStrideFactorIsEquivalentToRef) +{ + set_up_apply_matrix(1, 32, 2); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, AdvancedApplyWithSliceSizeAndStrideFActorIsEquivalentToRef) +{ + set_up_apply_matrix(1, 32, 2); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, SimpleApplyMultipleRHSIsEquivalentToRef) +{ + set_up_apply_matrix(64); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + 
GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, AdvancedApplyMultipleRHSIsEquivalentToRef) +{ + set_up_apply_matrix(64); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, + SimpleApplyMultipleRHSWithSliceSizeAndStrideFactorIsEquivalentToRef) +{ + set_up_apply_matrix(32, 2); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, + AdvancedApplyMultipleRHSWithSliceSizeAndStrideFActorIsEquivalentToRef) +{ + set_up_apply_matrix(32, 2); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Sellp, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Sellp, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(dpcpp); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(dpcpp); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, r::value); +} + + +TEST_F(Sellp, 
ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + mtx->convert_to(dense_mtx.get()); + dmtx->convert_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), r::value); +} + + +TEST_F(Sellp, ConvertToCsrIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + mtx->convert_to(csr_mtx.get()); + dmtx->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), r::value); +} + + +TEST_F(Sellp, ConvertEmptyToDenseIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto dense_mtx = gko::matrix::Dense::create(ref); + auto ddense_mtx = gko::matrix::Dense::create(dpcpp); + + empty->convert_to(dense_mtx.get()); + dempty->convert_to(ddense_mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), ddense_mtx.get(), 0); +} + + +TEST_F(Sellp, ConvertEmptyToCsrIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto csr_mtx = gko::matrix::Csr::create(ref); + auto dcsr_mtx = gko::matrix::Csr::create(dpcpp); + + empty->convert_to(csr_mtx.get()); + dempty->convert_to(dcsr_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mtx.get(), dcsr_mtx.get(), 0); +} + + +TEST_F(Sellp, CountNonzerosIsEquivalentToRef) +{ + set_up_apply_matrix(64); + gko::size_type nnz; + gko::size_type dnnz; + + gko::kernels::reference::sellp::count_nonzeros(ref, mtx.get(), &nnz); + gko::kernels::dpcpp::sellp::count_nonzeros(dpcpp, dmtx.get(), &dnnz); + + ASSERT_EQ(nnz, dnnz); +} + + +TEST_F(Sellp, ExtractDiagonalIsEquivalentToRef) +{ + set_up_apply_matrix(64); + + auto diag = mtx->extract_diagonal(); + auto ddiag = dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Sellp, ExtractDiagonalWithSliceSizeAndStrideFactorIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + auto diag = mtx->extract_diagonal(); + auto ddiag = 
dmtx->extract_diagonal(); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + +TEST_F(Sellp, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); +} + + +TEST_F(Sellp, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); +} + + +} // namespace diff --git a/dpcpp/test/solver/CMakeLists.txt b/dpcpp/test/solver/CMakeLists.txt new file mode 100644 index 00000000000..b4f0e4059ca --- /dev/null +++ b/dpcpp/test/solver/CMakeLists.txt @@ -0,0 +1,3 @@ +ginkgo_create_test(gmres_kernels) +ginkgo_create_test(cb_gmres_kernels) +ginkgo_create_test(idr_kernels) diff --git a/dpcpp/test/solver/cb_gmres_kernels.cpp b/dpcpp/test/solver/cb_gmres_kernels.cpp new file mode 100644 index 00000000000..5ebbe3a810e --- /dev/null +++ b/dpcpp/test/solver/cb_gmres_kernels.cpp @@ -0,0 +1,375 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/solver/cb_gmres_accessor.hpp" +#include "core/solver/cb_gmres_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class CbGmres : public ::testing::Test { +protected: +// the CbGmres in single mode does not give benefit from mixed precision storage +#if GINKGO_DPCPP_SINGLE_MODE + using value_type = float; +#else + using value_type = double; +#endif + using storage_type = float; + using index_type = int; + using size_type = gko::size_type; + using Range3dHelper = + gko::cb_gmres::Range3dHelper; + using Range3d = typename Range3dHelper::Range; + using Dense = gko::matrix::Dense; + using Mtx = Dense; + static constexpr unsigned int default_krylov_dim_mixed{100}; + + CbGmres() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + std::unique_ptr 
gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + Range3dHelper generate_krylov_helper(gko::dim<3> size) + { + auto helper = Range3dHelper{ref, size}; + auto &bases = helper.get_bases(); + const auto num_rows = size[0] * size[1]; + const auto num_cols = size[2]; + auto temp_krylov_bases = gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, + ref); + std::copy_n(temp_krylov_bases->get_const_values(), + bases.get_num_elems(), bases.get_data()); + // Only useful when the Accessor actually has a scale + auto range = helper.get_range(); + auto dist = std::normal_distribution(-1, 1); + for (size_type k = 0; k < size[0]; ++k) { + for (size_type i = 0; i < size[2]; ++i) { + gko::cb_gmres::helper_functions_accessor::write_scalar( + range, k, i, dist(rand_engine)); + } + } + return helper; + } + + void initialize_data() + { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else + int m = 597; +#endif + int n = 43; + x = gen_mtx(m, n); + y = gen_mtx(default_krylov_dim_mixed, n); + before_preconditioner = Mtx::create_with_config_of(x.get()); + b = gen_mtx(m, n); + arnoldi_norm = gen_mtx(3, n); + gko::dim<3> krylov_bases_dim(default_krylov_dim_mixed + 1, m, n); + range_helper = generate_krylov_helper(krylov_bases_dim); + + next_krylov_basis = gen_mtx(m, n); + hessenberg = + gen_mtx(default_krylov_dim_mixed + 1, default_krylov_dim_mixed * n); + hessenberg_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + buffer_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + residual = gen_mtx(m, n); + residual_norm = gen_mtx(1, n); + residual_norm_collection = gen_mtx(default_krylov_dim_mixed + 1, n); + givens_sin = gen_mtx(default_krylov_dim_mixed, n); + givens_cos = gen_mtx(default_krylov_dim_mixed, n); + 
stop_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { + stop_status->get_data()[i].reset(); + } + reorth_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < reorth_status->get_num_elems(); ++i) { + reorth_status->get_data()[i].reset(); + } + final_iter_nums = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < final_iter_nums->get_num_elems(); ++i) { + final_iter_nums->get_data()[i] = 5; + } + num_reorth = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < num_reorth->get_num_elems(); ++i) { + num_reorth->get_data()[i] = 5; + } + + d_x = Mtx::create(dpcpp); + d_x->copy_from(x.get()); + d_before_preconditioner = Mtx::create_with_config_of(d_x.get()); + d_y = Mtx::create(dpcpp); + d_y->copy_from(y.get()); + d_b = Mtx::create(dpcpp); + d_b->copy_from(b.get()); + d_arnoldi_norm = Mtx::create(dpcpp); + d_arnoldi_norm->copy_from(arnoldi_norm.get()); + d_range_helper = Range3dHelper{dpcpp, {}}; + d_range_helper = range_helper; + d_next_krylov_basis = Mtx::create(dpcpp); + d_next_krylov_basis->copy_from(next_krylov_basis.get()); + d_hessenberg = Mtx::create(dpcpp); + d_hessenberg->copy_from(hessenberg.get()); + d_hessenberg_iter = Mtx::create(dpcpp); + d_hessenberg_iter->copy_from(hessenberg_iter.get()); + d_buffer_iter = Mtx::create(dpcpp); + d_buffer_iter->copy_from(buffer_iter.get()); + d_residual = Mtx::create(dpcpp); + d_residual->copy_from(residual.get()); + d_residual_norm = Mtx::create(dpcpp); + d_residual_norm->copy_from(residual_norm.get()); + d_residual_norm_collection = Mtx::create(dpcpp); + d_residual_norm_collection->copy_from(residual_norm_collection.get()); + d_givens_sin = Mtx::create(dpcpp); + d_givens_sin->copy_from(givens_sin.get()); + d_givens_cos = Mtx::create(dpcpp); + d_givens_cos->copy_from(givens_cos.get()); + d_stop_status = std::unique_ptr>( + new gko::Array(dpcpp, n)); + *d_stop_status = *stop_status; + 
d_reorth_status = std::unique_ptr>( + new gko::Array(dpcpp, n)); + *d_reorth_status = *reorth_status; + d_final_iter_nums = std::unique_ptr>( + new gko::Array(dpcpp, n)); + *d_final_iter_nums = *final_iter_nums; + d_num_reorth = std::unique_ptr>( + new gko::Array(dpcpp, n)); + *d_num_reorth = *num_reorth; + } + + void assert_krylov_bases_near() + { + gko::Array d_to_host{ref}; + auto &krylov_bases = range_helper.get_bases(); + d_to_host = d_range_helper.get_bases(); + const auto tolerance = r::value; + using sycl::abs; + for (gko::size_type i = 0; i < krylov_bases.get_num_elems(); ++i) { + const auto ref_value = krylov_bases.get_const_data()[i]; + const auto dev_value = d_to_host.get_const_data()[i]; + ASSERT_LE(fabsf(dev_value - ref_value), tolerance); + } + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr before_preconditioner; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr b; + std::unique_ptr arnoldi_norm; + Range3dHelper range_helper; + std::unique_ptr next_krylov_basis; + std::unique_ptr hessenberg; + std::unique_ptr hessenberg_iter; + std::unique_ptr buffer_iter; + std::unique_ptr residual; + std::unique_ptr residual_norm; + std::unique_ptr residual_norm_collection; + std::unique_ptr givens_sin; + std::unique_ptr givens_cos; + std::unique_ptr> stop_status; + std::unique_ptr> reorth_status; + std::unique_ptr> final_iter_nums; + std::unique_ptr> num_reorth; + + std::unique_ptr d_x; + std::unique_ptr d_before_preconditioner; + std::unique_ptr d_y; + std::unique_ptr d_b; + std::unique_ptr d_arnoldi_norm; + Range3dHelper d_range_helper; + std::unique_ptr d_next_krylov_basis; + std::unique_ptr d_hessenberg; + std::unique_ptr d_hessenberg_iter; + std::unique_ptr d_buffer_iter; + std::unique_ptr d_residual; + std::unique_ptr d_residual_norm; + std::unique_ptr d_residual_norm_collection; + std::unique_ptr d_givens_sin; + std::unique_ptr d_givens_cos; + std::unique_ptr> d_stop_status; + 
std::unique_ptr> d_reorth_status; + std::unique_ptr> d_final_iter_nums; + std::unique_ptr> d_num_reorth; +}; + + +TEST_F(CbGmres, DpcppCbGmresInitialize1IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_1( + ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + stop_status.get(), default_krylov_dim_mixed); + gko::kernels::dpcpp::cb_gmres::initialize_1( + dpcpp, d_b.get(), d_residual.get(), d_givens_sin.get(), + d_givens_cos.get(), d_stop_status.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); + GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); +} + + +TEST_F(CbGmres, DpcppCbGmresInitialize2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_2( + ref, residual.get(), residual_norm.get(), + residual_norm_collection.get(), arnoldi_norm.get(), + range_helper.get_range(), next_krylov_basis.get(), + final_iter_nums.get(), default_krylov_dim_mixed); + gko::kernels::dpcpp::cb_gmres::initialize_2( + dpcpp, d_residual.get(), d_residual_norm.get(), + d_residual_norm_collection.get(), d_arnoldi_norm.get(), + d_range_helper.get_range(), d_next_krylov_basis.get(), + d_final_iter_nums.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + r::value); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, DpcppCbGmresStep1IsEquivalentToRef) +{ + initialize_data(); + int iter = 5; + + gko::kernels::reference::cb_gmres::step_1( + ref, next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), + range_helper.get_range(), 
hessenberg_iter.get(), buffer_iter.get(), + arnoldi_norm.get(), iter, final_iter_nums.get(), stop_status.get(), + reorth_status.get(), num_reorth.get()); + gko::kernels::dpcpp::cb_gmres::step_1( + dpcpp, d_next_krylov_basis.get(), d_givens_sin.get(), + d_givens_cos.get(), d_residual_norm.get(), + d_residual_norm_collection.get(), d_range_helper.get_range(), + d_hessenberg_iter.get(), d_buffer_iter.get(), d_arnoldi_norm.get(), + iter, d_final_iter_nums.get(), d_stop_status.get(), + d_reorth_status.get(), d_num_reorth.get()); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_next_krylov_basis, next_krylov_basis, + r::value); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + r::value); + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, + r::value); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, DpcppCbGmresStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::step_2( + ref, residual_norm_collection.get(), + range_helper.get_range().get_accessor().to_const(), hessenberg.get(), + y.get(), before_preconditioner.get(), final_iter_nums.get()); + gko::kernels::dpcpp::cb_gmres::step_2( + dpcpp, d_residual_norm_collection.get(), + d_range_helper.get_range().get_accessor().to_const(), + d_hessenberg.get(), d_y.get(), d_before_preconditioner.get(), + d_final_iter_nums.get()); + + GKO_ASSERT_MTX_NEAR(d_y, y, r::value); + GKO_ASSERT_MTX_NEAR(d_x, x, r::value); +} + + +} // namespace diff --git a/dpcpp/test/solver/gmres_kernels.cpp b/dpcpp/test/solver/gmres_kernels.cpp new file mode 100644 index 00000000000..72f7ba0ae06 --- /dev/null +++ b/dpcpp/test/solver/gmres_kernels.cpp @@ -0,0 +1,309 @@ 
+/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/solver/gmres_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Gmres : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using value_type = float; +#else + using value_type = double; +#endif + using index_type = gko::int32; + using Mtx = gko::matrix::Dense; + using norm_type = gko::remove_complex; + using NormVector = gko::matrix::Dense; + template + using Dense = typename gko::matrix::Dense; + + Gmres() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + template + std::unique_ptr> gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix>( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + void initialize_data(int nrhs = 43) + { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else + int m = 597; +#endif + x = gen_mtx(m, nrhs); + y = gen_mtx(gko::solver::default_krylov_dim, nrhs); + before_preconditioner = Mtx::create_with_config_of(x.get()); + b = gen_mtx(m, nrhs); + krylov_bases = gen_mtx(m * (gko::solver::default_krylov_dim + 1), nrhs); + hessenberg = gen_mtx(gko::solver::default_krylov_dim + 1, + gko::solver::default_krylov_dim * nrhs); + hessenberg_iter = gen_mtx(gko::solver::default_krylov_dim + 1, nrhs); + residual = gen_mtx(m, nrhs); + residual_norm = gen_mtx(1, nrhs); + residual_norm_collection = + gen_mtx(gko::solver::default_krylov_dim + 1, nrhs); + givens_sin = gen_mtx(gko::solver::default_krylov_dim, nrhs); + givens_cos = gen_mtx(gko::solver::default_krylov_dim, 
nrhs); + stop_status = std::unique_ptr>( + new gko::Array(ref, nrhs)); + for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { + stop_status->get_data()[i].reset(); + } + final_iter_nums = std::unique_ptr>( + new gko::Array(ref, nrhs)); + for (size_t i = 0; i < final_iter_nums->get_num_elems(); ++i) { + final_iter_nums->get_data()[i] = 5; + } + + d_x = Mtx::create(dpcpp); + d_x->copy_from(x.get()); + d_before_preconditioner = Mtx::create_with_config_of(d_x.get()); + d_y = Mtx::create(dpcpp); + d_y->copy_from(y.get()); + d_b = Mtx::create(dpcpp); + d_b->copy_from(b.get()); + d_krylov_bases = Mtx::create(dpcpp); + d_krylov_bases->copy_from(krylov_bases.get()); + d_hessenberg = Mtx::create(dpcpp); + d_hessenberg->copy_from(hessenberg.get()); + d_hessenberg_iter = Mtx::create(dpcpp); + d_hessenberg_iter->copy_from(hessenberg_iter.get()); + d_residual = Mtx::create(dpcpp); + d_residual->copy_from(residual.get()); + d_residual_norm = NormVector::create(dpcpp); + d_residual_norm->copy_from(residual_norm.get()); + d_residual_norm_collection = Mtx::create(dpcpp); + d_residual_norm_collection->copy_from(residual_norm_collection.get()); + d_givens_sin = Mtx::create(dpcpp); + d_givens_sin->copy_from(givens_sin.get()); + d_givens_cos = Mtx::create(dpcpp); + d_givens_cos->copy_from(givens_cos.get()); + d_stop_status = std::unique_ptr>( + new gko::Array(dpcpp, nrhs)); + *d_stop_status = *stop_status; + d_final_iter_nums = std::unique_ptr>( + new gko::Array(dpcpp, nrhs)); + *d_final_iter_nums = *final_iter_nums; + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + + std::unique_ptr before_preconditioner; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr b; + std::unique_ptr krylov_bases; + std::unique_ptr hessenberg; + std::unique_ptr hessenberg_iter; + std::unique_ptr residual; + std::unique_ptr residual_norm; + std::unique_ptr residual_norm_collection; + std::unique_ptr givens_sin; + std::unique_ptr givens_cos; + std::unique_ptr> 
stop_status; + std::unique_ptr> final_iter_nums; + + std::unique_ptr d_x; + std::unique_ptr d_before_preconditioner; + std::unique_ptr d_y; + std::unique_ptr d_b; + std::unique_ptr d_krylov_bases; + std::unique_ptr d_hessenberg; + std::unique_ptr d_hessenberg_iter; + std::unique_ptr d_residual; + std::unique_ptr d_residual_norm; + std::unique_ptr d_residual_norm_collection; + std::unique_ptr d_givens_sin; + std::unique_ptr d_givens_cos; + std::unique_ptr> d_stop_status; + std::unique_ptr> d_final_iter_nums; +}; + + +TEST_F(Gmres, DpcppGmresInitialize1IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::gmres::initialize_1( + ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + stop_status.get(), gko::solver::default_krylov_dim); + gko::kernels::dpcpp::gmres::initialize_1( + dpcpp, d_b.get(), d_residual.get(), d_givens_sin.get(), + d_givens_cos.get(), d_stop_status.get(), + gko::solver::default_krylov_dim); + + GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); + GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); +} + + +TEST_F(Gmres, DpcppGmresInitialize2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::gmres::initialize_2( + ref, residual.get(), residual_norm.get(), + residual_norm_collection.get(), krylov_bases.get(), + final_iter_nums.get(), gko::solver::default_krylov_dim); + gko::kernels::dpcpp::gmres::initialize_2( + dpcpp, d_residual.get(), d_residual_norm.get(), + d_residual_norm_collection.get(), d_krylov_bases.get(), + d_final_iter_nums.get(), gko::solver::default_krylov_dim); + + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + r::value); + GKO_ASSERT_MTX_NEAR(d_krylov_bases, krylov_bases, r::value); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(Gmres, 
DpcppGmresStep1IsEquivalentToRef) +{ + initialize_data(); + int iter = 5; + + gko::kernels::reference::gmres::step_1( + ref, x->get_size()[0], givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), krylov_bases.get(), + hessenberg_iter.get(), iter, final_iter_nums.get(), stop_status.get()); + gko::kernels::dpcpp::gmres::step_1( + dpcpp, d_x->get_size()[0], d_givens_sin.get(), d_givens_cos.get(), + d_residual_norm.get(), d_residual_norm_collection.get(), + d_krylov_bases.get(), d_hessenberg_iter.get(), iter, + d_final_iter_nums.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + r::value); + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, + 2 * r::value); + GKO_ASSERT_MTX_NEAR(d_krylov_bases, krylov_bases, r::value); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(Gmres, DpcppGmresStep1OnSingleRHSIsEquivalentToRef) +{ + initialize_data(1); + int iter = 5; + + gko::kernels::reference::gmres::step_1( + ref, x->get_size()[0], givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), krylov_bases.get(), + hessenberg_iter.get(), iter, final_iter_nums.get(), stop_status.get()); + gko::kernels::dpcpp::gmres::step_1( + dpcpp, d_x->get_size()[0], d_givens_sin.get(), d_givens_cos.get(), + d_residual_norm.get(), d_residual_norm_collection.get(), + d_krylov_bases.get(), d_hessenberg_iter.get(), iter, + d_final_iter_nums.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + r::value); + 
GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, + 2 * r::value); + GKO_ASSERT_MTX_NEAR(d_krylov_bases, krylov_bases, r::value); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(Gmres, DpcppGmresStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::gmres::step_2(ref, residual_norm_collection.get(), + krylov_bases.get(), hessenberg.get(), + y.get(), before_preconditioner.get(), + final_iter_nums.get()); + gko::kernels::dpcpp::gmres::step_2(dpcpp, d_residual_norm_collection.get(), + d_krylov_bases.get(), d_hessenberg.get(), + d_y.get(), d_before_preconditioner.get(), + d_final_iter_nums.get()); + + GKO_ASSERT_MTX_NEAR(d_y, y, r::value); + GKO_ASSERT_MTX_NEAR(d_x, x, r::value); +} + + +} // namespace diff --git a/dpcpp/test/solver/idr_kernels.cpp b/dpcpp/test/solver/idr_kernels.cpp new file mode 100644 index 00000000000..9b655f558be --- /dev/null +++ b/dpcpp/test/solver/idr_kernels.cpp @@ -0,0 +1,389 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/solver/idr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +// use another alias to avoid conflict name in the Idr +template +using rr = typename gko::test::reduction_factor; + +class Idr : public ::testing::Test { +protected: +#if GINKGO_DPCPP_SINGLE_MODE + using value_type = float; +#else + using value_type = double; +#endif + using Mtx = gko::matrix::Dense; + using Solver = gko::solver::Idr; + + Idr() : rand_engine(30) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + dpcpp = gko::DpcppExecutor::create(0, ref); + + dpcpp_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(dpcpp)) + .on(dpcpp); + + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + } + + void TearDown() + { + if (dpcpp != nullptr) { + ASSERT_NO_THROW(dpcpp->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int 
num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution>(0.0, 1.0), + rand_engine, ref); + } + + void initialize_data(int size = 597, int input_nrhs = 17) + { + nrhs = input_nrhs; + int s = 4; + mtx = gen_mtx(size, size); + x = gen_mtx(size, nrhs); + b = gen_mtx(size, nrhs); + r = gen_mtx(size, nrhs); + m = gen_mtx(s, nrhs * s); + f = gen_mtx(s, nrhs); + g = gen_mtx(size, nrhs * s); + u = gen_mtx(size, nrhs * s); + c = gen_mtx(s, nrhs); + v = gen_mtx(size, nrhs); + p = gen_mtx(s, size); + alpha = gen_mtx(1, nrhs); + omega = gen_mtx(1, nrhs); + tht = gen_mtx(1, nrhs); + residual_norm = gen_mtx(1, nrhs); + stop_status = std::unique_ptr>( + new gko::Array(ref, nrhs)); + for (size_t i = 0; i < nrhs; ++i) { + stop_status->get_data()[i].reset(); + } + + d_mtx = Mtx::create(dpcpp); + d_x = Mtx::create(dpcpp); + d_b = Mtx::create(dpcpp); + d_r = Mtx::create(dpcpp); + d_m = Mtx::create(dpcpp); + d_f = Mtx::create(dpcpp); + d_g = Mtx::create(dpcpp); + d_u = Mtx::create(dpcpp); + d_c = Mtx::create(dpcpp); + d_v = Mtx::create(dpcpp); + d_p = Mtx::create(dpcpp); + d_alpha = Mtx::create(dpcpp); + d_omega = Mtx::create(dpcpp); + d_tht = Mtx::create(dpcpp); + d_residual_norm = Mtx::create(dpcpp); + d_stop_status = std::unique_ptr>( + new gko::Array(dpcpp)); + + d_mtx->copy_from(mtx.get()); + d_x->copy_from(x.get()); + d_b->copy_from(b.get()); + d_r->copy_from(r.get()); + d_m->copy_from(m.get()); + d_f->copy_from(f.get()); + d_g->copy_from(g.get()); + d_u->copy_from(u.get()); + d_c->copy_from(c.get()); + d_v->copy_from(v.get()); + d_p->copy_from(p.get()); + d_alpha->copy_from(alpha.get()); + d_omega->copy_from(omega.get()); + d_tht->copy_from(tht.get()); + d_residual_norm->copy_from(residual_norm.get()); + *d_stop_status = + *stop_status; // copy_from is not a public member function of Array + } + + std::shared_ptr ref; + std::shared_ptr dpcpp; + + std::ranlux48 rand_engine; + 
+ std::shared_ptr mtx; + std::shared_ptr d_mtx; + std::unique_ptr dpcpp_idr_factory; + std::unique_ptr ref_idr_factory; + + gko::size_type nrhs; + + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr r; + std::unique_ptr m; + std::unique_ptr f; + std::unique_ptr g; + std::unique_ptr u; + std::unique_ptr c; + std::unique_ptr v; + std::unique_ptr p; + std::unique_ptr alpha; + std::unique_ptr omega; + std::unique_ptr tht; + std::unique_ptr residual_norm; + std::unique_ptr> stop_status; + + std::unique_ptr d_x; + std::unique_ptr d_b; + std::unique_ptr d_r; + std::unique_ptr d_m; + std::unique_ptr d_f; + std::unique_ptr d_g; + std::unique_ptr d_u; + std::unique_ptr d_c; + std::unique_ptr d_v; + std::unique_ptr d_p; + std::unique_ptr d_alpha; + std::unique_ptr d_omega; + std::unique_ptr d_tht; + std::unique_ptr d_residual_norm; + std::unique_ptr> d_stop_status; +}; + + +TEST_F(Idr, IdrInitializeIsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::idr::initialize(ref, nrhs, m.get(), p.get(), true, + stop_status.get()); + gko::kernels::dpcpp::idr::initialize(dpcpp, nrhs, d_m.get(), d_p.get(), + true, d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(m, d_m, rr::value); + GKO_ASSERT_MTX_NEAR(p, d_p, rr::value); +} + + +TEST_F(Idr, IdrStep1IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_1(ref, nrhs, k, m.get(), f.get(), + r.get(), g.get(), c.get(), v.get(), + stop_status.get()); + gko::kernels::dpcpp::idr::step_1(dpcpp, nrhs, k, d_m.get(), d_f.get(), + d_r.get(), d_g.get(), d_c.get(), d_v.get(), + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(c, d_c, rr::value); + GKO_ASSERT_MTX_NEAR(v, d_v, rr::value); +} + + +TEST_F(Idr, IdrStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_2(ref, nrhs, k, omega.get(), v.get(), + c.get(), u.get(), stop_status.get()); + gko::kernels::dpcpp::idr::step_2(dpcpp, nrhs, k, d_omega.get(), d_v.get(), + 
d_c.get(), d_u.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(u, d_u, rr::value); +} + + +TEST_F(Idr, IdrStep3IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_3( + ref, nrhs, k, p.get(), g.get(), v.get(), u.get(), m.get(), f.get(), + alpha.get(), r.get(), x.get(), stop_status.get()); + gko::kernels::dpcpp::idr::step_3( + dpcpp, nrhs, k, d_p.get(), d_g.get(), d_v.get(), d_u.get(), d_m.get(), + d_f.get(), d_alpha.get(), d_r.get(), d_x.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(g, d_g, 2 * rr::value); + GKO_ASSERT_MTX_NEAR(v, d_v, 2 * rr::value); + GKO_ASSERT_MTX_NEAR(u, d_u, 2 * rr::value); + GKO_ASSERT_MTX_NEAR(m, d_m, 2 * rr::value); + GKO_ASSERT_MTX_NEAR(f, d_f, 13 * rr::value); + GKO_ASSERT_MTX_NEAR(r, d_r, 2 * rr::value); + GKO_ASSERT_MTX_NEAR(x, d_x, 2 * rr::value); +} + + +TEST_F(Idr, IdrComputeOmegaIsEquivalentToRef) +{ + initialize_data(); + + value_type kappa = 0.7; + gko::kernels::reference::idr::compute_omega(ref, nrhs, kappa, tht.get(), + residual_norm.get(), + omega.get(), stop_status.get()); + gko::kernels::dpcpp::idr::compute_omega(dpcpp, nrhs, kappa, d_tht.get(), + d_residual_norm.get(), + d_omega.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(omega, d_omega, rr::value); +} + + +TEST_F(Idr, IdrIterationOneRHSIsEquivalentToRef) +{ + if (dpcpp->get_queue()->get_device().is_gpu()) { + GTEST_SKIP() << "skip the test because oneMKL GEMM on gpu may give NaN " + "(under investigation)"; + } + initialize_data(123, 1); + auto ref_solver = ref_idr_factory->generate(mtx); + auto dpcpp_solver = dpcpp_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + dpcpp_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, rr::value * 10); + GKO_ASSERT_MTX_NEAR(d_x, x, rr::value * 10); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceOneRHSIsEquivalentToRef) +{ + initialize_data(123, 1); + dpcpp_idr_factory = + Solver::build() + .with_deterministic(true) + 
.with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(dpcpp)) + .on(dpcpp); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto ref_solver = ref_idr_factory->generate(mtx); + auto dpcpp_solver = dpcpp_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + dpcpp_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, rr::value * 100); + GKO_ASSERT_MTX_NEAR(d_x, x, rr::value * 100); +} + + +TEST_F(Idr, IdrIterationMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 16); + auto dpcpp_solver = dpcpp_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + dpcpp_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, rr::value * 500); + GKO_ASSERT_MTX_NEAR(d_x, x, rr::value * 500); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 6); + dpcpp_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(dpcpp)) + .on(dpcpp); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto dpcpp_solver = dpcpp_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + dpcpp_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, rr::value * 10); + GKO_ASSERT_MTX_NEAR(d_x, x, rr::value * 10); +} + + +} // namespace diff --git a/dpcpp/test/stop/CMakeLists.txt b/dpcpp/test/stop/CMakeLists.txt new file mode 100644 index 00000000000..0ba0781e077 --- /dev/null +++ b/dpcpp/test/stop/CMakeLists.txt @@ -0,0 +1,2 @@ 
+ginkgo_create_test(criterion_kernels) +ginkgo_create_test(residual_norm_kernels) diff --git a/dpcpp/test/stop/criterion_kernels.cpp b/dpcpp/test/stop/criterion_kernels.cpp new file mode 100644 index 00000000000..cdb7f00cb46 --- /dev/null +++ b/dpcpp/test/stop/criterion_kernels.cpp @@ -0,0 +1,107 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +namespace { + + +constexpr gko::size_type test_iterations = 10; + + +class Criterion : public ::testing::Test { +protected: + Criterion() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + // Actually use an iteration stopping criterion because Criterion is an + // abstract class + factory_ = gko::stop::Iteration::build() + .with_max_iters(test_iterations) + .on(dpcpp_); + } + + std::unique_ptr factory_; + std::shared_ptr ref_; + std::shared_ptr dpcpp_; +}; + + +TEST_F(Criterion, SetsOneStopStatus) +{ + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + auto criterion = factory_->generate(nullptr, nullptr, nullptr); + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + + stop_status.set_executor(dpcpp_); + criterion->update() + .num_iterations(test_iterations) + .check(RelativeStoppingId, true, &stop_status, &one_changed); + stop_status.set_executor(ref_); + + ASSERT_EQ(stop_status.get_data()[0].has_stopped(), true); +} + + +TEST_F(Criterion, SetsMultipleStopStatuses) +{ + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + auto criterion = factory_->generate(nullptr, nullptr, nullptr); + gko::Array stop_status(ref_, 3); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.get_data()[2].reset(); + + stop_status.set_executor(dpcpp_); + criterion->update() + .num_iterations(test_iterations) + .check(RelativeStoppingId, true, &stop_status, &one_changed); + stop_status.set_executor(ref_); + + ASSERT_EQ(stop_status.get_data()[0].has_stopped(), true); + ASSERT_EQ(stop_status.get_data()[1].has_stopped(), true); + ASSERT_EQ(stop_status.get_data()[2].has_stopped(), true); +} + + +} // namespace diff --git a/dpcpp/test/stop/residual_norm_kernels.cpp b/dpcpp/test/stop/residual_norm_kernels.cpp new file mode 100644 index 
00000000000..d67dfc086c5 --- /dev/null +++ b/dpcpp/test/stop/residual_norm_kernels.cpp @@ -0,0 +1,816 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +#ifdef GINKGO_DPCPP_SINGLE_MODE +using value_type = float; +constexpr auto tol = r::value; +#else +using value_type = double; +constexpr value_type tol = r::value; +#endif + + +class ResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense; + using NormVector = gko::remove_complex; + + ResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + factory_ = gko::stop::ResidualNorm::build() + .with_reduction_factor(tol) + .on(dpcpp_); + rel_factory_ = gko::stop::ResidualNorm::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::initial_resnorm) + .on(dpcpp_); + abs_factory_ = gko::stop::ResidualNorm::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::absolute) + .on(dpcpp_); + } + + std::unique_ptr::Factory> factory_; + std::unique_ptr::Factory> rel_factory_; + std::unique_ptr::Factory> abs_factory_; + std::shared_ptr dpcpp_; + std::shared_ptr ref_; +}; + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRhsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, 
&stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForRhsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + 
ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRelResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, 
WaitsTillResidualGoalMultipleRHSForRelResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * res_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForAbsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = + 
abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForAbsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = + abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_FALSE( + 
criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +class ResidualNormReduction : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense; + using NormVector = gko::matrix::Dense>; + + ResidualNormReduction() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + factory_ = gko::stop::ResidualNormReduction::build() + .with_reduction_factor(tol) + .on(dpcpp_); + } + + std::unique_ptr::Factory> + factory_; + std::shared_ptr dpcpp_; + std::shared_ptr ref_; +}; + + +TEST_F(ResidualNormReduction, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + 
.check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * res_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); 
+ stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +class RelativeResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense; + using NormVector = gko::matrix::Dense>; + + RelativeResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + factory_ = gko::stop::RelativeResidualNorm::build() + .with_tolerance(tol) + .on(dpcpp_); + } + + std::unique_ptr::Factory> + factory_; + std::shared_ptr dpcpp_; + std::shared_ptr ref_; +}; + + +TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + 
ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +class ImplicitResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense; + using NormVector = gko::matrix::Dense>; + + ImplicitResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + factory_ = gko::stop::ImplicitResidualNorm::build() + 
.with_reduction_factor(tol) + .on(dpcpp_); + } + + std::unique_ptr::Factory> + factory_; + std::shared_ptr dpcpp_; + std::shared_ptr ref_; +}; + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = std::pow(tol * 1.1 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = std::pow(tol * 0.9 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 
0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = std::pow(tol * 0.9 * rhs_norm->at(0, 0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = std::pow(tol * 0.9 * rhs_norm->at(0, 1), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +class AbsoluteResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense; + using NormVector = gko::matrix::Dense>; + + AbsoluteResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + dpcpp_ = gko::DpcppExecutor::create(0, ref_); + factory_ = gko::stop::AbsoluteResidualNorm::build() + .with_tolerance(tol) + .on(dpcpp_); + } + + std::unique_ptr::Factory> + factory_; + std::shared_ptr dpcpp_; + std::shared_ptr ref_; +}; + + +TEST_F(AbsoluteResidualNorm, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = 
gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(AbsoluteResidualNorm, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto d_res = Mtx::create(dpcpp_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(dpcpp_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(dpcpp_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_FALSE( 
+ criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(dpcpp_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +} // namespace diff --git a/dpcpp/test/utils.hpp b/dpcpp/test/utils.hpp new file mode 100644 index 00000000000..57e703b8ef1 --- /dev/null +++ b/dpcpp/test/utils.hpp @@ -0,0 +1,56 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_TEST_UTILS_HPP_ +#define GKO_DPCPP_TEST_UTILS_HPP_ + + +#include + + +namespace { + + +#if GINKGO_DPCPP_SINGLE_MODE +#define SKIP_IF_SINGLE_MODE GTEST_SKIP() << "Skip due to single mode" +#else +#define SKIP_IF_SINGLE_MODE \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") +#endif + + +} // namespace + + +#endif // GKO_DPCPP_TEST_UTILS_HPP_ diff --git a/common/solver/ir_kernels.hpp.inc b/dpcpp/test_dpcpp.dp.cpp similarity index 84% rename from common/solver/ir_kernels.hpp.inc rename to dpcpp/test_dpcpp.dp.cpp index 24a66f2795b..33771901780 100644 --- a/common/solver/ir_kernels.hpp.inc +++ b/dpcpp/test_dpcpp.dp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,12 +30,13 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -__global__ __launch_bounds__(default_block_size) void initialize_kernel( - size_type num_cols, stopping_status *stop_status) -{ - const auto tidx = thread::get_thread_id_flat(); +#include + +namespace sycl = cl::sycl; - if (tidx < num_cols) { - stop_status[tidx].reset(); - } -} \ No newline at end of file +int main() +{ + // Use the queue property `in_order` which is DPC++ only + sycl::queue myQueue{sycl::property::queue::in_order{}}; + return 0; +} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ea8a02e9072..57ee5e6e4b7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,25 +1,74 @@ option(GINKGO_BUILD_EXTLIB_EXAMPLE "Build the external-lib-interfacing with deal.II, you need to link the deal.II library." OFF) -add_subdirectory(custom-logger) -add_subdirectory(custom-matrix-format) -add_subdirectory(custom-stopping-criterion) +option(GINKGO_RUN_EXAMPLES " Compile run and validation targets for the examples." 
ON) + +set(EXAMPLES_EXEC_LIST + adaptiveprecision-blockjacobi + cb-gmres + custom-logger + ginkgo-ranges + ilu-preconditioned-solver + ir-ilu-preconditioned-solver + inverse-iteration + iterative-refinement + mixed-precision-ir + nine-pt-stencil-solver + poisson-solver + preconditioned-solver + simple-solver + three-pt-stencil-solver) + +set(EXAMPLES_LIST + ${EXAMPLES_EXEC_LIST} + custom-stopping-criterion + ginkgo-overhead + minimal-cuda-solver + mixed-spmv + par-ilu-convergence + performance-debugging + preconditioner-export + simple-solver-logging) + +if(GINKGO_BUILD_CUDA AND GINKGO_BUILD_OMP) + list(APPEND EXAMPLES_LIST custom-matrix-format) +endif() + if(GINKGO_BUILD_EXTLIB_EXAMPLE) - add_subdirectory(external-lib-interfacing) + list(APPEND EXAMPLES_LIST external-lib-interfacing) +endif() + +find_package(OpenCV QUIET) +if(OpenCV_FOUND) + list(APPEND EXAMPLES_LIST heat-equation) +else() + message(STATUS "No OpenCV found, disabling heat-equation example") +endif() + +if(GINKGO_HAVE_PAPI_SDE) + list(APPEND EXAMPLES_LIST papi-logging) +endif() + +foreach(example ${EXAMPLES_LIST}) + add_subdirectory(${example}) +endforeach() + +if(GINKGO_RUN_EXAMPLES) + foreach(example ${EXAMPLES_LIST}) + set(example_path "${CMAKE_CURRENT_BINARY_DIR}/${example}") + file(WRITE ${example_path}/target-wrapper.sh "${example_path}/${example} \$\{EX_ARG\}") + add_custom_target("run-${example}" + COMMAND chmod +x ${example_path}/target-wrapper.sh && ${example_path}/target-wrapper.sh > ${example_path}/${example}.out + WORKING_DIRECTORY ${example_path}) + file(WRITE ${example_path}/diff-command "#!/bin/bash +diff <(sed -n '8,$p' ${example_path}/${example}.out | sed -E 's/([^a-z,\":\\s\\)\\(\\{\\}_]+)//g') <(sed -n '6,$p' ${CMAKE_SOURCE_DIR}/examples/${example}/doc/results.dox | head -n -4 | sed -E 's/([^a-z,\":\\s\\)\\(\\{\\}_]+)//g')") + add_custom_target("validate-${example}" + COMMAND chmod +x ${example_path}/diff-command && ${example_path}/diff-command + WORKING_DIRECTORY 
${example_path}) + endforeach() + + add_custom_target(run_all_examples) + add_custom_target(validate_all_examples) + foreach(run_ex ${EXAMPLES_EXEC_LIST}) + add_dependencies(run_all_examples "run-${run_ex}") + add_dependencies(validate_all_examples "validate-${run_ex}") + endforeach() endif() -add_subdirectory(adaptiveprecision-blockjacobi) -add_subdirectory(ginkgo-overhead) -add_subdirectory(ginkgo-ranges) -add_subdirectory(ilu-preconditioned-solver) -add_subdirectory(ir-ilu-preconditioned-solver) -add_subdirectory(inverse-iteration) -add_subdirectory(iterative-refinement) -add_subdirectory(minimal-cuda-solver) -add_subdirectory(mixed-precision-ir) -add_subdirectory(nine-pt-stencil-solver) -add_subdirectory(papi-logging) -add_subdirectory(performance-debugging) -add_subdirectory(poisson-solver) -add_subdirectory(preconditioned-solver) -add_subdirectory(preconditioner-export) -add_subdirectory(simple-solver) -add_subdirectory(simple-solver-logging) -add_subdirectory(three-pt-stencil-solver) diff --git a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt index d3188aaca12..c3cb3183d22 100644 --- a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt +++ b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt @@ -1,4 +1,12 @@ +cmake_minimum_required(VERSION 3.9) +project(adaptiveprecision-blockjacobi) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(adaptiveprecision-blockjacobi adaptiveprecision-blockjacobi.cpp) -target_link_libraries(adaptiveprecision-blockjacobi ginkgo) -target_include_directories(adaptiveprecision-blockjacobi PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(adaptiveprecision-blockjacobi Ginkgo::ginkgo) + configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp 
b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index e64107915f8..b3e672e6c1f 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -55,23 +56,36 @@ int main(int argc, char *argv[]) // Print version information std::cout << gko::version_info::get() << std::endl; - // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + // Figure out where to run the code + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); // Create RHS and initial guess as 1 @@ -97,7 +111,7 @@ int main(int argc, char *argv[]) const RealValueType reduction_factor = 1e-7; auto iter_stop = gko::stop::Iteration::build().with_max_iters(10000u).on(exec); - auto tol_stop = gko::stop::ResidualNormReduction::build() + auto tol_stop = gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec); @@ -119,6 +133,7 @@ int main(int argc, char *argv[]) .on(exec)) .on(exec); // Create solver + solver_gen->add_logger(logger); auto solver = solver_gen->generate(A); @@ -134,11 +149,14 @@ int main(int argc, char *argv[]) auto res = gko::initialize({0.0}, exec); A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res)); + auto impl_res = gko::as(logger->get_implicit_sq_resnorm()); - std::cout << "Initial residual norm sqrt(r^T r): \n"; + std::cout << "Initial residual norm sqrt(r^T r):\n"; write(std::cout, lend(initres)); - std::cout << "Final residual norm sqrt(r^T r): \n"; + std::cout << "Final residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); + std::cout << "Implicit residual norm squared (r^2):\n"; + write(std::cout, lend(impl_res)); // Print solver statistics std::cout << "CG iteration count: " << logger->get_num_iterations() diff --git 
a/examples/adaptiveprecision-blockjacobi/doc/kind b/examples/adaptiveprecision-blockjacobi/doc/kind index 53a96d5771f..082f7497da3 100644 --- a/examples/adaptiveprecision-blockjacobi/doc/kind +++ b/examples/adaptiveprecision-blockjacobi/doc/kind @@ -1 +1 @@ -preconditioners +mixed-precision diff --git a/examples/adaptiveprecision-blockjacobi/doc/results.dox b/examples/adaptiveprecision-blockjacobi/doc/results.dox index 22a3a04c827..b15a46994ca 100644 --- a/examples/adaptiveprecision-blockjacobi/doc/results.dox +++ b/examples/adaptiveprecision-blockjacobi/doc/results.dox @@ -11,8 +11,12 @@ Final residual norm sqrt(r^T r): %%MatrixMarket matrix array real general 1 1 5.69384e-06 +Implicit residual norm squared (r^2): +%%MatrixMarket matrix array real general +1 1 +1.27043e-15 CG iteration count: 5 -CG execution time [ms]: 2.04779 +CG execution time [ms]: 0.080041 @endcode diff --git a/examples/build-setup.sh b/examples/build-setup.sh index 8a2305192cb..d5698c8f7e7 100644 --- a/examples/build-setup.sh +++ b/examples/build-setup.sh @@ -1,28 +1,25 @@ #!/bin/bash # copy libraries -LIBRARY_DIRS="core core/device_hooks reference omp cuda hip" -LIBRARY_NAMES="ginkgo ginkgo_reference ginkgo_omp ginkgo_cuda ginkgo_hip" +LIBRARY_NAMES="ginkgo ginkgo_reference ginkgo_omp ginkgo_cuda ginkgo_hip ginkgo_dpcpp ginkgo_device" SUFFIXES=".so .dylib .dll d.so d.dylib d.dll" -VERSION="1.3.0" -for prefix in ${LIBRARY_DIRS}; do - for name in ${LIBRARY_NAMES}; do - for suffix in ${SUFFIXES}; do - cp ${BUILD_DIR}/${prefix}/lib${name}${suffix}.${VERSION} \ - ${THIS_DIR} 2>/dev/null - if [ -e "${THIS_DIR}/lib${name}${suffix}.${VERSION}" ] - then - ln -s ${THIS_DIR}/lib${name}${suffix}.${VERSION} ${THIS_DIR}/lib${name}${suffix} 2>/dev/null - fi - done +VERSION="1.4.0" +for name in ${LIBRARY_NAMES}; do + for suffix in ${SUFFIXES}; do + cp ${BUILD_DIR}/lib/lib${name}${suffix}.${VERSION} \ + ${THIS_DIR} 2>/dev/null + if [ -e "${THIS_DIR}/lib${name}${suffix}.${VERSION}" ] + then + ln -s 
${THIS_DIR}/lib${name}${suffix}.${VERSION} ${THIS_DIR}/lib${name}${suffix} 2>/dev/null + fi done done # figure out correct compiler flags if ls ${THIS_DIR} | grep -F "libginkgo." >/dev/null; then - LINK_FLAGS="-lginkgo -lginkgo_omp -lginkgo_cuda -lginkgo_reference -lginkgo_hip -Wl,-rpath,${THIS_DIR}" + LINK_FLAGS="-lginkgo -lginkgo_omp -lginkgo_cuda -lginkgo_reference -lginkgo_hip -lginkgo_dpcpp -lginkgo_device -Wl,-rpath,${THIS_DIR}" else - LINK_FLAGS="-lginkgod -lginkgo_ompd -lginkgo_cudad -lginkgo_referenced -lginkgo_hipd -Wl,-rpath,${THIS_DIR}" + LINK_FLAGS="-lginkgod -lginkgo_ompd -lginkgo_cudad -lginkgo_referenced -lginkgo_hipd -lginkgo_dpcppd -lginkgo_deviced -Wl,-rpath,${THIS_DIR}" fi if [ -z "${CXX}" ]; then CXX="c++" diff --git a/examples/cb-gmres/CMakeLists.txt b/examples/cb-gmres/CMakeLists.txt new file mode 100644 index 00000000000..dc9b4befdf5 --- /dev/null +++ b/examples/cb-gmres/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(cb-gmres) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + +add_executable(cb-gmres cb-gmres.cpp) +target_link_libraries(cb-gmres Ginkgo::ginkgo) + +# Copy the data files to the execution directory +configure_file("${Ginkgo_SOURCE_DIR}/matrices/test/ani1.mtx" data/A.mtx COPYONLY) diff --git a/examples/cb-gmres/build.sh b/examples/cb-gmres/build.sh new file mode 100755 index 00000000000..da7e54829b6 --- /dev/null +++ b/examples/cb-gmres/build.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# set up script +if [ $# -ne 1 ]; then + echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY" + exit 1 +fi +BUILD_DIR=$1 +THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) + +source ${THIS_DIR}/../build-setup.sh + +mkdir -p data +cp ${THIS_DIR}/../../matrices/test/ani1.mtx data/A.mtx + +# build +${CXX} -std=c++14 -o ${THIS_DIR}/cb-gmres ${THIS_DIR}/cb-gmres.cpp \ + -I${THIS_DIR}/../../include -I${BUILD_DIR}/include 
\ + -L${THIS_DIR} ${LINK_FLAGS} diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp new file mode 100644 index 00000000000..ffb52579132 --- /dev/null +++ b/examples/cb-gmres/cb-gmres.cpp @@ -0,0 +1,220 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +// This is the main ginkgo header file. 
+#include + +#include +#include +#include +#include +#include +#include + + +// Helper function which measures the time of `solver->apply(b, x)` in seconds +// To get an accurate result, the solve is repeated multiple times (while +// ensuring the initial guess is always the same). The result of the solve will +// be written to x. +double measure_solve_time_in_s(const gko::Executor *exec, gko::LinOp *solver, + const gko::LinOp *b, gko::LinOp *x) +{ + constexpr int repeats{5}; + double duration{0}; + // Make a copy of x, so we can re-use the same initial guess multiple times + auto x_copy = clone(x); + for (int i = 0; i < repeats; ++i) { + // No need to copy it in the first iteration + if (i != 0) { + x_copy->copy_from(x); + } + // Make sure all previous executor operations have finished before + // starting the time + exec->synchronize(); + auto tic = std::chrono::steady_clock::now(); + solver->apply(b, lend(x_copy)); + // Make sure all computations are done before stopping the time + exec->synchronize(); + auto tac = std::chrono::steady_clock::now(); + duration += std::chrono::duration(tac - tic).count(); + } + // Copy the solution back to x, so the caller has the result + x->copy_from(lend(x_copy)); + return duration / static_cast(repeats); +} + + +int main(int argc, char *argv[]) +{ + // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense + // with one column/one row. The advantage of this concept is that using + // multiple vectors is a now a natural extension of adding columns/rows are + // necessary. + using ValueType = double; + using RealValueType = gko::remove_complex; + using IndexType = int; + using vec = gko::matrix::Dense; + using real_vec = gko::matrix::Dense; + // The gko::matrix::Csr class is used here, but any other matrix class such + // as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Ell or + // gko::matrix::Sellp could also be used. 
+ using mtx = gko::matrix::Csr; + // The gko::solver::CbGmres is used here, but any other solver class can + // also be used. + using cb_gmres = gko::solver::CbGmres; + + // Print the ginkgo version information. + std::cout << gko::version_info::get() << std::endl; + + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl; + std::exit(-1); + } + + // Map which generates the appropriate executor + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + + // Note: this matrix is copied from "SOURCE_DIR/matrices" instead of from + // the local directory. 
For details, see + // "examples/cb-gmres/CMakeLists.txt" + auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); + // Create a uniform right-hand side with a norm2 of 1 on the host + // (norm2(b) == 1), followed by copying it to the actual executor + // (to make sure it also works for GPUs) + const auto A_size = A->get_size(); + auto b_host = vec::create(exec->get_master(), gko::dim<2>{A_size[0], 1}); + for (gko::size_type i = 0; i < A_size[0]; ++i) { + b_host->at(i, 0) = + ValueType{1} / std::sqrt(static_cast(A_size[0])); + } + auto b_norm = gko::initialize({0.0}, exec); + b_host->compute_norm2(lend(b_norm)); + auto b = clone(exec, lend(b_host)); + + // As an initial guess, use the right-hand side + auto x_keep = clone(lend(b)); + auto x_reduce = clone(x_keep); + + const RealValueType reduction_factor{1e-6}; + + // Generate two solver factories: `_keep` uses the same precision for the + // krylov basis as the matrix, and `_reduce` uses one precision below it. + // If `ValueType` is double, then `_reduce` uses float as the krylov basis + // storage type + auto solver_gen_keep = + cb_gmres::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1000u).on(exec), + gko::stop::RelativeResidualNorm::build() + .with_tolerance(reduction_factor) + .on(exec)) + .with_krylov_dim(100u) + .with_storage_precision( + gko::solver::cb_gmres::storage_precision::keep) + .on(exec); + + auto solver_gen_reduce = + cb_gmres::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1000u).on(exec), + gko::stop::RelativeResidualNorm::build() + .with_tolerance(reduction_factor) + .on(exec)) + .with_krylov_dim(100u) + .with_storage_precision( + gko::solver::cb_gmres::storage_precision::reduce1) + .on(exec); + // Generate the actual solver from the factory and the matrix. + auto solver_keep = solver_gen_keep->generate(A); + auto solver_reduce = solver_gen_reduce->generate(A); + + // Solve both system and measure the time for each. 
+ auto time_keep = measure_solve_time_in_s(lend(exec), lend(solver_keep), + lend(b), lend(x_keep)); + auto time_reduce = measure_solve_time_in_s(lend(exec), lend(solver_reduce), + lend(b), lend(x_reduce)); + + // Make sure the output is in scientific notation for easier comparison + std::cout << std::scientific; + // Note: The time might not be significantly different since the matrix is + // quite small + std::cout << "Solve time without compression: " << time_keep << " s\n" + << "Solve time with compression: " << time_reduce << " s\n"; + + // To measure if your solution has actually converged, the error of the + // solution is measured. + // one, neg_one are objects that represent the numbers which allow for a + // uniform interface when computing on any device. To compute the residual, + // the (advanced) apply method is used. + auto one = gko::initialize({1.0}, exec); + auto neg_one = gko::initialize({-1.0}, exec); + + auto res_norm_keep = gko::initialize({0.0}, exec); + auto res_norm_reduce = gko::initialize({0.0}, exec); + auto tmp = gko::clone(gko::lend(b)); + + // tmp = Ax - tmp + A->apply(lend(one), lend(x_keep), lend(neg_one), lend(tmp)); + tmp->compute_norm2(lend(res_norm_keep)); + + std::cout << "\nResidual norm without compression:\n"; + write(std::cout, lend(res_norm_keep)); + + tmp->copy_from(lend(b)); + A->apply(lend(one), lend(x_reduce), lend(neg_one), lend(tmp)); + tmp->compute_norm2(lend(res_norm_reduce)); + + std::cout << "\nResidual norm with compression:\n"; + write(std::cout, lend(res_norm_reduce)); +} diff --git a/examples/cb-gmres/doc/builds-on b/examples/cb-gmres/doc/builds-on new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/examples/cb-gmres/doc/builds-on @@ -0,0 +1 @@ + diff --git a/examples/cb-gmres/doc/intro.dox b/examples/cb-gmres/doc/intro.dox new file mode 100644 index 00000000000..f6fe753fb5f --- /dev/null +++ b/examples/cb-gmres/doc/intro.dox @@ -0,0 +1,15 @@ + +

Introduction

+ +

About the example

+This example showcases the usage of the Ginkgo solver CB-GMRES (Compressed +Basis GMRES). A small system is solved with two un-preconditioned CB-GMRES +solvers: + 1. without compressing the Krylov basis; it uses double precision for + both the matrix and the Krylov basis, and + 2. with a compression of the Krylov basis; it uses double precision for the + matrix and all arithmetic operations, while using single precision for the + storage of the Krylov basis. + +Both solves are timed and the residual norm of each solution is computed to +show that both solutions are correct. diff --git a/examples/cb-gmres/doc/kind b/examples/cb-gmres/doc/kind new file mode 100644 index 00000000000..c1d9154931a --- /dev/null +++ b/examples/cb-gmres/doc/kind @@ -0,0 +1 @@ +techniques diff --git a/examples/cb-gmres/doc/results.dox b/examples/cb-gmres/doc/results.dox new file mode 100644 index 00000000000..268d893c979 --- /dev/null +++ b/examples/cb-gmres/doc/results.dox @@ -0,0 +1,21 @@ +

Results

+The following is the expected result: + +@code{.cpp} + +Solve time without compression: 1.842690e-04 s +Solve time with compression: 1.589936e-04 s + +Residual norm without compression: +%%MatrixMarket matrix array real general +1 1 +2.430544e-07 + +Residual norm with compression: +%%MatrixMarket matrix array real general +1 1 +3.437257e-07 + +@endcode + +

Comments about programming and debugging

diff --git a/examples/cb-gmres/doc/short-intro b/examples/cb-gmres/doc/short-intro new file mode 100644 index 00000000000..1970978c02c --- /dev/null +++ b/examples/cb-gmres/doc/short-intro @@ -0,0 +1 @@ +The CB-GMRES solver example. diff --git a/examples/cb-gmres/doc/tooltip b/examples/cb-gmres/doc/tooltip new file mode 100644 index 00000000000..75201cbf08d --- /dev/null +++ b/examples/cb-gmres/doc/tooltip @@ -0,0 +1 @@ +Solve a linear system with CB-GMRES, both with and without compression. Benchmark the solve time and validate the result. diff --git a/examples/custom-logger/CMakeLists.txt b/examples/custom-logger/CMakeLists.txt index 5f15c06b0da..1bc89d54e63 100644 --- a/examples/custom-logger/CMakeLists.txt +++ b/examples/custom-logger/CMakeLists.txt @@ -1,6 +1,15 @@ +cmake_minimum_required(VERSION 3.9) +project(custom-logger) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(custom-logger custom-logger.cpp) -target_link_libraries(custom-logger ginkgo) -target_include_directories(custom-logger PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(custom-logger Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index e2f73ed4825..39fcb440eb1 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Add the fstream header to read from data from files. #include +// Add the map header for storing the executor map. +#include // Add the C++ iomanip header to prettify the output. #include // Add formatting flag modification capabilities. @@ -88,21 +90,24 @@ struct ResidualLogger : gko::log::Logger { void write() const { // Print a header for the table - std::cout << "Recurrent vs true residual norm:" << std::endl; + std::cout << "Recurrent vs true vs implicit residual norm:" + << std::endl; std::cout << '|' << std::setw(10) << "Iteration" << '|' << std::setw(25) << "Recurrent Residual Norm" << '|' << std::setw(25) - << "True Residual Norm" << '|' << std::endl; + << "True Residual Norm" << '|' << std::setw(25) + << "Implicit Residual Norm" << '|' << std::endl; // Print a separation line. Note that for creating `10` characters // `std::setw()` should be set to `11`. std::cout << '|' << std::setfill('-') << std::setw(11) << '|' << std::setw(26) << '|' << std::setw(26) << '|' - << std::setfill(' ') << std::endl; + << std::setw(26) << '|' << std::setfill(' ') << std::endl; // Print the data one by one in the form std::cout << std::scientific; for (std::size_t i = 0; i < iterations.size(); i++) { std::cout << '|' << std::setw(10) << iterations[i] << '|' << std::setw(25) << recurrent_norms[i] << '|' - << std::setw(25) << real_norms[i] << '|' << std::endl; + << std::setw(25) << real_norms[i] << '|' << std::setw(25) + << implicit_norms[i] << '|' << std::endl; } // std::defaultfloat could be used here but some compilers // do not support it properly, e.g. 
the Intel compiler @@ -110,19 +115,30 @@ struct ResidualLogger : gko::log::Logger { // Print a separation line std::cout << '|' << std::setfill('-') << std::setw(11) << '|' << std::setw(26) << '|' << std::setw(26) << '|' - << std::setfill(' ') << std::endl; + << std::setw(26) << '|' << std::setfill(' ') << std::endl; } using gko_dense = gko::matrix::Dense; using gko_real_dense = gko::matrix::Dense; - // Customize the logging hook which is called everytime an iteration is - // completed - void on_iteration_complete(const gko::LinOp *, + // This overload is necessary to avoid interface breaks for Ginkgo 2.0 + void on_iteration_complete(const gko::LinOp *solver, const gko::size_type &iteration, const gko::LinOp *residual, const gko::LinOp *solution, const gko::LinOp *residual_norm) const override + { + this->on_iteration_complete(solver, iteration, residual, solution, + residual_norm, nullptr); + } + + // Customize the logging hook which is called everytime an iteration is + // completed + void on_iteration_complete( + const gko::LinOp *, const gko::size_type &iteration, + const gko::LinOp *residual, const gko::LinOp *solution, + const gko::LinOp *residual_norm, + const gko::LinOp *implicit_sq_residual_norm) const override { // If the solver shares a residual norm, log its value if (residual_norm) { @@ -162,6 +178,18 @@ struct ResidualLogger : gko::log::Logger { real_norms.push_back(-1.0); } + if (implicit_sq_residual_norm) { + auto dense_norm = + gko::as(implicit_sq_residual_norm); + // Add the norm to the `implicit_norms` vector + implicit_norms.push_back( + std::sqrt(get_first_element(gko::lend(dense_norm)))); + } else { + // Add to the `implicit_norms` vector the value -1.0 if it could not + // be computed + implicit_norms.push_back(-1.0); + } + // Add the current iteration number to the `iterations` vector iterations.push_back(iteration); } @@ -183,6 +211,8 @@ struct ResidualLogger : gko::log::Logger { mutable std::vector recurrent_norms{}; // Vector which stores 
all the real residual norms mutable std::vector real_norms{}; + // Vector which stores all the implicit residual norms + mutable std::vector implicit_norms{}; // Vector which stores all the iteration numbers mutable std::vector iterations{}; }; @@ -190,79 +220,93 @@ struct ResidualLogger : gko::log::Logger { int main(int argc, char *argv[]) { - // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense - // with one column/one row. The advantage of this concept is that using - // multiple vectors is a now a natural extension of adding columns/rows are - // necessary. + // Use some shortcuts. In Ginkgo, vectors are seen as a + // gko::matrix::Dense with one column/one row. The advantage of this + // concept is that using multiple vectors is a now a natural extension + // of adding columns/rows are necessary. using ValueType = double; using RealValueType = gko::remove_complex; using IndexType = int; using vec = gko::matrix::Dense; using real_vec = gko::matrix::Dense; - // The gko::matrix::Csr class is used here, but any other matrix class such - // as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Ell or + // The gko::matrix::Csr class is used here, but any other matrix class + // such as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Ell or // gko::matrix::Sellp could also be used. using mtx = gko::matrix::Csr; - // The gko::solver::Cg is used here, but any other solver class can also be - // used. + // The gko::solver::Cg is used here, but any other solver class can also + // be used. using cg = gko::solver::Cg; // Print the ginkgo version information. std::cout << gko::version_info::get() << std::endl; // @sect3{Where do you want to run your solver ?} - // The gko::Executor class is one of the cornerstones of Ginkgo. 
Currently, - // we have support for - // an gko::OmpExecutor, which uses OpenMP multi-threading in most of its - // kernels, a gko::ReferenceExecutor, a single threaded specialization of - // the OpenMP executor and a gko::CudaExecutor which runs the code on a - // NVIDIA GPU if available. - // @note With the help of C++, you see that you only ever need to change the - // executor and all the other functions/ routines within Ginkgo should - // automatically work and run on the executor with any other changes. - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create()); - } else { + // The gko::Executor class is one of the cornerstones of Ginkgo. + // Currently, we have support for an gko::OmpExecutor, which uses OpenMP + // multi-threading in most of its kernels, a gko::ReferenceExecutor, a + // single threaded specialization of the OpenMP executor and a + // gko::CudaExecutor which runs the code on a NVIDIA GPU if available. + // @note With the help of C++, you see that you only ever need to change + // the executor and all the other functions/ routines within Ginkgo + // should automatically work and run on the executor with any other + // changes. + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + // Figure out where to run the code + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // @sect3{Reading your data and transfer to the proper device.} - // Read the matrix, right hand side and the initial solution using the @ref - // read function. - // @note Ginkgo uses C++ smart pointers to automatically manage memory. To - // this end, we use our own object ownership transfer functions that under - // the hood call the required smart pointer functions to manage object - // ownership. The gko::share , gko::give and gko::lend are the functions - // that you would need to use. + // Read the matrix, right hand side and the initial solution using the + // @ref read function. + // @note Ginkgo uses C++ smart pointers to automatically manage memory. + // To this end, we use our own object ownership transfer functions that + // under the hood call the required smart pointer functions to manage + // object ownership. The gko::share , gko::give and gko::lend are the + // functions that you would need to use. auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); auto b = gko::read(std::ifstream("data/b.mtx"), exec); auto x = gko::read(std::ifstream("data/x0.mtx"), exec); const RealValueType reduction_factor = 1e-7; // @sect3{Creating the solver} - // Generate the gko::solver factory. Ginkgo uses the concept of Factories to - // build solvers with certain - // properties. Observe the Fluent interface used here. 
Here a cg solver is - // generated with a stopping criteria of maximum iterations of 20 and a - // residual norm reduction of 1e-15. You also observe that the stopping - // criteria(gko::stop) are also generated from factories using their build - // methods. You need to specify the executors which each of the object needs - // to be built on. + // Generate the gko::solver factory. Ginkgo uses the concept of + // Factories to build solvers with certain properties. Observe the + // Fluent interface used here. Here a cg solver is generated with a + // stopping criteria of maximum iterations of 20 and a residual norm + // reduction of 1e-15. You also observe that the stopping + // criteria(gko::stop) are also generated from factories using their + // build methods. You need to specify the executors which each of the + // object needs to be built on. auto solver_gen = cg::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec); @@ -271,8 +315,9 @@ int main(int argc, char *argv[]) auto logger = std::make_shared>( exec, gko::lend(A), gko::lend(b)); - // Add the previously created logger to the solver factory. The logger will - // be automatically propagated to all solvers created from this factory. + // Add the previously created logger to the solver factory. The logger + // will be automatically propagated to all solvers created from this + // factory. solver_gen->add_logger(logger); // Generate the solver from the matrix. The solver factory built in the @@ -286,31 +331,30 @@ int main(int argc, char *argv[]) auto solver = solver_gen->generate(A); - // Finally, solve the system. The solver, being a gko::LinOp, can be applied - // to a right hand side, b to - // obtain the solution, x. + // Finally, solve the system. 
The solver, being a gko::LinOp, can be + // applied to a right hand side, b to obtain the solution, x. solver->apply(gko::lend(b), gko::lend(x)); // Print the solution to the command line. - std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, gko::lend(x)); // Print the table of the residuals obtained from the logger logger->write(); - // To measure if your solution has actually converged, you can measure the - // error of the solution. - // one, neg_one are objects that represent the numbers which allow for a - // uniform interface when computing on any device. To compute the residual, - // all you need to do is call the apply method, which in this case is an - // spmv and equivalent to the LAPACK z_spmv routine. Finally, you compute - // the euclidean 2-norm with the compute_norm2 function. + // To measure if your solution has actually converged, you can measure + // the error of the solution. one, neg_one are objects that represent + // the numbers which allow for a uniform interface when computing on any + // device. To compute the residual, all you need to do is call the apply + // method, which in this case is an spmv and equivalent to the LAPACK + // z_spmv routine. Finally, you compute the euclidean 2-norm with the + // compute_norm2 function. 
auto one = gko::initialize({1.0}, exec); auto neg_one = gko::initialize({-1.0}, exec); auto res = gko::initialize({0.0}, exec); A->apply(gko::lend(one), gko::lend(x), gko::lend(neg_one), gko::lend(b)); b->compute_norm2(gko::lend(res)); - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, gko::lend(res)); } diff --git a/examples/custom-logger/doc/results.dox b/examples/custom-logger/doc/results.dox index 13ef8f5b746..2d8185091a7 100644 --- a/examples/custom-logger/doc/results.dox +++ b/examples/custom-logger/doc/results.dox @@ -25,30 +25,30 @@ Solution (x): 0.0107016 0.0121141 0.0123025 -Recurrent vs true residual norm: -| Iteration| Recurrent Residual Norm| True Residual Norm| -|----------|-------------------------|-------------------------| -| 0| 4.358899e+00| 4.358899e+00| -| 1| 2.304548e+00| 2.304548e+00| -| 2| 1.467706e+00| 1.467706e+00| -| 3| 9.848751e-01| 9.848751e-01| -| 4| 7.418330e-01| 7.418330e-01| -| 5| 5.136231e-01| 5.136231e-01| -| 6| 3.841650e-01| 3.841650e-01| -| 7| 3.164394e-01| 3.164394e-01| -| 8| 2.277088e-01| 2.277088e-01| -| 9| 1.703121e-01| 1.703121e-01| -| 10| 9.737220e-02| 9.737220e-02| -| 11| 6.168306e-02| 6.168306e-02| -| 12| 4.541231e-02| 4.541231e-02| -| 13| 3.195304e-02| 3.195304e-02| -| 14| 1.616058e-02| 1.616058e-02| -| 15| 6.570152e-03| 6.570152e-03| -| 16| 2.643669e-03| 2.643669e-03| -| 17| 8.588089e-04| 8.588089e-04| -| 18| 2.864613e-04| 2.864613e-04| -| 19| 1.641952e-15| 2.107881e-15| -|----------|-------------------------|-------------------------| +Recurrent vs true vs implicit residual norm: +| Iteration| Recurrent Residual Norm| True Residual Norm| Implicit Residual Norm| +|----------|-------------------------|-------------------------|-------------------------| +| 0| 4.358899e+00| 4.358899e+00| 4.358899e+00| +| 1| 2.304548e+00| 2.304548e+00| 2.304548e+00| +| 2| 1.467706e+00| 1.467706e+00| 1.467706e+00| +| 3| 9.848751e-01| 9.848751e-01| 9.848751e-01| +| 4| 7.418330e-01| 
7.418330e-01| 7.418330e-01| +| 5| 5.136231e-01| 5.136231e-01| 5.136231e-01| +| 6| 3.841650e-01| 3.841650e-01| 3.841650e-01| +| 7| 3.164394e-01| 3.164394e-01| 3.164394e-01| +| 8| 2.277088e-01| 2.277088e-01| 2.277088e-01| +| 9| 1.703121e-01| 1.703121e-01| 1.703121e-01| +| 10| 9.737220e-02| 9.737220e-02| 9.737220e-02| +| 11| 6.168306e-02| 6.168306e-02| 6.168306e-02| +| 12| 4.541231e-02| 4.541231e-02| 4.541231e-02| +| 13| 3.195304e-02| 3.195304e-02| 3.195304e-02| +| 14| 1.616058e-02| 1.616058e-02| 1.616058e-02| +| 15| 6.570152e-03| 6.570152e-03| 6.570152e-03| +| 16| 2.643669e-03| 2.643669e-03| 2.643669e-03| +| 17| 8.588089e-04| 8.588089e-04| 8.588089e-04| +| 18| 2.864613e-04| 2.864613e-04| 2.864613e-04| +| 19| 1.641952e-15| 2.107881e-15| 1.641952e-15| +|----------|-------------------------|-------------------------|-------------------------| Residual norm sqrt(r^T r): %%MatrixMarket matrix array real general 1 1 diff --git a/examples/custom-matrix-format/CMakeLists.txt b/examples/custom-matrix-format/CMakeLists.txt index 81d8c9f7afb..b8e98cfe3d3 100644 --- a/examples/custom-matrix-format/CMakeLists.txt +++ b/examples/custom-matrix-format/CMakeLists.txt @@ -1,13 +1,30 @@ -if (GINKGO_BUILD_CUDA AND GINKGO_BUILD_OMP) - enable_language(CUDA) - set(CMAKE_CUDA_STANDARD 14) - set(CMAKE_CUDA_STANDARD_REQUIRED ON) - add_executable(custom-matrix-format - custom-matrix-format.cpp - stencil_kernel.cu) - target_link_libraries(custom-matrix-format ginkgo) - target_include_directories(custom-matrix-format PRIVATE - ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) - # workaround for clang-cuda/g++ interaction - set_target_properties(custom-matrix-format PROPERTIES POSITION_INDEPENDENT_CODE ON) +cmake_minimum_required(VERSION 3.9) +project(custom-matrix-format CXX CUDA) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) + find_package(OpenMP 3.0 REQUIRED) endif() + +if(NOT (GINKGO_BUILD_CUDA AND 
GINKGO_BUILD_OMP)) + message(FATAL_ERROR + "This example needs Ginkgo built with CUDA and OpenMP support") +endif() + +set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) + +add_executable(custom-matrix-format custom-matrix-format.cpp stencil_kernel.cu) +target_link_libraries(custom-matrix-format Ginkgo::ginkgo OpenMP::OpenMP_CXX) + +# inherit CUDA architecture flags from Ginkgo +target_compile_options(custom-matrix-format + PRIVATE "$<$:${GINKGO_CUDA_ARCH_FLAGS}>") +# we handle CUDA architecture flags for now, disable CMake handling +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + set_target_properties(custom-matrix-format PROPERTIES CUDA_ARCHITECTURES OFF) +endif() + +# workaround for clang-cuda/g++ interaction +set_target_properties(custom-matrix-format PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/examples/custom-matrix-format/build.sh b/examples/custom-matrix-format/build.sh index e56cb0cd25d..10c2d188666 100755 --- a/examples/custom-matrix-format/build.sh +++ b/examples/custom-matrix-format/build.sh @@ -12,13 +12,8 @@ source ${THIS_DIR}/../build-setup.sh CXX="nvcc" -# figure out correct compiler flags -if ls ${THIS_DIR} | grep -F "libginkgo." 
>/dev/null; then - LINK_FLAGS="-lginkgo -lginkgo_omp -lginkgo_cuda -lginkgo_reference -lginkgo_hip -Xlinker -rpath -Xlinker ${THIS_DIR}" -else - LINK_FLAGS="-lginkgod -lginkgo_ompd -lginkgo_cudad -lginkgo_referenced -lginkgo_hipd -Xlinker -rpath -Xlinker ${THIS_DIR}" -fi - +# adjust to nvcc style link flags +LINK_FLAGS="${LINK_FLAGS/-Wl,-rpath,/-Xlinker -rpath -Xlinker }" # build ${CXX} -std=c++14 -o ${THIS_DIR}/custom-matrix-format \ diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index 3f73aeafca1..cfad1b3bedd 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -241,18 +241,15 @@ int main(int argc, char *argv[]) using mtx = gko::matrix::Csr; using cg = gko::solver::Cg; - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " DISCRETIZATION_POINTS [executor]" - << std::endl; + // Figure out where to run the code + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } - // Get number of discretization points + const auto executor_string = argc >= 2 ? argv[1] : "reference"; const unsigned int discretization_points = - argc >= 2 ? std::atoi(argv[1]) : 100u; - const auto executor_string = argc >= 3 ? argv[2] : "reference"; - - // Figure out where to run the code + argc >= 3 ? 
std::atoi(argv[2]) : 100u; std::map()>> exec_map{ {"omp", [] { return gko::OmpExecutor::create(); }}, @@ -266,6 +263,11 @@ int main(int argc, char *argv[]) return gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; // executor where Ginkgo will perform the computation @@ -293,7 +295,7 @@ int main(int argc, char *argv[]) .with_criteria(gko::stop::Iteration::build() .with_max_iters(discretization_points) .on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec) @@ -303,8 +305,8 @@ int main(int argc, char *argv[]) -1, 2, -1)) ->apply(lend(rhs), lend(u)); - print_solution(u0, u1, lend(u)); - std::cout << "The average relative error is " + std::cout << "\nSolve complete." + << "\nThe average relative error is " << calculate_error(discretization_points, lend(u), correct_u) / discretization_points << std::endl; diff --git a/examples/custom-matrix-format/stencil_kernel.cu b/examples/custom-matrix-format/stencil_kernel.cu index fa8b9deb9d1..47cd2540def 100644 --- a/examples/custom-matrix-format/stencil_kernel.cu +++ b/examples/custom-matrix-format/stencil_kernel.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -75,7 +75,7 @@ template void stencil_kernel(std::size_t size, const ValueType *coefs, const ValueType *b, ValueType *x) { - constexpr auto block_size = 512; + constexpr int block_size = 512; const auto grid_size = (size + block_size - 1) / block_size; stencil_kernel_impl<<>>(size, coefs, b, x); } diff --git a/examples/custom-stopping-criterion/CMakeLists.txt b/examples/custom-stopping-criterion/CMakeLists.txt index d04f3b1a071..7009e575bc5 100644 --- a/examples/custom-stopping-criterion/CMakeLists.txt +++ b/examples/custom-stopping-criterion/CMakeLists.txt @@ -1,11 +1,20 @@ +cmake_minimum_required(VERSION 3.9) +project(custom-stopping-criterion) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) + add_executable(custom-stopping-criterion custom-stopping-criterion.cpp) -target_link_libraries(custom-stopping-criterion ginkgo + +target_link_libraries(custom-stopping-criterion Ginkgo::ginkgo Threads::Threads) -target_include_directories(custom-stopping-criterion - PRIVATE ${PROJECT_SOURCE_DIR}) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index b9a4519a027..7c5074009bc 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include @@ -143,22 +144,38 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + // Figure out where to run the code + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + + // Figure out where to run the code + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Declare a user controled boolean for the iteration process volatile bool stop_iteration_process{}; diff --git a/examples/external-lib-interfacing/CMakeLists.txt b/examples/external-lib-interfacing/CMakeLists.txt index 7f97a03e630..9f2a75dcfbd 100644 --- a/examples/external-lib-interfacing/CMakeLists.txt +++ b/examples/external-lib-interfacing/CMakeLists.txt @@ -14,8 +14,9 @@ if(GINKGO_BUILD_EXTLIB_EXAMPLE) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + set(run_target "external-lib-interfacing") add_executable(${PROJECT_NAME} "") - target_sources(${PROJECT_NAME} PRIVATE external-lib-interfacing.cpp) + target_sources(${PROJECT_NAME} PRIVATE ${run_target}.cpp) target_compile_options(${PROJECT_NAME} PRIVATE -g -Wall) target_compile_definitions(${PROJECT_NAME} PRIVATE OMPI_SKIP_MPICXX) diff --git a/examples/external-lib-interfacing/external-lib-interfacing.cpp b/examples/external-lib-interfacing/external-lib-interfacing.cpp index b885eeee77f..122b73b155b 100644 --- a/examples/external-lib-interfacing/external-lib-interfacing.cpp +++ b/examples/external-lib-interfacing/external-lib-interfacing.cpp @@ -881,7 +881,7 @@ void AdvectionProblem::solve() bicgstab::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(1000).on(exec), - gko::stop::ResidualNormReduction<>::build() + gko::stop::ResidualNorm<>::build() 
.with_reduction_factor(1e-12) .on(exec)) .with_preconditioner(bj::build().on(exec)) diff --git a/examples/ginkgo-overhead/CMakeLists.txt b/examples/ginkgo-overhead/CMakeLists.txt index a52a2e31b3a..82512ea0567 100644 --- a/examples/ginkgo-overhead/CMakeLists.txt +++ b/examples/ginkgo-overhead/CMakeLists.txt @@ -1,3 +1,10 @@ +cmake_minimum_required(VERSION 3.9) +project(ginkgo-overhead) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(ginkgo-overhead ginkgo-overhead.cpp) -target_link_libraries(ginkgo-overhead ginkgo) -target_include_directories(ginkgo-overhead PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(ginkgo-overhead Ginkgo::ginkgo) diff --git a/examples/ginkgo-overhead/doc/results.dox b/examples/ginkgo-overhead/doc/results.dox index 7386ccd43da..431c19e88c5 100644 --- a/examples/ginkgo-overhead/doc/results.dox +++ b/examples/ginkgo-overhead/doc/results.dox @@ -3,8 +3,8 @@ This is the expected output: @code{.cpp} -Running 1000000 iterations of the CG solver took a total of 1.62987 seconds. - Average library overhead: 1629.87 [nanoseconds / iteration] +Running 1000000 iterations of the CG solver took a total of 1.60337 seconds. + Average library overhead: 1603.37 [nanoseconds / iteration] @endcode diff --git a/examples/ginkgo-overhead/ginkgo-overhead.cpp b/examples/ginkgo-overhead/ginkgo-overhead.cpp index b8bc7acc2b1..ba41b01acf4 100644 --- a/examples/ginkgo-overhead/ginkgo-overhead.cpp +++ b/examples/ginkgo-overhead/ginkgo-overhead.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/examples/ginkgo-ranges/CMakeLists.txt b/examples/ginkgo-ranges/CMakeLists.txt index 34d5fd27654..c3cd06954c9 100644 --- a/examples/ginkgo-ranges/CMakeLists.txt +++ b/examples/ginkgo-ranges/CMakeLists.txt @@ -1,3 +1,9 @@ +cmake_minimum_required(VERSION 3.9) +project(ginkgo-ranges) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() add_executable(ginkgo-ranges ginkgo-ranges.cpp) -target_link_libraries(ginkgo-ranges ginkgo) -target_include_directories(ginkgo-ranges PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(ginkgo-ranges Ginkgo::ginkgo) diff --git a/examples/ginkgo-ranges/ginkgo-ranges.cpp b/examples/ginkgo-ranges/ginkgo-ranges.cpp index c471f967d60..0853e74f3fc 100644 --- a/examples/ginkgo-ranges/ginkgo-ranges.cpp +++ b/examples/ginkgo-ranges/ginkgo-ranges.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/examples/heat-equation/CMakeLists.txt b/examples/heat-equation/CMakeLists.txt new file mode 100644 index 00000000000..c13a69e8fb7 --- /dev/null +++ b/examples/heat-equation/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.9) +project(heat-equation) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() +find_package(OpenCV REQUIRED) + +add_executable(heat-equation heat-equation.cpp) +target_link_libraries(heat-equation Ginkgo::ginkgo ${OpenCV_LIBS}) + +# Copy the data files to the execution directory +configure_file(data/initial.mtx data/initial.mtx COPYONLY) +configure_file(data/source.mtx data/source.mtx COPYONLY) diff --git a/examples/heat-equation/build.sh b/examples/heat-equation/build.sh new file mode 100755 index 00000000000..495d054d477 --- /dev/null +++ b/examples/heat-equation/build.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# set up script +if [ $# -ne 1 ]; then + echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY" + exit 1 +fi +BUILD_DIR=$1 +THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) + +source ${THIS_DIR}/../build-setup.sh + +# build +${CXX} -std=c++14 -o ${THIS_DIR}/heat-equation \ + ${THIS_DIR}/heat-equation.cpp \ + -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \ + `pkg-config --cflags opencv4` \ + -L${THIS_DIR} ${LINK_FLAGS} \ + `pkg-config --libs opencv4` diff --git a/examples/heat-equation/data/initial.mtx b/examples/heat-equation/data/initial.mtx new file mode 100644 index 00000000000..3d4fdcbecb5 --- /dev/null +++ b/examples/heat-equation/data/initial.mtx @@ -0,0 +1,65538 @@ +%%MatrixMarket matrix array real general +65536 1 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09700788686685691 +0.24134981294511437 +0.3866003304555665 +0.46493975774341134 +0.5023772328168228 +0.5091424310999448 +0.4879897630331653 +0.3944474308679604 +0.19000670232238637 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.1174721498628637 +0.19641728348258702 +0.2296101192536426 +0.19641728348258702 +0.1293633932048852 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.1174721498628637 +0.4402850125061395 +0.6222706499630242 +0.5952918676771857 +3.4134121992978272 +3.448736817474918 +4.515387913750165 +4.547905436471331 +4.531406566217803 +4.49967437303024 +3.4508560466680547 +3.4084134232106233 +0.6099759108185445 +0.593379484990824 +0.2616727643865282 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.2555541939391164 +0.5995223290890301 +0.6151073061225478 +3.3999084873857006 +3.405282092316791 +3.408257823745231 +3.4030586686795825 +3.398331392047578 +0.6048683038797913 +0.6306810902599107 +0.5728484679595169 +0.2819871622377592 +0.04986894719131918 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.4949361895255393 +0.6027748234445571 +3.465580337420984 +4.771893282393301 +4.914400272933426 +4.876422053888234 +4.871259604324209 +4.927710408327659 +4.914400272933426 +4.797007948311492 +4.760252191586986 +4.858812151936161 
+4.954466537076697 +4.954466537076697 +4.524936356535483 +3.400810101543841 +0.6323850363479462 +0.2175816996331532 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.157275361322311 +0.6189656491157116 +3.40192156799998 +4.496400533324626 +4.706661305356881 +4.706661305356881 +4.706661305356881 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.488924457521423 +3.4028602887940522 +0.6379045049580707 +0.2935510911988004 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.2250745829383396 +0.6315576238645662 +3.4625939578942924 +4.927710408327659 +4.927710408327659 +4.914400272933426 +4.836643969491252 +4.779773978458947 +4.799397058532644 +4.914400272933426 +4.914400272933426 +4.779773978458947 +4.760252191586986 +4.856526425443426 +4.927710408327659 +4.942733045803132 +4.858812151936161 +4.74320020196667 +4.5073296469186275 +0.5958542710129855 +0.478329130669521 
+0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.3397734141787456 +0.6077216126175999 +4.517639698678814 +4.706661305356881 +4.706661305356881 +4.706661305356881 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.545406035164644 +3.3964945993160724 +0.5973002521507231 +0.12563154621256611 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.25327535793473643 +0.6170782619076309 +4.546129552465073 +4.856526425443426 +4.914400272933426 +4.927710408327659 +4.90900814782395 +4.799397058532644 +4.779773978458947 +4.760252191586986 +4.851876601914066 +4.871259604324209 +4.760252191586986 +4.740838689292398 +4.836643969491252 +4.927710408327659 +4.927710408327659 +4.876422053888234 +4.7425356663700775 +4.74320020196667 +4.72456262033899 +3.413367042843766 +0.601668839099629 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.4061375121005799 +0.5899185767461592 +4.799131843864706 +4.7807449689252595 +4.706661305356881 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +3.520256162140694 +0.623527224719381 +0.36996027523321917 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.2342163616980768 +0.6239900704573369 +4.565463683036979 +4.799397058532644 +4.799397058532644 +4.836643969491252 +4.914400272933426 +4.914400272933426 +4.799397058532644 +4.760252191586986 +4.760252191586986 +4.779773978458947 +4.779773978458947 +4.740838689292398 +4.740838689292398 +4.816798834118136 +4.927710408327659 +4.927710408327659 +4.890373906609362 +4.740838689292398 +4.723513570758677 +4.72456262033899 +4.72456262033899 +3.4311617867798176 +0.6078689180196122 +0.08652970395641724 
+0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.36632506944213183 +3.396125464650726 +4.7807449689252595 +4.917254376411946 +4.724731264199313 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.629292470505407 +3.402013881744261 +0.6221120681140002 +0.14584680845313702 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.3047863981217178 +0.6176669519964975 +4.565026059278837 +4.836643969491252 +4.779773978458947 +4.779773978458947 +4.779773978458947 +4.836643969491252 +4.914400272933426 +4.816798834118136 +4.760252191586986 +4.760252191586986 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.797007948311492 +4.927710408327659 +4.927710408327659 +4.89873092730281 +4.760252191586986 +4.721541071319574 +4.721541071319574 
+4.723513570758677 +4.72456262033899 +3.4092709205692344 +0.5101841475615974 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.2815945019063122 +0.60697618875888 +4.706661305356881 +4.858680873820008 +4.878241460327516 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.741920921617843 +4.515513748627862 +0.6122451008034862 +0.36838302101761844 +0.02884340803556551 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.5462859882677904 +0.5899413179355788 +4.637805882449454 +4.914400272933426 +4.890373906609362 +4.799397058532644 +4.760252191586986 +4.760252191586986 +4.760252191586986 +4.856526425443426 +4.797007948311492 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 
+4.740838689292398 +4.7772844700957515 +4.914400272933426 +4.927710408327659 +4.914400272933426 +4.7772844700957515 +4.721541071319574 +4.721541071319574 +4.702367606407748 +4.70461635049022 +4.70461635049022 +0.6033346157208195 +0.29090644896388973 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.15433651373052548 +0.6468371917588174 +4.517180948736314 +4.724731264199313 +4.8977894537919315 +4.8196627570631865 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.839146489353515 +4.979648002510261 +4.8052586551908245 +3.4055507670466887 +0.6313873661534748 +0.12563154621256611 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.3804579250412381 +0.6283636673209996 +3.4560886589405255 +4.779773978458947 +4.816798834118136 +4.89873092730281 +4.914400272933426 +4.871259604324209 
+4.760252191586986 +4.760252191586986 +4.760252191586986 +4.760252191586986 +4.760252191586986 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.721541071319574 +4.760252191586986 +4.89873092730281 +4.927710408327659 +4.927710408327659 +4.797007948311492 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.492830953572344 +0.6731571051476077 +0.10651401691436746 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.6505752464000345 +3.482448277995883 +4.705374712396305 +4.741640717081259 +4.939112184045445 +4.779464206950053 +4.705374712396305 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.723266788376083 +4.939112184045445 +4.979648002510261 +4.939112184045445 +4.723266788376083 +3.5213697935196873 +0.6257788837436884 +0.3288024034009762 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1174721498628637 +0.4378629551218145 +0.6242252728394968 +3.416892015768177 +4.6396512859558126 +4.7576395655368735 +4.7576395655368735 +4.760252191586986 +4.816798834118136 +4.914400272933426 +4.914400272933426 +4.851876601914066 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.740838689292398 +4.890373906609362 +4.927710408327659 +4.927710408327659 +4.812732846572471 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +3.3904710512449823 +0.34789026551600166 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.3986814192409289 +3.390093982583272 +4.705374712396305 +4.705374712396305 +4.779464206950053 +4.939112184045445 +4.741920921617843 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.800247028092315 +4.979648002510261 +4.979648002510261 +4.858680873820008 +4.667980307053825 +4.667980307053825 +4.550296376868264 +3.394306358192898 +0.5998540234643901 +0.12563154621256611 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.5023285856438795 +0.6032473782512451 +3.4555084947063524 +4.7321680251582725 +4.818365174589247 +4.7342759124717695 +4.7576395655368735 +4.7576395655368735 +4.7576395655368735 +4.738083404508356 +4.851876601914066 +4.914400272933426 +4.914400272933426 +4.797007948311492 +4.721541071319574 +4.721541071319574 +4.740838689292398 +4.740838689292398 +4.740838689292398 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.871259604324209 +4.927710408327659 +4.927710408327659 +4.851876601914066 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.481931321951778 +0.6819464310925891 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.6477421764976778 +4.580603563978908 +4.6862809068484905 +4.6862809068484905 +4.839146489353515 +4.918935027068591 +4.705374712396305 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.6862809068484905 +4.918935027068591 +4.979648002510261 +4.959350993808612 +4.760369351195106 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +3.5115025769659662 +0.6271487773617475 +0.37691273948061976 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.0576866479940108 +0.5806138431058043 +3.42369249567201 +4.773452659709149 +4.76833752096446 +4.845146974506639 +4.845146974506639 +4.748902697275286 +4.7342759124717695 +4.714774087646149 +4.738083404508356 +4.738083404508356 +4.7576395655368735 +4.870754749200952 +4.900088766172104 +4.89873092730281 +4.7772844700957515 +4.721541071319574 +4.721541071319574 +4.702367606407748 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.721541071319574 +4.702367606407748 +4.832343613752159 +4.927710408327659 +4.927710408327659 +4.890373906609362 +4.721541071319574 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.683327299089502 +4.683327299089502 +0.6372361654016805 +0.19262923125003173 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.07799929245714843 +0.5902957830399753 +3.4346281852775626 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.858812151936161 +4.878780841567253 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.667980307053825 
+4.667980307053825 +4.779602577154106 +4.979648002510261 +4.979648002510261 +4.8977894537919315 +4.667980307053825 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.573804961511842 +3.400674372454345 +0.6446648696800334 +0.25687811066009925 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.4380371529532674 +3.404076937033252 +4.858049419420977 +4.79984516624872 +4.7342759124717695 +4.79984516624872 +4.858049419420977 +4.818365174589247 +4.714774087646149 +4.714774087646149 +4.714774087646149 +4.714774087646149 +4.718625762219989 +4.793091516779753 +4.872529839105629 +4.900088766172104 +4.863636363636363 +4.740838689292398 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.721541071319574 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.797007948311492 +4.927710408327659 +4.927710408327659 +4.89873092730281 +4.740838689292398 +4.702367606407748 +4.702367606407748 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +3.3871959829706197 +0.38541578130400733 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 
+0.2406471531816594 +0.6130836407996663 +4.7047148084587604 +4.6862809068484905 +4.6862809068484905 +4.7047148084587604 +4.898823242738641 +4.838921850181822 +4.6862809068484905 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.6862809068484905 +4.878241460327516 +4.979648002510261 +4.979648002510261 +4.779602577154106 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.4707363627284495 +3.3932655188344025 +0.64079515659542 +0.302207141430471 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.6323293455201187 +4.781042241478235 +4.858049419420977 +4.8584171544605805 +4.748902697275286 +4.7342759124717695 +4.842936919059271 +4.845146974506639 +4.748902697275286 +4.714774087646149 +4.695350099572867 +4.695350099572867 +4.695350099572867 +4.718625762219989 +4.836483399978973 +4.8848078319332995 
+4.8848078319332995 +4.826171391227169 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.702367606407748 +4.683327299089502 +4.683327299089502 +4.702367606407748 +4.702367606407748 +4.7576395655368735 +4.914400272933426 +4.927710408327659 +4.914400272933426 +4.7576395655368735 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +3.4635252048046143 +0.6299960845817676 +0.10651401691436746 +0.02884340803556551 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.654849727357143 +4.4950100072473465 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.723266788376083 +4.935591242636505 +4.8196627570631865 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.687379768913745 +4.687379768913745 +4.687379768913745 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.741226342122529 +4.959350993808612 +4.979648002510261 +4.918935027068591 +4.6862809068484905 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.46955485671531 +3.3971231599860086 +0.6391238293857269 +0.2060605753082827 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.5684107529392541 +3.4426681588491452 +4.7342759124717695 +4.79984516624872 +4.858049419420977 +4.79984516624872 +4.714774087646149 +4.76833752096446 +4.858049419420977 +4.826446087163245 +4.714774087646149 +4.695350099572867 +4.695350099572867 +4.695350099572867 +4.676017577221279 +4.729454156416239 +4.8584171544605805 +4.8848078319332995 +4.872529839105629 +4.773452659709149 +4.702367606407748 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.740838689292398 +4.89873092730281 +4.927710408327659 +4.927710408327659 +4.7772844700957515 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.50990487550728 +0.7108893910022562 +0.14421518948927226 +0.0576866479940108 +0.0 +0.0 +0.0576866479940108 +0.3099984596405294 +0.5941860158497647 +4.779773978458947 +4.6862809068484905 +4.6862809068484905 +4.6862809068484905 +4.760369351195106 +4.935591242636505 +4.779602577154106 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.6862809068484905 +4.6862809068484905 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.858680873820008 +4.979648002510261 +4.979648002510261 +4.798912023055375 +4.649828001134684 +4.649828001134684 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.462504916850652 +0.6063959649582049 +0.37691273948061976 +0.02884340803556551 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.2406471531816594 +0.6213226895949919 +4.817997864018289 +4.742906577502044 +4.729454156416239 +4.79984516624872 +4.809208233051305 +4.714774087646149 +4.714774087646149 +4.818365174589247 +4.858049419420977 +4.76833752096446 +4.695350099572867 +4.695350099572867 +4.676017577221279 +4.676017577221279 +4.676017577221279 +4.781042241478235 +4.858049419420977 +4.8848078319332995 +4.870754749200952 +4.718625762219989 +4.680045399669822 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.702367606407748 +4.871259604324209 +4.927710408327659 +4.927710408327659 +4.812732846572471 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +0.6263147453996627 +0.2595829718925291 +0.0576866479940108 +0.0 +0.02884340803556551 +0.10651401691436746 +0.6688907068459828 +4.5272956191935645 +4.720184565238619 +4.684462903548131 +4.684462903548131 +4.6862809068484905 +4.798912023055375 +4.935591242636505 +4.741226342122529 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 
+4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.649828001134684 +4.723266788376083 +4.939112184045445 +4.979648002510261 +4.918935027068591 +4.6862809068484905 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.6862809068484905 +3.401787032682269 +0.4134958711015573 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.11537255792817191 +0.6801637532859157 +4.5 +4.81729793871423 +4.781970581815818 +4.71001832849629 +4.71001832849629 +4.729454156416239 +4.695350099572867 +4.695350099572867 +4.729454156416239 +4.845146974506639 +4.842936919059271 +4.714774087646149 +4.676017577221279 +4.676017577221279 +4.676017577221279 +4.676017577221279 +4.695350099572867 +4.826446087163245 +4.858049419420977 +4.8848078319332995 +4.807023268125679 +4.680045399669822 +4.680045399669822 +4.680045399669822 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.6644299631397725 +4.812732846572471 +4.914400272933426 
+4.927710408327659 +4.871259604324209 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.683327299089502 +4.681900682162952 +4.681900682162952 +3.3936590485265534 +0.5102083351063529 +0.08652970395641724 +0.02884340803556551 +0.0576866479940108 +0.26944739896229564 +0.6039406390696822 +4.90900814782395 +4.700980573576612 +4.681900682162952 +4.681900682162952 +4.684462903548131 +4.838921850181822 +4.935591242636505 +4.722186242078544 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.667980307053825 +4.667980307053825 +4.667980307053825 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.839146489353515 +4.979648002510261 +4.979648002510261 +4.800247028092315 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.6862809068484905 +4.898823242738641 +4.979648002510261 +0.6007651230049555 +0.3016368135971197 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.02884340803556551 +0.08652970395641724 +0.6361009339160937 +3.424880372594676 +4.754700337540463 +4.81729793871423 +4.81729793871423 +4.754700337540463 +4.690617259906991 +4.690617259906991 +4.690617259906991 +4.695350099572867 +4.695350099572867 +4.781042241478235 +4.858049419420977 +4.781042241478235 +4.676017577221279 +4.676017577221279 +4.676017577221279 +4.676017577221279 +4.656789715712701 +4.729454156416239 +4.845146974506639 +4.858049419420977 +4.845146974506639 +4.7342759124717695 +4.680045399669822 +4.680045399669822 +4.680045399669822 +4.683327299089502 +4.6644299631397725 +4.6644299631397725 +4.7576395655368735 +4.914400272933426 +4.914400272933426 +4.890373906609362 +4.702367606407748 +4.683327299089502 +4.683327299089502 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.4850662655980855 +0.7006178728830168 +0.11537255792817191 +0.0576866479940108 +0.10651401691436746 +0.6027146065606575 +3.451331861236609 +4.890373906609362 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.851876601914066 +4.916065859983668 +4.7047148084587604 +4.667980307053825 +4.667980307053825 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.7047148084587604 +4.939112184045445 +4.979648002510261 +4.939112184045445 +4.6862809068484905 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.594488612832219 +4.684268621444632 +4.918935027068591 +4.979648002510261 +4.798912023055375 +4.4578260227203375 +0.6748309658588463 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.27722759130246555 +0.6389578534659792 +3.4502024765421035 +4.735962024272876 +4.690617259906991 +4.781970581815818 +4.81729793871423 +4.803471126886287 +4.685126324463377 +4.671270154154301 +4.690617259906991 +4.690617259906991 +4.671270154154301 +4.690617259906991 +4.826446087163245 +4.845146974506639 +4.729454156416239 +4.676017577221279 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.676017577221279 +4.791416189934912 +4.858049419420977 +4.858049419420977 +4.809208233051305 +4.676017577221279 +4.660943261085074 +4.660943261085074 +4.660943261085074 +4.660943261085074 +4.6644299631397725 +4.718625762219989 +4.89873092730281 +4.914400272933426 +4.89873092730281 +4.718625762219989 +4.662955652364289 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.681900682162952 +0.6076346759138609 +0.2790850420626718 +0.11537255792817191 +0.17305757537147226 +0.6657145261825592 +4.765366729850828 +4.851876601914066 +4.681900682162952 +4.681900682162952 +4.662955652364289 +4.681900682162952 +4.890373906609362 +4.890373906609362 +4.662955652364289 +4.647295911553856 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.631839101577405 +4.631839101577405 
+4.631839101577405 +4.631839101577405 +4.649828001134684 +4.8196627570631865 +4.979648002510261 +4.979648002510261 +4.8196627570631865 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.6862809068484905 +4.918935027068591 +4.979648002510261 +4.779602577154106 +4.594488612832219 +4.577072911270099 +3.3871856774329334 +0.4164115895697904 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.07802269801585293 +0.3200076833933503 +0.6284694025551278 +3.3962455403760017 +4.522735510237164 +4.791447617972283 +4.760933045899426 +4.665846151051314 +4.685126324463377 +4.803471126886287 +4.81729793871423 +4.764423495930857 +4.671270154154301 +4.671270154154301 +4.6519944403256295 +4.671270154154301 +4.671270154154301 +4.742906577502044 +4.845146974506639 +4.809208233051305 +4.676017577221279 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.71001832849629 +4.845146974506639 +4.858049419420977 +4.858049419420977 +4.742906577502044 +4.63767970403356 +4.63767970403356 +4.660943261085074 +4.660943261085074 
+4.660943261085074 +4.680045399669822 +4.863636363636363 +4.914400272933426 +4.914400272933426 +4.7367234133223 +4.662955652364289 +4.662955652364289 +4.681900682162952 +4.681900682162952 +4.684462903548131 +4.681900682162952 +4.681900682162952 +3.4066666666666667 +0.5470912582584093 +0.17305757537147226 +0.3367206068372681 +0.598100425917039 +4.927710408327659 +4.832343613752159 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.700980573576612 +4.89873092730281 +4.851876601914066 +4.644156904882541 +4.644156904882541 +4.647295911553856 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.649828001134684 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.665803664493659 +4.918935027068591 +4.979648002510261 +4.939112184045445 +4.6862809068484905 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.684462903548131 +4.918935027068591 +4.959350993808612 +4.779602577154106 +4.594488612832219 +4.577072911270099 +4.647885375522203 +4.624023941631759 +0.6441774988331241 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17811199232491148 +0.40250640871114296 +0.6209395623165124 +0.612528122475215 +3.4121573149713003 +4.5225662367935255 +4.672145963635914 +4.672145963635914 +4.74974235645692 +4.791447617972283 +4.709682610573851 +4.646596735835308 +4.698038989919562 +4.804805898398897 +4.804805898398897 +4.704414572748163 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.671270154154301 +4.781970581815818 +4.843246627448329 +4.76204169956615 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.781042241478235 +4.858049419420977 +4.858049419420977 +4.826446087163245 +4.676017577221279 +4.63767970403356 +4.63767970403356 +4.63767970403356 +4.660943261085074 +4.660943261085074 +4.826171391227169 +4.900088766172104 +4.914400272933426 +4.797007948311492 +4.662955652364289 +4.662955652364289 +4.665803664493659 +4.662955652364289 +4.681900682162952 +4.681900682162952 +4.681900682162952 +4.47682675213889 +0.7302790609412355 +0.22140021244840558 +0.5660760801461455 +3.3996686346662637 +4.927710408327659 +4.797007948311492 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.7367234133223 +4.914400272933426 +4.832343613752159 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.647295911553856 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.779773978458947 +4.954466537076697 +4.954466537076697 +4.800247028092315 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.703261124315225 +4.918935027068591 +4.959350993808612 +4.760369351195106 +4.577072911270099 +4.594488612832219 +4.722186242078544 +4.918935027068591 +4.979648002510261 
+4.5078487634365185 +0.6635237943349184 +0.24229536681363495 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.10654702030214391 +0.35504606621873336 +0.5918444305940929 +0.6175436328753916 +3.4041864441530785 +3.4488877508772764 +4.506066750912535 +4.653211143840427 +4.734199167934266 +4.753120216906312 +4.710759302443869 +4.640648550355574 +4.672145963635914 +4.74974235645692 +4.74473950518707 +4.646596735835308 +4.646596735835308 +4.709682610573851 +4.804805898398897 +4.746468359034328 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.685126324463377 +4.817997864018289 +4.817997864018289 +4.690617259906991 +4.656789715712701 +4.656789715712701 +4.656789715712701 +4.63767970403356 +4.671270154154301 +4.842936919059271 +4.858049419420977 +4.858049419420977 +4.76204169956615 +4.63767970403356 +4.63767970403356 +4.63767970403356 +4.63767970403356 +4.640410514489808 +4.748902697275286 +4.8848078319332995 +4.8848078319332995 +4.826171391227169 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.681900682162952 +4.681900682162952 +4.681900682162952 +0.6956563972819012 +0.30777693354095703 +0.7152110594945071 +3.4769566928057696 +4.927710408327659 +4.775931851598417 
+4.662955652364289 +4.662955652364289 +4.644156904882541 +4.756290519524092 +4.927710408327659 +4.797007948311492 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.62895277070343 +4.631839101577405 +4.631839101577405 +4.631839101577405 +4.614029116375104 +4.614029116375104 +4.647295911553856 +4.896292145056855 +4.954466537076697 +4.916065859983668 +4.665803664493659 +4.596414173087077 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.703261124315225 +4.939112184045445 +4.959350993808612 +4.760369351195106 +4.577072911270099 +4.647885375522203 +4.818281302257531 +4.959350993808612 +4.979648002510261 +4.918935027068591 +4.703261124315225 +4.442054446236271 +3.3900069940231603 +0.6493211432484394 +0.502381006373251 +0.19918352929138816 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17811199232491148 +0.5838932072864029 +0.6060061846830029 +3.4270860452442005 +4.534349385336478 +4.728397247583595 +4.753120216906312 +4.753120216906312 +4.693155811682875 +4.640648550355574 +4.664824166136957 +4.752498445713435 +4.752498445713435 +4.653211143840427 +4.621503709837636 +4.653211143840427 
+4.710759302443869 +4.659803272550208 +4.62739859759647 +4.62739859759647 +4.719749765469021 +4.760933045899426 +4.646596735835308 +4.6519944403256295 +4.6519944403256295 +4.6519944403256295 +4.6328065440738575 +4.6328065440738575 +4.735962024272876 +4.828385320427853 +4.781970581815818 +4.6519944403256295 +4.656789715712701 +4.63767970403356 +4.63767970403356 +4.63767970403356 +4.742906577502044 +4.858049419420977 +4.858049419420977 +4.842936919059271 +4.671270154154301 +4.63767970403356 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.689147125339842 +4.872529839105629 +4.8848078319332995 +4.8540296586129035 +4.678585478412721 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.681900682162952 +0.6330885343407324 +0.40378354047257137 +0.779888710483915 +4.508833305020784 +4.927710408327659 +4.756290519524092 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.793091516779753 +4.927710408327659 +4.775931851598417 +4.644156904882541 +4.644156904882541 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.610788162315073 +4.614029116375104 +4.614029116375104 +4.614029116375104 +4.779773978458947 +4.954466537076697 +4.954466537076697 +4.778439516729418 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.614029116375104 +4.596414173087077 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.722186242078544 +4.939112184045445 +4.959350993808612 +4.741226342122529 +4.612107748250274 +4.741226342122529 +4.918935027068591 +4.979648002510261 +4.979648002510261 +4.898823242738641 +4.66600971208682 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.4958606434723105 +3.4909161000893336 +3.395088895732531 +0.6229909793731245 +0.22937674882331027 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.07802269801585293 +0.5168240056601512 +0.5957551901477602 +4.499558173439981 +4.716549695711502 +4.715349243520801 +4.675060389383624 +4.6667668946905065 +4.726826567571509 +4.740587601014347 +4.740409145179578 +4.693155811682875 +4.621503709837636 +4.693155811682875 +4.753120216906312 +4.718039453057842 +4.621503709837636 +4.621503709837636 +4.621503709837636 +4.621503709837636 +4.621503709837636 +4.621503709837636 +4.646596735835308 +4.709682610573851 +4.646596735835308 +4.62739859759647 +4.6328065440738575 +4.6328065440738575 +4.6328065440738575 +4.6328065440738575 +4.6519944403256295 +4.7879891805359165 +4.81729793871423 +4.717058602539717 +4.6328065440738575 +4.6328065440738575 +4.63767970403356 +4.63767970403356 +4.656789715712701 +4.826446087163245 +4.858049419420977 +4.858049419420977 +4.76204169956615 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.655242633831636 +4.826446087163245 +4.869137689341459 +4.872529839105629 +4.713366915704603 +4.6594318456833586 +4.6594318456833586 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +4.662955652364289 +3.3739082146644273 +0.6058076208999745 +0.7084001799010258 +4.6072998258226505 +4.914400272933426 +4.7367234133223 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.851876601914066 +4.927710408327659 +4.7367234133223 +4.625516671887187 
+4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.610788162315073 +4.614029116375104 +4.647295911553856 +4.896292145056855 +4.954466537076697 +4.896292145056855 +4.647295911553856 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.594488612832219 +4.722186242078544 +4.939112184045445 +4.959350993808612 +4.760369351195106 +4.703261124315225 +4.878241460327516 +4.979648002510261 +4.979648002510261 +4.979648002510261 +4.837689816634932 +4.629912424656536 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.665803664493659 +4.898823242738641 +4.599546042034984 +0.5987856305456649 +0.3200076833933503 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.12563154621256611 +0.640989063402218 +3.446276131575725 +4.7039874056290465 +4.7039874056290465 +4.701532155459308 +4.657071354732281 +4.6084885702460685 +4.6084885702460685 +4.638697904486414 +4.701532155459308 +4.728275832763921 +4.740409145179578 +4.675240430792547 +4.640648550355574 +4.710759302443869 +4.740409145179578 +4.653211143840427 +4.602391518498605 +4.602391518498605 +4.602391518498605 
+4.602391518498605 +4.621503709837636 +4.621503709837636 +4.621503709837636 +4.62739859759647 +4.608270933569005 +4.608270933569005 +4.608270933569005 +4.6328065440738575 +4.6328065440738575 +4.6328065440738575 +4.698038989919562 +4.804805898398897 +4.803471126886287 +4.665846151051314 +4.6328065440738575 +4.6328065440738575 +4.6328065440738575 +4.63767970403356 +4.742906577502044 +4.858049419420977 +4.858049419420977 +4.842936919059271 +4.669748954253991 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.781042241478235 +4.858049419420977 +4.858049419420977 +4.747524644977324 +4.6594318456833586 +4.6594318456833586 +4.6594318456833586 +4.6594318456833586 +4.644156904882541 +4.644156904882541 +4.644156904882541 +4.464972813772094 +0.8092215597727375 +0.6578501295517025 +4.832343613752159 +4.914400272933426 +4.717242610399388 +4.644156904882541 +4.644156904882541 +4.662955652364289 +4.871259604324209 +4.914400272933426 +4.717242610399388 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.758924030677352 +4.954466537076697 +4.954466537076697 +4.760369351195106 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.596414173087077 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.594488612832219 +4.594488612832219 +4.722186242078544 +4.939112184045445 +4.959350993808612 +4.839146489353515 +4.858680873820008 +4.959350993808612 +4.979648002510261 +4.979648002510261 +4.959350993808612 +4.798912023055375 +4.594488612832219 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.703261124315225 +4.939112184045445 +4.979648002510261 +4.979648002510261 +4.7370538152010715 +0.602084246074603 +0.3249276534532015 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1351645434877229 +0.6344548999581567 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8360663886365511 +0.8647921722156005 +0.9219773019841471 +0.6742277222771449 +4.4974714903326305 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.69066493212302 +4.6580308995468975 +4.6084885702460685 +4.6084885702460685 +4.589568314126108 +4.620162389412384 +4.697448484045342 +4.728275832763921 +4.728397247583595 +4.675240430792547 +4.621503709837636 +4.693155811682875 +4.664824166136957 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.608270933569005 +4.608270933569005 +4.608270933569005 +4.608270933569005 +4.608270933569005 +4.613722468658828 +4.6328065440738575 +4.764423495930857 +4.81729793871423 +4.764423495930857 +4.6328065440738575 +4.613722468658828 +4.6328065440738575 +4.6328065440738575 +4.650413336097011 
+4.826446087163245 +4.858049419420977 +4.858049419420977 +4.741519592641198 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.7280566877824075 +4.858049419420977 +4.858049419420977 +4.781042241478235 +4.636068948163684 +4.6594318456833586 +4.6594318456833586 +4.640410514489808 +4.640410514489808 +4.644156904882541 +4.644156904882541 +4.644156904882541 +3.367293261192396 +3.3772367930894895 +4.851876601914066 +4.89873092730281 +4.700980573576612 +4.644156904882541 +4.644156904882541 +4.681900682162952 +4.89873092730281 +4.914400272933426 +4.681900682162952 +4.625516671887187 +4.625516671887187 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.644156904882541 +4.890373906609362 +4.927710408327659 +4.876422053888234 +4.631839101577405 +4.596414173087077 +4.596414173087077 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.5598793734488305 +4.577072911270099 +4.722186242078544 +4.959350993808612 +4.979648002510261 +4.959350993808612 +4.959350993808612 +4.979648002510261 +4.979648002510261 +4.979648002510261 +4.959350993808612 +4.741226342122529 +4.577072911270099 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.542927748567686 +4.577072911270099 +4.760369351195106 +4.959350993808612 +4.979648002510261 +4.939112184045445 +4.741226342122529 +4.557739919850564 +4.455161226213266 +0.6152045717011259 +0.27346584048501477 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.14421518948927226 +1.3457522347649986 +2.0609275239094176 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.140313153331892 +2.1491682045832015 +2.173019078934724 +2.4622677624114453 +4.475400723987205 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.665703966605539 +4.601501635773668 +4.589568314126108 +4.589568314126108 +4.620162389412384 +4.697448484045342 +4.716549695711502 +4.728397247583595 +4.657071354732281 +4.602391518498605 +4.621503709837636 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.602391518498605 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.602391518498605 +4.602391518498605 +4.608270933569005 +4.608270933569005 +4.608270933569005 +4.589232276438436 +4.594758254119056 +4.665846151051314 +4.804805898398897 +4.804805898398897 +4.698038989919562 +4.613722468658828 +4.611996828309391 +4.631157180365171 +4.631157180365171 +4.715635347795578 +4.843246627448329 +4.858049419420977 +4.826446087163245 +4.655242633831636 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.669748954253991 +4.830804040442458 +4.858049419420977 +4.826446087163245 +4.655242633831636 +4.6170189734379745 +4.6170189734379745 +4.640410514489808 +4.640410514489808 +4.640410514489808 +4.640410514489808 +4.644156904882541 +4.644156904882541 +4.644156904882541 
+4.851876601914066 +4.8816267976576375 +4.662955652364289 +4.625516671887187 +4.625516671887187 +4.697859544470582 +4.914400272933426 +4.89873092730281 +4.662955652364289 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.588765585089522 +4.752475355734676 +4.927710408327659 +4.927710408327659 +4.7367234133223 +4.592816956402557 +4.57306015489294 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.577072911270099 +4.5598793734488305 +4.5598793734488305 +4.720184565238619 +4.935591242636505 +4.954466537076697 +4.959350993808612 +4.979648002510261 +4.979648002510261 +4.979648002510261 +4.979648002510261 +4.918935027068591 +4.703261124315225 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.542927748567686 +4.612107748250274 +4.837689816634932 +4.979648002510261 +4.979648002510261 +4.898823242738641 +4.663789262481713 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.418012974920095 +0.6258535861392627 +0.26251218737318904 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8156864016436192 +2.1277860562756055 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.261761202020505 +2.289181849292641 +3.569141620103143 +4.716549695711502 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.6870297623887245 +4.620162389412384 +4.570624651733303 +4.589568314126108 +4.601501635773668 +4.681832734435403 +4.716549695711502 +4.715349243520801 +4.6309464772963524 +4.596137437755187 +4.602391518498605 +4.602391518498605 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.602391518498605 +4.602391518498605 +4.583333333333333 +4.589232276438436 +4.589232276438436 +4.589232276438436 +4.589232276438436 +4.728197166242947 +4.81729793871423 +4.7879891805359165 +4.644983451029647 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.650413336097011 +4.803471126886287 +4.843246627448329 +4.858049419420977 +4.722273263744934 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.655242633831636 +4.809208233051305 +4.858049419420977 +4.845146974506639 +4.669748954253991 +4.6170189734379745 +4.6170189734379745 +4.6170189734379745 +4.640410514489808 +4.640410514489808 +4.640410514489808 +4.621534006797198 +4.621534006797198 +4.644156904882541 +4.871259604324209 +4.8816267976576375 +4.644156904882541 +4.607048139239902 +4.607048139239902 +4.717242610399388 +4.927710408327659 +4.871259604324209 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.588765585089522 +4.588765585089522 +4.644156904882541 
+4.8816267976576375 +4.927710408327659 +4.871259604324209 +4.607048139239902 +4.5686346228467 +4.5686346228467 +4.57306015489294 +4.577072911270099 +4.577072911270099 +4.5598793734488305 +4.5598793734488305 +4.57306015489294 +4.739502382932878 +4.935591242636505 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.979648002510261 +4.979648002510261 +4.878780841567253 +4.647295911553856 +4.5598793734488305 +4.5598793734488305 +4.5598793734488305 +4.542927748567686 +4.542927748567686 +4.647295911553856 +4.898823242738641 +4.979648002510261 +4.979648002510261 +4.837689816634932 +4.594488612832219 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.404238805072985 +0.619900467181778 +0.39308664550572203 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7652212687958828 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.251535166886691 +2.2710578765326823 +2.627737733120367 +4.62793792443204 +4.676655864461275 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.701532155459308 +4.6309464772963524 +4.570624651733303 +4.5516827258491075 +4.582748243886765 +4.681832734435403 +4.716549695711502 +4.715349243520801 +4.6309464772963524 +4.577112015096106 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.564349777616885 +4.583333333333333 +4.583333333333333 +4.589232276438436 +4.589232276438436 +4.589232276438436 +4.621503709837636 +4.776441943741247 +4.81729793871423 +4.728197166242947 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.715635347795578 +4.828385320427853 +4.828385320427853 +4.798991798454706 +4.655242633831636 +4.636068948163684 +4.636068948163684 +4.636068948163684 +4.76204169956615 +4.858049419420977 +4.858049419420977 +4.722273263744934 +4.6170189734379745 +4.6170189734379745 +4.6170189734379745 +4.6170189734379745 +4.6170189734379745 +4.621534006797198 +4.621534006797198 +4.621534006797198 +4.6594318456833586 +4.863636363636363 +4.871259604324209 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.7576395655368735 +4.927710408327659 +4.832343613752159 +4.625516671887187 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.772087701603047 +4.927710408327659 +4.927710408327659 +4.717242610399388 +4.5686346228467 +4.5686346228467 +4.5686346228467 +4.5686346228467 +4.550742156987144 +4.555500056192404 +4.555500056192404 +4.57306015489294 +4.758924030677352 +4.935591242636505 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 
+4.837439329515021 +4.608817106569179 +4.5598793734488305 +4.5598793734488305 +4.542927748567686 +4.542927748567686 +4.5598793734488305 +4.722186242078544 +4.939112184045445 +4.979648002510261 +4.959350993808612 +4.760369351195106 +4.557739919850564 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.411424193686267 +3.3919459957853 +0.6318804001419362 +0.2175816996331532 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +1.4498872085464836 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2631715931889724 +2.3059535643590703 +4.396434317608271 +4.669446132552235 +4.63606643082577 +4.613200121218277 +4.6562728773757405 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69066493212302 +4.640652808961297 +4.5516827258491075 
+4.5516827258491075 +4.589568314126108 +4.665703966605539 +4.716549695711502 +4.701532155459308 +4.620162389412384 +4.577112015096106 +4.577112015096106 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.583333333333333 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.583333333333333 +4.583333333333333 +4.587375226740174 +4.587375226740174 +4.670612366078799 +4.791447617972283 +4.791122580013401 +4.664297236339458 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.631157180365171 +4.7879891805359165 +4.828385320427853 +4.81729793871423 +4.683632848810578 +4.636068948163684 +4.636068948163684 +4.6170189734379745 +4.708589746017948 +4.858049419420977 +4.858049419420977 +4.781042241478235 +4.6170189734379745 +4.6170189734379745 +4.6170189734379745 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.621534006797198 +4.655242633831636 +4.870754749200952 +4.836483399978973 +4.621534006797198 +4.607048139239902 +4.607048139239902 +4.793091516779753 +4.927710408327659 +4.793091516779753 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.607048139239902 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.640410514489808 +4.8816267976576375 +4.927710408327659 +4.85029320494125 +4.586736589139169 +4.5686346228467 +4.5686346228467 +4.5686346228467 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.5686346228467 +4.756290519524092 +4.935591242636505 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.935591242636505 +4.778439516729418 +4.57306015489294 +4.538177492226159 +4.538177492226159 +4.542927748567686 +4.542927748567686 +4.594488612832219 +4.798912023055375 +4.959350993808612 +4.979648002510261 +4.898823242738641 +4.663789262481713 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.507737436138109 +4.507737436138109 
+4.507737436138109 +4.507737436138109 +4.507737436138109 +4.577072911270099 +4.818281302257531 +4.5663028571346675 +0.5995445676728011 +0.37691273948061976 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2018996889437794 +2.339414265862871 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2753823920420055 +2.585740560405326 +4.576767997638424 +4.6356545149438695 +4.6356545149438695 +4.620764512081358 +4.585070500657054 +4.62297914451095 +4.676655864461275 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.649129891979821 +4.570624651733303 +4.558125621117782 +4.570624651733303 +4.675060389383624 +4.716549695711502 +4.701532155459308 +4.601501635773668 +4.558125621117782 +4.577112015096106 +4.577112015096106 +4.583333333333333 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 
+4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.562343756184406 +4.581430992457462 +4.581430992457462 +4.606503407028626 +4.754870956928874 +4.791447617972283 +4.754870956928874 +4.606503407028626 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.715635347795578 +4.81729793871423 +4.81729793871423 +4.764423495930857 +4.611996828309391 +4.611996828309391 +4.6170189734379745 +4.655242633831636 +4.842936919059271 +4.858049419420977 +4.826446087163245 +4.636068948163684 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.5793486723868595 +4.636068948163684 +4.872529839105629 +4.818365174589247 +4.602815691902671 +4.584269967083597 +4.607048139239902 +4.832343613752159 +4.914400272933426 +4.7576395655368735 +4.588765585089522 +4.588765585089522 +4.607048139239902 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.772087701603047 +4.927710408327659 +4.914400272933426 +4.697859544470582 +4.5686346228467 +4.5686346228467 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.5686346228467 +4.772087701603047 +4.927710408327659 +4.942733045803132 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.954466537076697 +4.935591242636505 +4.720184565238619 +4.555500056192404 +4.52111316046745 +4.52111316046745 +4.52111316046745 +4.52111316046745 +4.626979007463358 +4.878780841567253 +4.979648002510261 +4.979648002510261 +4.837689816634932 +4.594488612832219 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.626979007463358 +4.876999764984348 +4.979648002510261 +4.979648002510261 +4.798912023055375 +3.386350825680307 +0.40992356387120665 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.547964844339119 +2.0275402225265795 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.261761202020505 +2.288299328844452 +3.5828241956895424 +4.557095632157165 +4.5799545547395955 +4.631830491259161 +4.6356545149438695 +4.626881615898075 +4.566659309953631 +4.573804961511842 +4.64015341288857 +4.679949111556313 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.6562728773757405 +4.570624651733303 +4.53920052169893 +4.558125621117782 +4.6580308995468975 +4.716549695711502 +4.701532155459308 +4.601501635773668 +4.558125621117782 +4.558125621117782 +4.558125621117782 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.564349777616885 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.581430992457462 +4.581430992457462 +4.651613169058767 +4.778506435350758 +4.791447617972283 +4.670612366078799 +4.587375226740174 +4.611996828309391 +4.611996828309391 +4.644983451029647 
+4.7879891805359165 +4.81729793871423 +4.803471126886287 +4.644983451029647 +4.611996828309391 +4.611996828309391 +4.611996828309391 +4.791416189934912 +4.858049419420977 +4.845146974506639 +4.650413336097011 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.631157180365171 +4.858049419420977 +4.781042241478235 +4.584269967083597 +4.584269967083597 +4.621534006797198 +4.863636363636363 +4.914400272933426 +4.697859544470582 +4.588765585089522 +4.570684518657339 +4.570684518657339 +4.570684518657339 +4.588765585089522 +4.588765585089522 +4.588765585089522 +4.570684518657339 +4.640410514489808 +4.89873092730281 +4.927710408327659 +4.830860339288274 +4.586736589139169 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.586736589139169 +4.791710163401963 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.942733045803132 +4.954466537076697 +4.896292145056855 +4.681900682162952 +4.538177492226159 +4.52111316046745 +4.52111316046745 +4.52111316046745 +4.538177492226159 +4.700980573576612 +4.896292145056855 +4.971271205771669 +4.939112184045445 +4.741226342122529 +4.557739919850564 +4.524135426742258 +4.524135426742258 +4.524135426742258 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.524135426742258 +4.701147711411036 +4.939112184045445 +5.0 +4.939112184045445 +4.701147711411036 +4.507737436138109 +4.491641118521462 +0.6257802964995784 +0.22147900774992912 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.983771971076508 +2.2211465944647593 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.272607966463304 +2.4627138022106574 +4.522366093046264 +4.541142071059501 +4.498352535656417 +4.541142071059501 +4.612010239449515 +4.6356545149438695 +4.626881615898075 +4.5480985287219635 +4.517290388513929 +4.585070500657054 +4.662029905531306 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.671919701176808 +4.563931631528289 +4.53920052169893 +4.558125621117782 +4.640652808961297 +4.7039874056290465 +4.701532155459308 +4.582748243886765 +4.53920052169893 +4.53920052169893 +4.53920052169893 +4.558125621117782 +4.564349777616885 +4.564349777616885 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.581430992457462 +4.710759302443869 +4.778088299149454 +4.760933045899426 +4.606503407028626 +4.587375226740174 +4.587375226740174 +4.611996828309391 +4.735962024272876 +4.81729793871423 +4.81729793871423 +4.717058602539717 +4.5929485459444255 +4.5929485459444255 +4.611996828309391 +4.717058602539717 +4.843246627448329 +4.858049419420977 +4.722273263744934 +4.598107147542447 +4.598107147542447 +4.598107147542447 +4.5793486723868595 +4.5793486723868595 
+4.5793486723868595 +4.5793486723868595 +4.650413336097011 +4.858049419420977 +4.76204169956615 +4.560759765732997 +4.5659124333978545 +4.636068948163684 +4.8584171544605805 +4.872529839105629 +4.6594318456833586 +4.570684518657339 +4.570684518657339 +4.570684518657339 +4.570684518657339 +4.570684518657339 +4.570684518657339 +4.5686346228467 +4.5686346228467 +4.791710163401963 +4.927710408327659 +4.914400272933426 +4.6594318456833586 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.586736589139169 +4.791710163401963 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.871259604324209 +4.62349952932741 +4.52111316046745 +4.52111316046745 +4.52111316046745 +4.52111316046745 +4.57306015489294 +4.778439516729418 +4.935591242636505 +4.954466537076697 +4.876422053888234 +4.64212860845057 +4.518950034056942 +4.524135426742258 +4.524135426742258 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.557739919850564 +4.779602577154106 +4.959350993808612 +4.979648002510261 +4.876999764984348 +4.608817106569179 +4.491641118521462 +4.491641118521462 +4.491641118521462 +3.5280397530367322 +0.6614334550488712 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7147544722886581 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.284853433531523 +3.434543599944379 +4.624042963856418 +4.596668132191088 +4.510643118126104 +4.498352535656417 +4.510643118126104 +4.588824563121978 +4.6356545149438695 +4.6050017018392575 +4.498352535656417 +4.498352535656417 +4.5361952346332135 +4.632170355178927 +4.666403422086015 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.676655864461275 +4.575743797536524 +4.53920052169893 +4.53920052169893 +4.62297914451095 +4.7039874056290465 +4.6870297623887245 +4.582748243886765 +4.53920052169893 +4.53920052169893 +4.53920052169893 +4.556070973540779 +4.556070973540779 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.6134253459783015 +4.752498445713435 +4.778088299149454 +4.701674543591646 +4.587375226740174 +4.587375226740174 +4.587375226740174 +4.625712366844445 +4.803471126886287 +4.81729793871423 +4.771738592817006 +4.611996828309391 +4.574028932345933 +4.574028932345933 +4.644983451029647 +4.81729793871423 +4.828385320427853 +4.773211711786747 +4.598107147542447 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.669748954253991 +4.858049419420977 +4.742906577502044 +4.560759765732997 +4.560759765732997 +4.650413336097011 +4.872529839105629 +4.8584171544605805 +4.6170189734379745 +4.5659124333978545 +4.570684518657339 
+4.570684518657339 +4.570684518657339 +4.5528218213026905 +4.550742156987144 +4.550742156987144 +4.640410514489808 +4.89873092730281 +4.927710408327659 +4.811314015577191 +4.5686346228467 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.586736589139169 +4.811314015577191 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.811314015577191 +4.582193139573988 +4.51566364335077 +4.51566364335077 +4.52111316046745 +4.52111316046745 +4.626979007463358 +4.856526425443426 +4.954466537076697 +4.954466537076697 +4.798038464273939 +4.57306015489294 +4.502167670798816 +4.502167670798816 +4.502167670798816 +4.502167670798816 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.507737436138109 +4.608817106569179 +4.857210833107264 +4.979648002510261 +4.979648002510261 +4.798912023055375 +4.557739919850564 +4.491641118521462 +4.491641118521462 +4.475872421177181 +4.475872421177181 +4.475872421177181 +0.6272847219477233 +0.2018996889437794 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +1.53524057630655 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2694046972337256 +2.3917108314050513 +4.328370131613414 +4.548238985236116 +4.590893017970352 +4.553310052157446 +4.491801074676181 +4.4794059114735 +4.4794059114735 +4.552289840148148 +4.612010239449515 +4.541142071059501 +4.498352535656417 +4.498352535656417 +4.517290388513929 +4.566659309953631 +4.6562728773757405 +4.6562728773757405 +4.586925143439678 +4.671919701176808 +4.69281418415078 +4.69281418415078 +4.676655864461275 +4.594289814835405 +4.513899622194286 +4.513899622194286 +4.62297914451095 +4.7039874056290465 +4.6870297623887245 +4.582748243886765 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.556070973540779 +4.556070973540779 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.655506333614664 +4.753120216906312 +4.752498445713435 +4.6197826327175235 +4.5683462678453175 +4.5683462678453175 +4.5683462678453175 +4.719749765469021 +4.81729793871423 +4.804805898398897 +4.644983451029647 +4.574028932345933 +4.574028932345933 +4.5929485459444255 +4.7879891805359165 +4.828385320427853 +4.803471126886287 +4.611996828309391 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.5793486723868595 +4.689147125339842 +4.858049419420977 +4.723684925163794 +4.560759765732997 +4.560759765732997 +4.683632848810578 +4.858049419420977 +4.826446087163245 +4.5793486723868595 +4.5659124333978545 +4.5659124333978545 +4.5659124333978545 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.772087701603047 +4.927710408327659 +4.89873092730281 +4.640410514489808 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 
+4.60503037786163 +4.830860339288274 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.772087701603047 +4.550742156987144 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.533078274185074 +4.697859544470582 +4.916065859983668 +4.954466537076697 +4.935591242636505 +4.720184565238619 +4.536004443501877 +4.502167670798816 +4.502167670798816 +4.502167670798816 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.507737436138109 +4.491641118521462 +4.507737436138109 +4.682407358487153 +4.918935027068591 +5.0 +4.939112184045445 +4.701147711411036 +4.507737436138109 +4.491641118521462 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +3.3796199770798063 +0.47265059722130953 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3172647300558147 +2.275992453631977 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.281922839209146 
+3.3931182886259084 +4.491801074676181 +4.482822888508683 +4.5461808736533715 +4.577601706931892 +4.51854399108187 +4.47291585209415 +4.4794059114735 +4.4794059114735 +4.504058708979198 +4.522663592490754 +4.498352535656417 +4.498352535656417 +4.4794059114735 +4.4794059114735 +4.517290388513929 +4.523942749265335 +4.513899622194286 +4.545078913708497 +4.6562728773757405 +4.69281418415078 +4.69281418415078 +4.69066493212302 +4.594289814835405 +4.513899622194286 +4.513899622194286 +4.62297914451095 +4.7039874056290465 +4.671919701176808 +4.568639765064591 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.556070973540779 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.575166900574203 +4.723400793471849 +4.753120216906312 +4.693155811682875 +4.562343756184406 +4.562343756184406 +4.5683462678453175 +4.6390051220405155 +4.778506435350758 +4.803215945464247 +4.728197166242947 +4.574028932345933 +4.574028932345933 +4.574028932345933 +4.728197166242947 +4.81729793871423 +4.804805898398897 +4.644983451029647 +4.574028932345933 +4.574028932345933 +4.5793486723868595 +4.5793486723868595 +4.560759765732997 +4.560759765732997 +4.704414572748163 +4.858049419420977 +4.702967088304484 +4.560759765732997 +4.54235790142653 +4.702967088304484 +4.869137689341459 +4.773211711786747 +4.54235790142653 +4.547760071805961 +4.545622089347077 +4.545622089347077 +4.545622089347077 +4.545622089347077 +4.550742156987144 +4.640410514489808 +4.89873092730281 +4.914400272933426 +4.772087701603047 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.550742156987144 +4.60503037786163 +4.830860339288274 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.914400272933426 +4.717242610399388 +4.533078274185074 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.5686346228467 +4.772087701603047 +4.914400272933426 
+4.927710408327659 +4.871259604324209 +4.64212860845057 +4.502167670798816 +4.502167670798816 +4.502167670798816 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.4695199194396285 +4.518950034056942 +4.760369351195106 +4.959350993808612 +4.979648002510261 +4.876999764984348 +4.608817106569179 +4.491641118521462 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.460458853299791 +4.460458853299791 +3.5760771683002512 +0.7121090555268879 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6632793931627319 +2.100723074504167 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2694046972337256 +2.3589310087373994 +4.304317460058428 +4.491801074676181 +4.463924095533918 +4.482822888508683 +4.555438561670694 +4.5853224209629255 +4.51854399108187 +4.444957838142898 +4.47291585209415 +4.4794059114735 +4.4794059114735 +4.4794059114735 +4.4794059114735 +4.4794059114735 
+4.4794059114735 +4.4794059114735 +4.4794059114735 +4.504950980439749 +4.513899622194286 +4.526215661270152 +4.6236189421086475 +4.69281418415078 +4.69281418415078 +4.69066493212302 +4.586925143439678 +4.513899622194286 +4.511538266366625 +4.62297914451095 +4.7039874056290465 +4.681832734435403 +4.568639765064591 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.556070973540779 +4.556070973540779 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.562343756184406 +4.625661610351658 +4.753120216906312 +4.738328274948345 +4.594292603304557 +4.562343756184406 +4.562343756184406 +4.581430992457462 +4.74473950518707 +4.791447617972283 +4.760933045899426 +4.587375226740174 +4.555255275433425 +4.555255275433425 +4.65822873316879 +4.81729793871423 +4.81729793871423 +4.696574961377673 +4.555255275433425 +4.574028932345933 +4.574028932345933 +4.560759765732997 +4.560759765732997 +4.560759765732997 +4.723684925163794 +4.858049419420977 +4.664297236339458 +4.54235790142653 +4.54235790142653 +4.760666938236282 +4.858049419420977 +4.722273263744934 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.545622089347077 +4.545622089347077 +4.545622089347077 +4.563809733321463 +4.805612835725778 +4.914400272933426 +4.8816267976576375 +4.600754584726002 +4.533078274185074 +4.533078274185074 +4.533078274185074 +4.60503037786163 +4.85029320494125 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.89873092730281 +4.678585478412721 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.61947756989829 +4.851876601914066 +4.927710408327659 +4.927710408327659 +4.811314015577191 +4.582193139573988 +4.496296093052938 +4.502167670798816 +4.502167670798816 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.4695199194396285 +4.57306015489294 +4.837439329515021 +4.954466537076697 
+4.954466537076697 +4.779602577154106 +4.540810622745219 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.37907711329853 +0.6436808059256123 +0.34662776824208574 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8553382116227111 +2.2211465944647593 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.261761202020505 +2.281922839209146 +3.3644985134317262 +4.479063031276068 +4.444957838142898 +4.463924095533918 +4.463924095533918 +4.457765303161006 +4.555438561670694 +4.5853224209629255 +4.500777606857457 +4.444957838142898 +4.4512597008547115 +4.454011608466691 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.4794059114735 +4.4794059114735 +4.4794059114735 +4.4794059114735 +4.504950980439749 +4.4859857040911795 +4.507366574133323 +4.632170355178927 +4.679949111556313 +4.69281418415078 +4.69066493212302 +4.603299912056848 +4.511538266366625 +4.511538266366625 
+4.62297914451095 +4.7039874056290465 +4.671919701176808 +4.5495826726201285 +4.5180563038110195 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.556070973540779 +4.556070973540779 +4.562343756184406 +4.562343756184406 +4.543342744992878 +4.543342744992878 +4.562343756184406 +4.701360778922976 +4.753120216906312 +4.675240430792547 +4.543342744992878 +4.524448559224209 +4.543342744992878 +4.664824166136957 +4.778088299149454 +4.791447617972283 +4.651613169058767 +4.549435230703424 +4.555255275433425 +4.606503407028626 +4.791122580013401 +4.81729793871423 +4.764423495930857 +4.555255275433425 +4.536645877449656 +4.555255275433425 +4.555255275433425 +4.555255275433425 +4.560759765732997 +4.742906577502044 +4.858049419420977 +4.650413336097011 +4.54235790142653 +4.560759765732997 +4.791416189934912 +4.858049419420977 +4.664297236339458 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.5276493769360435 +4.655242633831636 +4.872529839105629 +4.8848078319332995 +4.752475355734676 +4.533078274185074 +4.533078274185074 +4.51566364335077 +4.600754584726002 +4.851876601914066 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.927710408327659 +4.871259604324209 +4.640410514489808 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.533078274185074 +4.697859544470582 +4.89873092730281 +4.927710408327659 +4.914400272933426 +4.732895578140565 +4.530851648554603 +4.496296093052938 +4.496296093052938 +4.479437422437865 +4.479437422437865 +4.485682117209293 +4.485682117209293 +4.485682117209293 +4.4695199194396285 +4.485682117209293 +4.64212860845057 +4.896292145056855 +4.971271205771669 +4.916065859983668 +4.660902998104733 +4.485682117209293 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.475872421177181 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 
+4.404615629412585 +0.6039035554536563 +0.6252108790857731 +0.157275361322311 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +1.323460538475258 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.246067985320125 +2.2587488089142114 +2.276297393963519 +2.625116350348653 +4.451641543756818 +4.543452396465089 +4.470438427378001 +4.419975864651762 +4.444957838142898 +4.444957838142898 +4.457765303161006 +4.539257872104618 +4.5853224209629255 +4.512109974050057 +4.444957838142898 +4.425946698109906 +4.454011608466691 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.4794059114735 +4.4794059114735 +4.460474670771755 +4.460474670771755 +4.467070908185214 +4.504950980439749 +4.614878214028467 +4.679949111556313 +4.69281418415078 +4.679949111556313 +4.603299912056848 +4.511538266366625 +4.511538266366625 +4.614878214028467 +4.69281418415078 +4.671919701176808 +4.5495826726201285 +4.5180563038110195 +4.5180563038110195 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.537026822756646 
+4.537026822756646 +4.537026822756646 +4.543342744992878 +4.543342744992878 +4.543342744992878 +4.606705029314043 +4.740409145179578 +4.738328274948345 +4.575166900574203 +4.524448559224209 +4.524448559224209 +4.575166900574203 +4.752498445713435 +4.778088299149454 +4.710759302443869 +4.549435230703424 +4.530661494648408 +4.549435230703424 +4.754870956928874 +4.81729793871423 +4.803471126886287 +4.5683462678453175 +4.536645877449656 +4.536645877449656 +4.536645877449656 +4.555255275433425 +4.555255275433425 +4.726800481807874 +4.858049419420977 +4.631157180365171 +4.54235790142653 +4.5793486723868595 +4.826446087163245 +4.858049419420977 +4.631157180365171 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.503888260008801 +4.540163595729319 +4.79984516624872 +4.8848078319332995 +4.8584171544605805 +4.596006379601555 +4.5276493769360435 +4.51566364335077 +4.600754584726002 +4.851876601914066 +4.914400272933426 +4.914400272933426 +4.914400272933426 +4.927710408327659 +4.927710408327659 +4.832343613752159 +4.582193139573988 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.5686346228467 +4.791710163401963 +4.927710408327659 +4.927710408327659 +4.8816267976576375 +4.676464247475493 +4.513442545183218 +4.496296093052938 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.4695199194396285 +4.518950034056942 +4.739502382932878 +4.935591242636505 +4.954466537076697 +4.837439329515021 +4.590838511188699 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.475872421177181 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.458442905224089 +4.44343410958372 +4.44343410958372 +3.529378058130801 +0.6122074521481151 +0.5714120409432386 +0.1174721498628637 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +2.4239620560826447 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.261761202020505 +2.351362644082894 +3.4741062982310638 +4.506921741467265 +4.551801604129618 +4.550140350585692 +4.5350396637789485 +4.464612354157994 +4.419975864651762 +4.419975864651762 +4.444957838142898 +4.463924095533918 +4.529291803534183 +4.5853224209629255 +4.529291803534183 +4.438908795687557 +4.425946698109906 +4.432218388162097 +4.454011608466691 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.460474670771755 +4.457759882887575 +4.4958606434723105 +4.605035406704348 +4.679949111556313 +4.679949111556313 +4.679949111556313 +4.603299912056848 +4.511538266366625 +4.530541158577594 +4.632170355178927 +4.69281418415078 +4.6562728773757405 +4.5495826726201285 +4.5180563038110195 +4.5180563038110195 +4.537026822756646 +4.537026822756646 +4.537026822756646 +4.5180563038110195 +4.5180563038110195 +4.5180563038110195 +4.537026822756646 +4.537026822756646 +4.543342744992878 +4.543342744992878 +4.684246317290842 +4.753120216906312 +4.657071354732281 
+4.524448559224209 +4.524448559224209 +4.524448559224209 +4.701360778922976 +4.764851533139823 +4.752498445713435 +4.581430992457462 +4.530661494648408 +4.530661494648408 +4.683348043725572 +4.803215945464247 +4.804805898398897 +4.6390051220405155 +4.536645877449656 +4.536645877449656 +4.536645877449656 +4.518220365758712 +4.536645877449656 +4.746468359034328 +4.81729793871423 +4.606503407028626 +4.5219170311723484 +4.590812073003308 +4.830804040442458 +4.830804040442458 +4.590812073003308 +4.5219170311723484 +4.5219170311723484 +4.5219170311723484 +4.503888260008801 +4.503888260008801 +4.644983451029647 +4.858049419420977 +4.858049419420977 +4.722273263744934 +4.50991243088939 +4.50991243088939 +4.6149276187135 +4.836483399978973 +4.914400272933426 +4.914400272933426 +4.914400272933426 +4.914400272933426 +4.914400272933426 +4.791710163401963 +4.5686346228467 +4.51566364335077 +4.51566364335077 +4.51566364335077 +4.61947756989829 +4.85029320494125 +4.927710408327659 +4.927710408327659 +4.85029320494125 +4.600754584726002 +4.496296093052938 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.545622089347077 +4.815381064171493 +4.954466537076697 +4.935591242636505 +4.758924030677352 +4.518950034056942 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.4382813720654735 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.460458853299791 +4.458442905224089 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +3.4943066622976655 +0.6279912341196221 +0.3486431132071166 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3749445413935266 +2.1701297193934703 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.261761202020505 +2.386513800755304 +4.360589293829445 +4.4642115202475745 +4.489080743021098 +4.530776542855393 +4.550140350585692 +4.550140350585692 +4.543029917126171 +4.47698040719883 +4.419975864651762 +4.419975864651762 +4.419975864651762 +4.425946698109906 +4.500777606857457 +4.5853224209629255 +4.555438561670694 +4.457765303161006 +4.425946698109906 +4.432218388162097 +4.432218388162097 +4.435112702591826 +4.441583484616433 +4.441583484616433 +4.441583484616433 +4.457759882887575 +4.457759882887575 +4.457759882887575 +4.457759882887575 +4.4768097253305825 +4.605035406704348 +4.679949111556313 +4.679949111556313 +4.666403422086015 +4.585070500657054 +4.511538266366625 +4.530541158577594 +4.632170355178927 +4.69281418415078 +4.6562728773757405 +4.5495826726201285 +4.5180563038110195 +4.5180563038110195 +4.5180563038110195 +4.5180563038110195 +4.5180563038110195 +4.511538266366625 +4.511538266366625 +4.511538266366625 +4.511538266366625 +4.5180563038110195 +4.524448559224209 +4.575166900574203 +4.752498445713435 +4.723400793471849 +4.556070973540779 +4.524448559224209 +4.5056825520772295 +4.606705029314043 +4.753120216906312 +4.753120216906312 +4.625661610351658 +4.5056825520772295 
+4.5120455101596315 +4.6134253459783015 +4.791447617972283 +4.791447617972283 +4.6909830056250525 +4.536645877449656 +4.518220365758712 +4.518220365758712 +4.518220365758712 +4.536645877449656 +4.737508505270368 +4.81729793871423 +4.579235806949711 +4.491168995656453 +4.625712366844445 +4.858049419420977 +4.809208233051305 +4.540163595729319 +4.5219170311723484 +4.503888260008801 +4.503888260008801 +4.503888260008801 +4.540163595729319 +4.809208233051305 +4.858049419420977 +4.815335102610012 +4.553054939770582 +4.486100377805714 +4.6170189734379745 +4.842936919059271 +4.8848078319332995 +4.8848078319332995 +4.900088766172104 +4.914400272933426 +4.89873092730281 +4.732895578140565 +4.533078274185074 +4.498520697110432 +4.498520697110432 +4.513442545183218 +4.676464247475493 +4.89873092730281 +4.927710408327659 +4.927710408327659 +4.791710163401963 +4.566366390735457 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.462893717258573 +4.479437422437865 +4.61947756989829 +4.863636363636363 +4.937237907553747 +4.890373906609362 +4.660902998104733 +4.4695199194396285 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.458442905224089 +4.458442905224089 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.338887181586376 +3.3820332600309895 +0.5334979361207957 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.8156864016436192 +2.1277860562756055 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2631715931889724 +2.3145873379003574 +3.6047343523426454 +4.546907521378909 +4.436098794671977 +4.376515254795292 +4.461419383366735 +4.517386266312412 +4.5437863107195415 +4.550140350585692 +4.543029917126171 +4.471464242517082 +4.400988345734394 +4.400988345734394 +4.400988345734394 +4.40691351739986 +4.488865705516085 +4.571165324280725 +4.578780008896242 +4.470438427378001 +4.432218388162097 +4.432218388162097 +4.432218388162097 +4.432218388162097 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.457759882887575 +4.457759882887575 +4.457759882887575 +4.4768097253305825 +4.5968888406633095 +4.668188175579596 +4.679949111556313 +4.666403422086015 +4.585070500657054 +4.511538266366625 +4.530541158577594 +4.632170355178927 +4.69281418415078 +4.6562728773757405 +4.530541158577594 +4.499181667202394 +4.499181667202394 +4.492597061738637 +4.492597061738637 +4.511538266366625 +4.511538266366625 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.511538266366625 +4.5180563038110195 +4.6667668946905065 +4.753120216906312 +4.61844277751581 +4.5056825520772295 +4.5056825520772295 +4.537026822756646 +4.738328274948345 +4.753120216906312 +4.684246317290842 +4.5056825520772295 +4.5056825520772295 +4.556070973540779 +4.76455519928248 +4.791447617972283 +4.74473950518707 +4.5120455101596315 
+4.518220365758712 +4.518220365758712 +4.518220365758712 +4.530661494648408 +4.754870956928874 +4.804805898398897 +4.5601096139456985 +4.491168995656453 +4.651613169058767 +4.828385320427853 +4.771843886220461 +4.5219170311723484 +4.503888260008801 +4.503888260008801 +4.503888260008801 +4.503888260008801 +4.683632848810578 +4.858049419420977 +4.858049419420977 +4.702967088304484 +4.486100377805714 +4.611996828309391 +4.830804040442458 +4.869137689341459 +4.8848078319332995 +4.8848078319332995 +4.8848078319332995 +4.886289060517825 +4.693905508121947 +4.51566364335077 +4.498520697110432 +4.479437422437865 +4.513442545183218 +4.732895578140565 +4.914400272933426 +4.927710408327659 +4.914400272933426 +4.732895578140565 +4.513442545183218 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.496296093052938 +4.713366915704603 +4.914400272933426 +4.927710408327659 +4.830860339288274 +4.563809733321463 +4.446694387067265 +4.446694387067265 +4.453709526215379 +4.453709526215379 +4.453709526215379 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.43622316904972 +4.43622316904972 +4.43622316904972 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.428834675719275 +4.428834675719275 +4.428834675719275 +3.3833587151986046 +0.5567962144720973 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7853909093635885 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2551629429654723 +2.2757804963122665 +3.3579079718332823 +4.528339335359163 +4.525148982297988 +4.494288594971252 +4.390394399682197 +4.376515254795292 +4.4181306177488215 +4.508785647745265 +4.530776542855393 +4.5437863107195415 +4.536738078402639 +4.483266161362857 +4.3954670809967755 +4.400988345734394 +4.400988345734394 +4.400988345734394 +4.451863182214335 +4.56377683095028 +4.5853224209629255 +4.500777606857457 +4.432218388162097 +4.413190127900948 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.457759882887575 +4.457759882887575 +4.4702902099390895 +4.6050017018392575 +4.657957256979294 +4.668188175579596 +4.666403422086015 +4.566659309953631 +4.492597061738637 +4.523942749265335 +4.64015341288857 +4.69281418415078 +4.64015341288857 +4.504950980439749 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.542937271084943 +4.726826567571509 +4.707820717224033 +4.524448559224209 +4.5056825520772295 +4.5056825520772295 +4.684246317290842 +4.753120216906312 +4.738328274948345 +4.537026822756646 +4.480426073856573 +4.499181667202394 +4.701360778922976 +4.778088299149454 +4.76455519928248 +4.562343756184406 +4.493609179742273 +4.493609179742273 +4.491168995656453 +4.528362374312382 +4.771738592817006 
+4.791122580013401 +4.541058036271199 +4.491168995656453 +4.689504627805389 +4.828385320427853 +4.689504627805389 +4.497626528797163 +4.503888260008801 +4.503888260008801 +4.503888260008801 +4.553054939770582 +4.826446087163245 +4.858049419420977 +4.815335102610012 +4.553054939770582 +4.631157180365171 +4.830804040442458 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.869137689341459 +4.8584171544605805 +4.650413336097011 +4.486100377805714 +4.472934417851474 +4.479437422437865 +4.545622089347077 +4.791710163401963 +4.914400272933426 +4.914400272933426 +4.8816267976576375 +4.676464247475493 +4.496296093052938 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.545622089347077 +4.791710163401963 +4.927710408327659 +4.914400272933426 +4.732895578140565 +4.496296093052938 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.4382813720654735 +4.43622316904972 +4.43622316904972 +4.43622316904972 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.44343410958372 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +3.378969527964049 +0.429071160666906 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +1.4126304624017485 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2713027004785546 +2.447931302334326 +4.4222248304064395 +4.50429677271331 +4.502694043727065 +4.507264102733968 +4.449037409925407 +4.376515254795292 +4.376515254795292 +4.385805856764533 +4.468584860676432 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.494381313041727 +4.414356383420913 +4.381968556808062 +4.400988345734394 +4.400988345734394 +4.433162067515307 +4.5227409419139475 +4.577601706931892 +4.529291803534183 +4.423232002361576 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.432218388162097 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.457759882887575 +4.457759882887575 +4.489286681787702 +4.596668132191088 +4.6356545149438695 +4.657957256979294 +4.666403422086015 +4.555042161857102 +4.492597061738637 +4.504950980439749 +4.64015341288857 +4.69281418415078 +4.632170355178927 +4.504950980439749 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.4737411954365065 +4.4737411954365065 +4.492597061738637 +4.492597061738637 +4.492597061738637 +4.4737411954365065 +4.4737411954365065 +4.640652808961297 +4.728397247583595 +4.587688824985208 +4.5056825520772295 +4.487067282446096 +4.606705029314043 +4.753120216906312 +4.753120216906312 +4.599691079805129 +4.480426073856573 +4.480426073856573 +4.637060067730612 +4.764851533139823 +4.764851533139823 +4.625661610351658 +4.491168995656453 +4.491168995656453 +4.4728501212062755 +4.5096810473294475 +4.776441943741247 +4.791122580013401 +4.541058036271199 +4.491168995656453 +4.736141839889283 
+4.81729793871423 +4.6197826327175235 +4.491168995656453 +4.491168995656453 +4.497626528797163 +4.503888260008801 +4.702967088304484 +4.858049419420977 +4.858049419420977 +4.702967088304484 +4.625712366844445 +4.830804040442458 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.815335102610012 +4.609875135493686 +4.486100377805714 +4.466207198930331 +4.466207198930331 +4.596006379601555 +4.84508484236849 +4.914400272933426 +4.914400272933426 +4.863636363636363 +4.600754584726002 +4.462893717258573 +4.462893717258573 +4.479437422437865 +4.479437422437865 +4.479437422437865 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.600754584726002 +4.863636363636363 +4.937237907553747 +4.8816267976576375 +4.638346592840813 +4.462893717258573 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.4382813720654735 +4.4382813720654735 +4.43622316904972 +4.43622316904972 +4.43622316904972 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.44343410958372 +0.6273720592915861 +0.2118577334327867 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17305757537147226 +2.3625603076388697 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.282467208230445 +3.4027943810712595 +4.510525149050915 +4.4811159587853355 +4.475186563700894 +4.475186563700894 +4.452791429304008 +4.371568891778836 +4.3575227068135405 +4.3575227068135405 +4.371568891778836 +4.444561438329306 +4.517386266312412 +4.525148982297988 +4.5437863107195415 +4.520058934112502 +4.409131784852644 +4.381968556808062 +4.400988345734394 +4.400988345734394 +4.398150499106636 +4.488817061485224 +4.577601706931892 +4.555438561670694 +4.436280735418166 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.432218388162097 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.489286681787702 +4.596668132191088 +4.6356545149438695 +4.648026828159334 +4.641235384144579 +4.5361952346332135 +4.4737411954365065 +4.504950980439749 +4.6562728773757405 +4.69281418415078 +4.632170355178927 +4.511538266366625 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.523942749265335 +4.715349243520801 +4.649129891979821 +4.4737411954365065 +4.487067282446096 +4.5180563038110195 +4.738328274948345 +4.753120216906312 +4.684246317290842 +4.480426073856573 +4.461814107815801 +4.5495826726201285 +4.740409145179578 +4.764851533139823 +4.68279634091494 +4.466027460208323 +4.466027460208323 +4.4728501212062755 +4.5221019260413 +4.776441943741247 +4.791122580013401 +4.541058036271199 +4.5096810473294475 +4.771738592817006 +4.804805898398897 +4.562343756184406 +4.4728501212062755 
+4.4728501212062755 +4.4728501212062755 +4.5601096139456985 +4.817997864018289 +4.858049419420977 +4.830804040442458 +4.715635347795578 +4.830804040442458 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.809208233051305 +4.571866014030257 +4.466207198930331 +4.466207198930331 +4.466207198930331 +4.629037720920056 +4.8584171544605805 +4.8848078319332995 +4.8848078319332995 +4.8247089911549255 +4.563809733321463 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.462893717258573 +4.479437422437865 +4.693905508121947 +4.89873092730281 +4.927710408327659 +4.811314015577191 +4.545622089347077 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.43622316904972 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.428834675719275 +4.473829599409482 +4.701147711411036 +4.502155701517554 +0.6574162717298266 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4997231449871 +2.070157598741992 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2713027004785546 +2.3818535613912446 +4.399964825372442 +4.4811159587853355 +4.392755007565345 +4.3484375085333085 +4.3484375085333085 +4.363331244589073 +4.3484375085333085 +4.352675830528556 +4.3575227068135405 +4.3575227068135405 +4.3575227068135405 +4.4181306177488215 +4.494288594971252 +4.525148982297988 +4.525148982297988 +4.514260294902164 +4.427760861995566 +4.378997457380755 +4.359813865820298 +4.378997457380755 +4.378997457380755 +4.456666181535883 +4.563841360958199 +4.55656589041628 +4.467971184044464 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.489286681787702 +4.612010239449515 +4.6356545149438695 +4.6356545149438695 +4.626881615898075 +4.508225910052507 +4.4737411954365065 +4.504950980439749 +4.646852001521213 +4.69281418415078 +4.614878214028467 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4549954627911825 +4.4549954627911825 +4.4549954627911825 +4.4737411954365065 +4.4737411954365065 +4.632170355178927 +4.69066493212302 +4.523942749265335 +4.4549954627911825 +4.4737411954365065 +4.684246317290842 +4.753120216906312 +4.723400793471849 +4.511538266366625 +4.461814107815801 +4.492597061738637 +4.723400793471849 +4.753120216906312 +4.723400793471849 +4.490078739911335 +4.459144616499847 +4.466027460208323 +4.5221019260413 +4.76455519928248 +4.776441943741247 +4.50326278167366 +4.5221019260413 +4.791122580013401 +4.776441943741247 +4.5096810473294475 +4.454750524866162 +4.454750524866162 +4.4728501212062755 +4.726800481807874 
+4.828385320427853 +4.843246627448329 +4.845146974506639 +4.845146974506639 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.790040730028535 +4.553054939770582 +4.466207198930331 +4.448945275450309 +4.466207198930331 +4.642838795116047 +4.845146974506639 +4.869137689341459 +4.8848078319332995 +4.779665527134621 +4.5219170311723484 +4.456023364726183 +4.456023364726183 +4.462893717258573 +4.462893717258573 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.513442545183218 +4.772087701603047 +4.927710408327659 +4.914400272933426 +4.713366915704603 +4.496296093052938 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.428749545454485 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.421219991103758 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.428834675719275 +4.4146775790370745 +4.428834675719275 +4.570838353420942 +4.8177095132940115 +4.837439329515021 +4.553307770795782 +0.6091995609230558 +0.26944739896229564 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.9379779678995588 +2.2037404301444643 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.279953976130526 +3.3145332729137547 +4.486334337166265 +4.397344032056895 +4.30404791709206 +4.329641292087303 +4.329641292087303 +4.329641292087303 +4.329641292087303 +4.329641292087303 +4.329641292087303 +4.333736762813394 +4.3575227068135405 +4.3575227068135405 +4.385805856764533 +4.468584860676432 +4.525148982297988 +4.525148982297988 +4.522944253053671 +4.436098794671977 +4.359813865820298 +4.359813865820298 +4.359813865820298 +4.378997457380755 +4.425150693598551 +4.541557094775911 +4.563841360958199 +4.500777606857457 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.432218388162097 +4.438734902106786 +4.438734902106786 +4.438734902106786 +4.482822888508683 +4.61789817246824 +4.6356545149438695 +4.6356545149438695 +4.612010239449515 +4.489286681787702 +4.445436290543406 +4.517290388513929 +4.662029905531306 +4.679949111556313 +4.586925143439678 +4.4737411954365065 +4.4737411954365065 +4.4737411954365065 +4.4549954627911825 +4.4549954627911825 +4.4549954627911825 +4.4549954627911825 +4.4549954627911825 +4.436386382000617 +4.517290388513929 +4.7039874056290465 +4.603299912056848 +4.4549954627911825 +4.4549954627911825 +4.592455460018538 +4.740587601014347 +4.753120216906312 +4.561910464792896 +4.459144616499847 +4.459144616499847 +4.68279634091494 +4.753120216906312 +4.740409145179578 +4.528188482906883 +4.440585238149008 +4.440585238149008 +4.530541158577594 +4.7657943350103915 +4.76455519928248 +4.50326278167366 +4.543342744992878 +4.791447617972283 +4.754870956928874 +4.4728501212062755 +4.454750524866162 +4.454750524866162 +4.562343756184406 +4.804805898398897 +4.81729793871423 +4.828385320427853 
+4.828385320427853 +4.843246627448329 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.790040730028535 +4.553054939770582 +4.466207198930331 +4.448945275450309 +4.466207198930331 +4.683632848810578 +4.845146974506639 +4.858049419420977 +4.858049419420977 +4.734569035115524 +4.501557647515675 +4.448945275450309 +4.448945275450309 +4.448945275450309 +4.456023364726183 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.563809733321463 +4.830860339288274 +4.927710408327659 +4.863636363636363 +4.61947756989829 +4.462893717258573 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.428749545454485 +4.428749545454485 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.421219991103758 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.421219991103758 +4.518950034056942 +4.741226342122529 +4.876999764984348 +4.682407358487153 +4.458442905224089 +4.4146775790370745 +3.4692915679189387 +0.6531094688252859 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7319868307667621 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2631715931889724 +2.2982112549716662 +3.5917018858924608 +4.479107382974854 +4.352495164972238 +4.30404791709206 +4.30404791709206 +4.310799240903608 +4.310799240903608 +4.310799240903608 +4.310799240903608 +4.329641292087303 +4.329641292087303 +4.329641292087303 +4.352675830528556 +4.3575227068135405 +4.3575227068135405 +4.444561438329306 +4.517386266312412 +4.525148982297988 +4.517386266312412 +4.466270560425329 +4.373530165834871 +4.359813865820298 +4.359813865820298 +4.359813865820298 +4.392622212842695 +4.51043569835049 +4.556828312189985 +4.5227409419139475 +4.423232002361576 +4.410395058116138 +4.410395058116138 +4.410395058116138 +4.404092515167324 +4.404092515167324 +4.404092515167324 +4.413190127900948 +4.419759557158932 +4.419759557158932 +4.482822888508683 +4.61789817246824 +4.6356545149438695 +4.6356545149438695 +4.596668132191088 +4.4512597008547115 +4.426558867884018 +4.510643118126104 +4.662029905531306 +4.666403422086015 +4.566659309953631 +4.4549954627911825 +4.448231417474431 +4.448231417474431 +4.448231417474431 +4.448231417474431 +4.436386382000617 +4.436386382000617 +4.436386382000617 +4.4549954627911825 +4.649129891979821 +4.6870297623887245 +4.467070908185214 +4.436386382000617 +4.504950980439749 +4.715349243520801 +4.728275832763921 +4.639065325410318 +4.459144616499847 +4.459144616499847 +4.599691079805129 +4.753120216906312 +4.753120216906312 +4.61844277751581 +4.440585238149008 +4.4222123806403095 +4.530541158577594 +4.753120216906312 +4.752498445713435 +4.50326278167366 +4.594292603304557 +4.791447617972283 +4.681857368058384 +4.454750524866162 +4.454750524866162 +4.4728501212062755 +4.736141839889283 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.828385320427853 +4.828385320427853 
+4.843246627448329 +4.798991798454706 +4.553054939770582 +4.448945275450309 +4.448945275450309 +4.4837541190057655 +4.702967088304484 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.715635347795578 +4.466207198930331 +4.431999507328421 +4.431999507328421 +4.431999507328421 +4.431999507328421 +4.431999507328421 +4.431999507328421 +4.439444868219007 +4.462893717258573 +4.61947756989829 +4.863636363636363 +4.914400272933426 +4.791710163401963 +4.525371911714212 +4.446694387067265 +4.446694387067265 +4.446694387067265 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.428749545454485 +4.428749545454485 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.406660686573971 +4.421219991103758 +4.530851648554603 +4.739502382932878 +4.83515291684364 +4.6988319217883205 +4.502167670798816 +4.4146775790370745 +4.400997163296846 +4.400997163296846 +4.400997163296846 +0.6749169490212346 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +1.5018024598202486 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2784211733545465 +2.4353146260153733 +4.410795463131559 +4.469240513255388 +4.446713403848863 +4.423669530065321 +4.399300493996371 +4.352495164972238 +4.320286335236165 +4.307610503395612 +4.310799240903608 +4.310799240903608 +4.310799240903608 +4.329641292087303 +4.329641292087303 +4.326291782126872 +4.330384925242272 +4.335175833863043 +4.397327275394801 +4.494288594971252 +4.525148982297988 +4.525148982297988 +4.473136969025019 +4.373530165834871 +4.359813865820298 +4.359813865820298 +4.359813865820298 +4.378997457380755 +4.488817061485224 +4.556828312189985 +4.526170400590518 +4.436280735418166 +4.404092515167324 +4.404092515167324 +4.404092515167324 +4.404092515167324 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.413190127900948 +4.419759557158932 +4.495160837080689 +4.609159296939702 +4.6356545149438695 +4.6356545149438695 +4.572275762357333 +4.432218388162097 +4.419759557158932 +4.520325741539703 +4.666403422086015 +4.676655864461275 +4.510643118126104 +4.448231417474431 +4.448231417474431 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.436386382000617 +4.436386382000617 +4.5361952346332135 +4.69066493212302 +4.555042161857102 +4.436386382000617 +4.452260404566443 +4.665703966605539 +4.728275832763921 +4.6870297623887245 +4.471121086488168 +4.440585238149008 +4.530541158577594 +4.740409145179578 +4.753120216906312 +4.68279634091494 +4.440585238149008 +4.4222123806403095 +4.542937271084943 +4.753120216906312 +4.738328274948345 +4.471121086488168 +4.637060067730612 +4.778088299149454 +4.6134253459783015 +4.436898945829201 +4.436898945829201 +4.575166900574203 +4.804805898398897 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.828385320427853 +4.803471126886287 +4.5601096139456985 
+4.441702784036244 +4.448945275450309 +4.477153658161236 +4.715635347795578 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.702967088304484 +4.466207198930331 +4.431999507328421 +4.431999507328421 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.431999507328421 +4.431999507328421 +4.466207198930331 +4.689147125339842 +4.886289060517825 +4.89873092730281 +4.691713156761751 +4.462893717258573 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.428749545454485 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.406660686573971 +4.406660686573971 +4.43622316904972 +4.584430242518478 +4.778439516729418 +4.854921788484206 +4.6988319217883205 +4.485682117209293 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.400997163296846 +4.400997163296846 +0.6083917582347658 +0.2982551909566391 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2884240647434684 +2.309585675329008 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.285534687051869 +2.689922886317072 +4.450262109391606 +4.40784695706688 +4.421815150590111 +4.446713403848863 +4.459631613915226 +4.4569885833271 +4.446713403848863 +4.423669530065321 +4.383033768797953 +4.357406875112937 +4.326251880415926 +4.310799240903608 +4.307217958916177 +4.326291782126872 +4.326291782126872 +4.330384925242272 +4.335175833863043 +4.364189297235156 +4.473136969025019 +4.525148982297988 +4.525148982297988 +4.484110954809075 +4.382978344722738 +4.359813865820298 +4.359813865820298 +4.359813865820298 +4.378997457380755 +4.456666181535883 +4.543029917126171 +4.549643730922412 +4.44374109684803 +4.404092515167324 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.394200048734022 +4.413190127900948 +4.507121697681015 +4.6226847512678555 +4.6226847512678555 +4.6356545149438695 +4.555394310843138 +4.413190127900948 +4.394200048734022 +4.531977395390569 +4.655021703819993 +4.662029905531306 +4.498352535656417 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.429493503939355 +4.410885543205496 +4.448231417474431 +4.6562728773757405 +4.649129891979821 +4.43352271756291 +4.43352271756291 +4.585070500657054 +4.716549695711502 +4.7039874056290465 +4.498352535656417 +4.414936323624801 +4.471121086488168 +4.723400793471849 +4.753120216906312 +4.738328274948345 +4.471121086488168 +4.4040579122219 +4.561910464792896 +4.753120216906312 +4.723400793471849 +4.471121086488168 +4.68279634091494 +4.764851533139823 +4.5495826726201285 +4.436898945829201 +4.4728501212062755 +4.7266202754230955 +4.791447617972283 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.791122580013401 +4.579235806949711 +4.434345980166132 +4.434345980166132 +4.470390948165326 +4.715635347795578 
+4.858049419420977 +4.858049419420977 +4.858049419420977 +4.683632848810578 +4.448945275450309 +4.431999507328421 +4.431999507328421 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.477153658161236 +4.741519592641198 +4.8848078319332995 +4.825007070248277 +4.596006379601555 +4.446694387067265 +4.43087120951505 +4.415458429428599 +4.415458429428599 +4.415458429428599 +4.415458429428599 +4.43087120951505 +4.43087120951505 +4.428749545454485 +4.428749545454485 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.444561438329306 +4.61947756989829 +4.815381064171493 +4.854921788484206 +4.6798086704348965 +4.485682117209293 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.400997163296846 +3.3674002013493793 +0.5008125698600319 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6056272403357319 +2.0609275239094176 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.261761202020505 +2.290883657014198 +3.4939392236770477 +4.32539149528159 +4.264296363513784 +4.281840839476571 +4.3001441763240145 +4.334884057988927 +4.3666145665109415 +4.410905534283973 +4.449201693600046 +4.469240513255388 +4.459631613915226 +4.451997806809301 +4.401672253049345 +4.3571598314339965 +4.326291782126872 +4.326291782126872 +4.330384925242272 +4.311196423331765 +4.311196423331765 +4.3495197621021 +4.444561438329306 +4.517386266312412 +4.525148982297988 +4.508785647745265 +4.415569757481522 +4.354374661974843 +4.359813865820298 +4.359813865820298 +4.359813865820298 +4.425150693598551 +4.5350396637789485 +4.556828312189985 +4.456666181535883 +4.378997457380755 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.38493610611664 +4.365786209121337 +4.365786209121337 +4.394200048734022 +4.536064520713749 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.525181448575433 +4.394200048734022 +4.394200048734022 +4.543017950066887 +4.657957256979294 +4.629658286286764 +4.460474670771755 +4.410885543205496 +4.410885543205496 +4.429493503939355 +4.410885543205496 +4.410885543205496 +4.410885543205496 +4.410885543205496 +4.566659309953631 +4.69066493212302 +4.4958606434723105 +4.396532494921532 +4.498352535656417 +4.7039874056290465 +4.716549695711502 +4.585070500657054 +4.414936323624801 +4.414936323624801 +4.630575200519795 +4.740587601014347 +4.753120216906312 +4.542937271084943 +4.4040579122219 +4.599691079805129 +4.753120216906312 +4.723400793471849 +4.490078739911335 +4.707820717224033 +4.738328274948345 +4.471121086488168 +4.4222123806403095 +4.587688824985208 +4.791447617972283 +4.791447617972283 +4.791447617972283 +4.791447617972283 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.6134253459783015 +4.416711565401162 +4.416711565401162 +4.452249827320327 +4.700234519030086 +4.828385320427853 +4.843246627448329 +4.858049419420977 
+4.683632848810578 +4.448945275450309 +4.431999507328421 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.515914357186943 +4.790040730028535 +4.858049419420977 +4.771843886220461 +4.513544964723501 +4.415404133782649 +4.415404133782649 +4.407420763274691 +4.415458429428599 +4.415458429428599 +4.415458429428599 +4.415458429428599 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.472934417851474 +4.6531316008140005 +4.8435411247040765 +4.861973736884253 +4.672383068369923 +4.479437422437865 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.392582383584325 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +3.4282319347219907 +0.6264258425886293 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8867520004449787 +2.212645948944707 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2694046972337256 +2.317851220971419 +4.278262000226555 +4.270244682654806 +4.245536640869366 +4.245536640869366 +4.264296363513784 +4.264296363513784 +4.264296363513784 +4.285099320258072 +4.301694080793463 +4.351015773715063 +4.410905534283973 +4.459631613915226 +4.469240513255388 +4.462569174514068 +4.433425791049117 +4.3571598314339965 +4.307217958916177 +4.307217958916177 +4.307217958916177 +4.307217958916177 +4.330384925242272 +4.397327275394801 +4.508785647745265 +4.525148982297988 +4.517386266312412 +4.444561438329306 +4.3495197621021 +4.340622378893027 +4.359813865820298 +4.359813865820298 +4.392622212842695 +4.494381313041727 +4.550140350585692 +4.483266161362857 +4.378997457380755 +4.38493610611664 +4.38493610611664 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.38493610611664 +4.5461808736533715 +4.6226847512678555 +4.6226847512678555 +4.609159296939702 +4.476523478952587 +4.394200048734022 +4.394200048734022 +4.553310052157446 +4.657957256979294 +4.5968888406633095 +4.422758162648074 +4.410885543205496 +4.410885543205496 +4.410885543205496 +4.410885543205496 +4.407795588564746 +4.407795588564746 +4.457759882887575 +4.676655864461275 +4.595530339448932 +4.396532494921532 +4.426558867884018 +4.671919701176808 +4.716549695711502 +4.647609269642364 +4.414936323624801 +4.396532494921532 +4.5480985287219635 +4.716549695711502 +4.740587601014347 +4.629292470505407 +4.4222123806403095 +4.610962775441974 +4.753120216906312 +4.723400793471849 +4.521563209510312 +4.726826567571509 +4.69168036796802 +4.4222123806403095 +4.43352271756291 +4.707820717224033 +4.764851533139823 +4.778088299149454 +4.791447617972283 +4.791447617972283 +4.791447617972283 +4.803215945464247 +4.681857368058384 +4.434345980166132 +4.416711565401162 +4.434345980166132 +4.670612366078799 +4.81729793871423 +4.828385320427853 +4.828385320427853 +4.670612366078799 +4.441702784036244 
+4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.566145733226151 +4.815335102610012 +4.858049419420977 +4.683632848810578 +4.441702784036244 +4.399196764220106 +4.399196764220106 +4.399196764220106 +4.399196764220106 +4.399196764220106 +4.407420763274691 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.413341468587734 +4.507635367903604 +4.711105210397496 +4.863636363636363 +4.8435411247040765 +4.6531316008140005 +4.456023364726183 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.392582383584325 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +3.5115871017929865 +0.6938078777165453 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6968440009240968 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.251535166886691 +2.2784211733545465 +2.3775439650252617 +4.307047904522636 +4.320220278148516 +4.2636275033140185 +4.245120702397898 +4.245536640869366 +4.245536640869366 +4.264296363513784 +4.2610653657318265 +4.2610653657318265 +4.282040951144447 +4.282040951144447 +4.332417955746382 +4.410905534283973 +4.4569885833271 +4.4812489691440245 +4.477376686372026 +4.417615488324072 +4.3229225252815375 +4.288085371739211 +4.307217958916177 +4.307217958916177 +4.311196423331765 +4.364189297235156 +4.468584860676432 +4.525148982297988 +4.525148982297988 +4.468584860676432 +4.364189297235156 +4.335175833863043 +4.359813865820298 +4.340622378893027 +4.354374661974843 +4.466270560425329 +4.556828312189985 +4.51043569835049 +4.392622212842695 +4.340622378893027 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.365786209121337 +4.346668293077823 +4.378997457380755 +4.548365008356385 +4.613831733428877 +4.6226847512678555 +4.603447413405319 +4.438908795687557 +4.375275290067039 +4.387882008174667 +4.564743476593351 +4.6356545149438695 +4.560601716090231 +4.400860279484698 +4.392438741386968 +4.392438741386968 +4.389176702879745 +4.407795588564746 +4.407795588564746 +4.407795588564746 +4.622018630619488 +4.676655864461275 +4.438734902106786 +4.396532494921532 +4.613200121218277 +4.7039874056290465 +4.6870297623887245 +4.445436290543406 +4.396532494921532 +4.4794059114735 +4.7039874056290465 +4.728275832763921 +4.671919701176808 +4.43352271756291 +4.647405571565398 +4.753120216906312 +4.723400793471849 +4.599691079805129 +4.753120216906312 +4.629292470505407 +4.386157512505592 +4.561910464792896 +4.753120216906312 +4.753120216906312 +4.764851533139823 +4.778088299149454 +4.791447617972283 +4.791447617972283 +4.7266202754230955 +4.447681790841982 +4.399382355503642 +4.434345980166132 +4.651613169058767 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.670612366078799 +4.434345980166132 +4.399382355503642 +4.399382355503642 
+4.407435659740823 +4.415404133782649 +4.415404133782649 +4.415404133782649 +4.431999507328421 +4.623573129679567 +4.845146974506639 +4.830804040442458 +4.60435607626104 +4.415404133782649 +4.399196764220106 +4.399196764220106 +4.399196764220106 +4.383418627512178 +4.399196764220106 +4.399196764220106 +4.396937585433321 +4.396937585433321 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.413341468587734 +4.413341468587734 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.413341468587734 +4.543333818464117 +4.752475355734676 +4.8816267976576375 +4.8435411247040765 +4.638346592840813 +4.446694387067265 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.379023901017087 +4.366025403784438 +3.5211359667324267 +0.7189354001027244 +0.14421518948927226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.11537255792817191 +2.441092972129308 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.251535166886691 +2.2840022396629216 +2.4620011054082873 +4.401911550094636 +4.427387113724792 +4.405171091330308 +4.392926544974844 +4.350020346986972 +4.299316960299984 +4.242051477075593 +4.242051477075593 +4.2610653657318265 +4.2610653657318265 +4.282040951144447 +4.262824069542097 +4.282040951144447 +4.34870927122472 +4.435608687796262 +4.469240513255388 +4.484738687018837 +4.4538393189583685 +4.372425188923552 +4.307217958916177 +4.288085371739211 +4.307217958916177 +4.307217958916177 +4.326291782126872 +4.444561438329306 +4.517386266312412 +4.525148982297988 +4.508785647745265 +4.411424193686267 +4.330384925242272 +4.315955355853033 +4.340622378893027 +4.340622378893027 +4.4201516731586725 +4.543029917126171 +4.520058934112502 +4.406415144859124 +4.340622378893027 +4.340622378893027 +4.365786209121337 +4.365786209121337 +4.346668293077823 +4.346668293077823 +4.346668293077823 +4.392622212842695 +4.55656589041628 +4.612171103755108 +4.613831733428877 +4.580899023800447 +4.40691351739986 +4.356445868152194 +4.40691351739986 +4.596668132191088 +4.6356545149438695 +4.488865705516085 +4.37907711329853 +4.37907711329853 +4.389176702879745 +4.389176702879745 +4.389176702879745 +4.389176702879745 +4.510643118126104 +4.69281418415078 +4.541142071059501 +4.370736046613753 +4.529417439370056 +4.7039874056290465 +4.7039874056290465 +4.498352535656417 +4.378346264281328 +4.426558867884018 +4.6870297623887245 +4.716549695711502 +4.7039874056290465 +4.4794059114735 +4.647609269642364 +4.740587601014347 +4.7124748910849155 +4.665257422053748 +4.753120216906312 +4.542937271084943 +4.43352271756291 +4.707820717224033 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.764851533139823 +4.752498445713435 +4.515666105800818 +4.391307496138788 +4.391307496138788 +4.594292603304557 +4.804805898398897 +4.81729793871423 +4.81729793871423 +4.681857368058384 +4.434345980166132 +4.399382355503642 +4.399382355503642 +4.399382355503642 
+4.399382355503642 +4.407435659740823 +4.415404133782649 +4.431999507328421 +4.683632848810578 +4.858049419420977 +4.798991798454706 +4.553054939770582 +4.399196764220106 +4.399196764220106 +4.399196764220106 +4.383418627512178 +4.383418627512178 +4.383418627512178 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.389854478801128 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.398374751597663 +4.428749545454485 +4.5798483268413275 +4.791710163401963 +4.89873092730281 +4.830860339288274 +4.61947756989829 +4.444561438329306 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.379023901017087 +4.379023901017087 +4.366025403784438 +4.3643454850561305 +4.421219991103758 +3.4706170303476207 +0.6960283355891734 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3461049112705705 +2.2060580360966195 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2583256956715583 +2.2868502656880048 +2.514473207177659 +4.392918295777071 +4.405239266363791 +4.416720743103606 +4.416380983659691 +4.427387113724792 +4.417257564352126 +4.364961598717095 +4.260323041219008 +4.242051477075593 +4.2610653657318265 +4.2610653657318265 +4.2610653657318265 +4.262824069542097 +4.262824069542097 +4.280004643579906 +4.383033768797953 +4.446713403848863 +4.469240513255388 +4.4756868585019225 +4.417615488324072 +4.3229225252815375 +4.288085371739211 +4.307217958916177 +4.307217958916177 +4.307217958916177 +4.393499185832402 +4.503640969757196 +4.525148982297988 +4.517386266312412 +4.440466121467073 +4.326291782126872 +4.311196423331765 +4.315955355853033 +4.321447907498091 +4.387552875155975 +4.520058934112502 +4.528831176637506 +4.4201516731586725 +4.340622378893027 +4.321447907498091 +4.340622378893027 +4.346668293077823 +4.346668293077823 +4.346668293077823 +4.346668293077823 +4.425150693598551 +4.571165324280725 +4.590893017970352 +4.613831733428877 +4.548238985236116 +4.375275290067039 +4.356445868152194 +4.419975864651762 +4.603447413405319 +4.61789817246824 +4.438908795687557 +4.353245755530516 +4.37907711329853 +4.370736046613753 +4.370736046613753 +4.370736046613753 +4.419759557158932 +4.662029905531306 +4.646852001521213 +4.407795588564746 +4.438734902106786 +4.69066493212302 +4.69281418415078 +4.577606921643228 +4.378346264281328 +4.378346264281328 +4.630575200519795 +4.716549695711502 +4.716549695711502 +4.577606921643228 +4.671919701176808 +4.728275832763921 +4.728397247583595 +4.726826567571509 +4.740409145179578 +4.471121086488168 +4.580837290747216 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.592455460018538 +4.383281307781995 +4.373948985744692 +4.534705884651435 +4.76455519928248 +4.791447617972283 +4.803215945464247 +4.700234519030086 +4.434345980166132 +4.3823982849430525 +4.3823982849430525 +4.3823982849430525 
+4.3823982849430525 +4.3823982849430525 +4.3823982849430525 +4.452249827320327 +4.708250006998824 +4.858049419420977 +4.771843886220461 +4.497626528797163 +4.399196764220106 +4.399196764220106 +4.383418627512178 +4.383418627512178 +4.383418627512178 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.389854478801128 +4.398374751597663 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.439444868219007 +4.61947756989829 +4.830860339288274 +4.914400272933426 +4.811314015577191 +4.600754584726002 +4.428749545454485 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.356534660165839 +4.383889436681951 +4.460742127895382 +4.64212860845057 +4.83515291684364 +3.4115167815690217 +0.6398616579338059 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7683375209644598 +2.100723074504167 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.290474143048891 +2.5242140993240794 +4.23457751825265 +4.230127781115758 +4.263553618346956 +4.282391253599503 +4.300525727563219 +4.316206246689385 +4.316206246689385 +4.260543986543284 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.2610653657318265 +4.2610653657318265 +4.2610653657318265 +4.262824069542097 +4.262824069542097 +4.315731378555368 +4.410905534283973 +4.459631613915226 +4.469240513255388 +4.451997806809301 +4.372425188923552 +4.288085371739211 +4.288085371739211 +4.307217958916177 +4.288085371739211 +4.3387810056485865 +4.464382022636259 +4.525148982297988 +4.525148982297988 +4.479478935593598 +4.360377731938609 +4.291975060537432 +4.291975060537432 +4.296737346030571 +4.3495197621021 +4.494381313041727 +4.543029917126171 +4.433633609264543 +4.315955355853033 +4.315955355853033 +4.315955355853033 +4.340622378893027 +4.346668293077823 +4.346668293077823 +4.327610812997139 +4.44374109684803 +4.577601706931892 +4.590893017970352 +4.590893017970352 +4.500777606857457 +4.353245755530516 +4.334367732518834 +4.438908795687557 +4.6226847512678555 +4.58852136234472 +4.38493610611664 +4.334367732518834 +4.334367732518834 +4.360278408571066 +4.370736046613753 +4.370736046613753 +4.570132142342086 +4.679949111556313 +4.491801074676181 +4.389176702879745 +4.646852001521213 +4.704934473220487 +4.629658286286764 +4.389176702879745 +4.360417319901349 +4.5480985287219635 +4.7039874056290465 +4.716549695711502 +4.701532155459308 +4.69066493212302 +4.716549695711502 +4.716549695711502 +4.728275832763921 +4.6870297623887245 +4.4834255156601905 +4.707820717224033 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.690283055605783 +4.412180874957363 +4.34817212384323 +4.452260404566443 +4.738328274948345 +4.778088299149454 +4.791447617972283 +4.709360105415576 +4.4269312816945705 +4.3823982849430525 +4.3823982849430525 +4.3823982849430525 
+4.3823982849430525 +4.3823982849430525 +4.3823982849430525 +4.445120250365041 +4.736141839889283 +4.828385320427853 +4.708250006998824 +4.452249827320327 +4.399196764220106 +4.399196764220106 +4.383418627512178 +4.383418627512178 +4.383418627512178 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.365815996925577 +4.374952900097492 +4.383889436681951 +4.462893717258573 +4.633974596215562 +4.8247089911549255 +4.89873092730281 +4.791710163401963 +4.582193139573988 +4.428749545454485 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.383889436681951 +4.437253921568383 +4.5798483268413275 +4.75 +4.890373906609362 +4.955048454425975 +4.954466537076697 +3.383068941784411 +0.519387512494303 +0.14421518948927226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8165844264500439 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.290474143048891 +2.4912522817563203 +4.189445410195999 +4.209330964305835 +4.206280547491993 +4.206280547491993 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.223061383739665 +4.242051477075593 +4.242051477075593 +4.2610653657318265 +4.2610653657318265 +4.2610653657318265 +4.2610653657318265 +4.262824069542097 +4.262824069542097 +4.332417955746382 +4.437940884635671 +4.469240513255388 +4.472314528156556 +4.415390784022273 +4.3229225252815375 +4.288085371739211 +4.288085371739211 +4.288085371739211 +4.30404791709206 +4.436755038723429 +4.512159965648277 +4.525148982297988 +4.503640969757196 +4.392755007565345 +4.311196423331765 +4.291975060537432 +4.291975060537432 +4.330384925242272 +4.461419383366735 +4.536738078402639 +4.466270560425329 +4.330384925242272 +4.315955355853033 +4.315955355853033 +4.321447907498091 +4.327610812997139 +4.327610812997139 +4.327610812997139 +4.474628088285788 +4.590893017970352 +4.590893017970352 +4.577601706931892 +4.425150693598551 +4.334367732518834 +4.334367732518834 +4.488865705516085 +4.6226847512678555 +4.5461808736533715 +4.353245755530516 +4.334367732518834 +4.334367732518834 +4.360278408571066 +4.360278408571066 +4.4702902099390895 +4.679949111556313 +4.587726262875978 +4.370736046613753 +4.570132142342086 +4.69281418415078 +4.679949111556313 +4.438734902106786 +4.334548778452816 +4.4794059114735 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.638630209477494 +4.585070500657054 +4.740587601014347 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.740409145179578 +4.4834255156601905 +4.34817212384323 +4.3936790662814325 +4.673607649283862 +4.764851533139823 +4.764851533139823 +4.723400793471849 +4.459144616499847 +4.356951756012255 +4.356951756012255 +4.356951756012255 +4.3823982849430525 
+4.3823982849430525 +4.365804009510418 +4.466027460208323 +4.753526533936163 +4.81729793871423 +4.651613169058767 +4.416711565401162 +4.365804009510418 +4.374649654129857 +4.374649654129857 +4.383418627512178 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.381142838860653 +4.477153658161236 +4.686946241913178 +4.863636363636363 +4.89873092730281 +4.772087701603047 +4.543333818464117 +4.398374751597663 +4.3699279208395785 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.383889436681951 +4.456023364726183 +4.582193139573988 +4.752475355734676 +4.863636363636363 +4.927710408327659 +4.937237907553747 +4.89873092730281 +4.772087701603047 +4.561499360541137 +0.6347160311526148 +0.40378354047257137 +0.14421518948927226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +1.3791909450596869 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.289362850895306 +2.408217453072381 +4.2924919737237754 +4.308499281988651 +4.269324112263929 +4.229957398299591 +4.226680526381007 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.223061383739665 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.243532542904225 +4.262824069542097 +4.280004643579906 +4.3666145665109415 +4.446713403848863 +4.469240513255388 +4.451997806809301 +4.352495164972238 +4.268914746068856 +4.268914746068856 +4.268914746068856 +4.288085371739211 +4.369978307763559 +4.498827660612522 +4.525148982297988 +4.525148982297988 +4.456924017946896 +4.326291782126872 +4.291975060537432 +4.272744302947785 +4.291975060537432 +4.429161646579058 +4.536738078402639 +4.478034066281278 +4.330384925242272 +4.296737346030571 +4.296737346030571 +4.296737346030571 +4.321447907498091 +4.327610812997139 +4.340622378893027 +4.514317882790707 +4.590893017970352 +4.590893017970352 +4.571165324280725 +4.373530165834871 +4.308646412990123 +4.346668293077823 +4.529291803534183 +4.6226847512678555 +4.488865705516085 +4.334367732518834 +4.334367732518834 +4.334367732518834 +4.315633110904785 +4.372230331526471 +4.620764512081358 +4.652239127360683 +4.419759557158932 +4.491801074676181 +4.69281418415078 +4.69281418415078 +4.510643118126104 +4.334548778452816 +4.419759557158932 +4.676655864461275 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.69066493212302 +4.7039874056290465 +4.716549695711502 +4.728275832763921 +4.740587601014347 +4.753120216906312 +4.753120216906312 +4.629292470505407 +4.365563274055217 +4.365563274055217 +4.592455460018538 +4.753120216906312 +4.753120216906312 +4.726826567571509 +4.4834255156601905 +4.34817212384323 +4.34817212384323 +4.356951756012255 +4.356951756012255 
+4.356951756012255 +4.340367332701628 +4.484563222116503 +4.771738592817006 +4.804805898398897 +4.6134253459783015 +4.3823982849430525 +4.349649451498392 +4.349649451498392 +4.365804009510418 +4.363362166374713 +4.372291676433294 +4.381142838860653 +4.381142838860653 +4.381142838860653 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.381142838860653 +4.495245211892045 +4.702967088304484 +4.8584171544605805 +4.872529839105629 +4.741519592641198 +4.519591849810563 +4.389854478801128 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.383889436681951 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.398374751597663 +4.472934417851474 +4.6149276187135 +4.772087701603047 +4.8816267976576375 +4.927710408327659 +4.937237907553747 +4.914400272933426 +4.8435411247040765 +4.672383068369923 +4.470708196465817 +4.38210182753176 +4.294309041013602 +0.7504206761995382 +0.3172647300558147 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +2.388738116386298 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.285534687051869 +2.325359830869213 +3.617318141801828 +4.416211745185024 +4.404716270919072 +4.3815185041079445 +4.3549764147420245 +4.287843642057993 +4.206280547491993 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.2243343199280865 +4.223061383739665 +4.222982331221807 +4.222982331221807 +4.222982331221807 +4.242051477075593 +4.242051477075593 +4.242051477075593 +4.262824069542097 +4.262824069542097 +4.297355012936745 +4.410905534283973 +4.469240513255388 +4.472314528156556 +4.397344032056895 +4.30404791709206 +4.268914746068856 +4.268914746068856 +4.268914746068856 +4.3387810056485865 +4.460527625640681 +4.519855048515984 +4.525148982297988 +4.494288594971252 +4.3571598314339965 +4.272744302947785 +4.272744302947785 +4.272744302947785 +4.393499185832402 +4.522944253053671 +4.504730153028199 +4.3495197621021 +4.277549565796887 +4.296737346030571 +4.296737346030571 +4.293673850523379 +4.299288615843937 +4.357161204883953 +4.548365008356385 +4.590893017970352 +4.590893017970352 +4.532577772405431 +4.337867549100633 +4.280057633263803 +4.359813865820298 +4.58852136234472 +4.603447413405319 +4.419975864651762 +4.315633110904785 +4.315633110904785 +4.315633110904785 +4.315633110904785 +4.51854399108187 +4.657957256979294 +4.531977395390569 +4.419759557158932 +4.652239127360683 +4.69281418415078 +4.570132142342086 +4.352512026646508 +4.370736046613753 +4.6311819590611725 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.728275832763921 +4.7124748910849155 +4.43352271756291 +4.34817212384323 +4.5024876237639 +4.740409145179578 +4.753120216906312 +4.740409145179578 +4.540628945133276 +4.331160119833364 +4.331160119833364 +4.34817212384323 
+4.34817212384323 +4.340367332701628 +4.340367332701628 +4.4778617586030505 +4.760933045899426 +4.791122580013401 +4.572949016875158 +4.365804009510418 +4.349649451498392 +4.349649451498392 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.363362166374713 +4.363362166374713 +4.356531416969382 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.396937585433321 +4.513544964723501 +4.715635347795578 +4.845146974506639 +4.830804040442458 +4.702967088304484 +4.495245211892045 +4.381142838860653 +4.351006892703883 +4.351006892703883 +4.360574686677896 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.3699279208395785 +4.356534660165839 +4.3699279208395785 +4.413341468587734 +4.507635367903604 +4.6531316008140005 +4.811314015577191 +4.89873092730281 +4.927710408327659 +4.937237907553747 +4.927710408327659 +4.861973736884253 +4.711105210397496 +4.543333818464117 +4.413341468587734 +4.354800864958 +4.342042743020706 +4.342042743020706 +3.3511576086498795 +0.8011534860348615 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4614592400945998 +2.1290389837345884 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2548582107881368 +2.2827562121821066 +2.316860727329738 +3.3708624975062507 +4.372000496216216 +4.356991511156253 +4.3549764147420245 +4.32489895623564 +4.234012918746753 +4.209330964305835 +4.209330964305835 +4.206280547491993 +4.2243343199280865 +4.2243343199280865 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.204412773112099 +4.222982331221807 +4.223061383739665 +4.223061383739665 +4.241708610495863 +4.241708610495863 +4.242051477075593 +4.243532542904225 +4.262824069542097 +4.3314640165552545 +4.435608687796262 +4.469240513255388 +4.435608687796262 +4.334884057988927 +4.2497299242386894 +4.2497299242386894 +4.2497299242386894 +4.30404791709206 +4.433425791049117 +4.507264102733968 +4.519855048515984 +4.517386266312412 +4.407725684983931 +4.288085371739211 +4.272744302947785 +4.253531640965672 +4.3571598314339965 +4.522944253053671 +4.514260294902164 +4.345288092527255 +4.274291885177432 +4.274291885177432 +4.274291885177432 +4.274291885177432 +4.280057633263803 +4.368581807425848 +4.571165324280725 +4.590893017970352 +4.590893017970352 +4.486557454816782 +4.299288615843937 +4.280057633263803 +4.406415144859124 +4.609159296939702 +4.557404016105389 +4.346668293077823 +4.2970852712950425 +4.315633110904785 +4.315633110904785 +4.419975864651762 +4.6226847512678555 +4.612010239449515 +4.394200048734022 +4.613471566824906 +4.679949111556313 +4.6311819590611725 +4.370736046613753 +4.334548778452816 +4.587726262875978 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.545836543402008 +4.339552889545689 +4.412180874957363 +4.706471321315166 +4.753120216906312 +4.753120216906312 +4.610962775441974 +4.34817212384323 +4.331160119833364 +4.331160119833364 +4.331160119833364 
+4.331160119833364 +4.331160119833364 +4.490078739911335 +4.74974235645692 +4.76455519928248 +4.528188482906883 +4.340367332701628 +4.349649451498392 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.356531416969382 +4.365815996925577 +4.365815996925577 +4.365815996925577 +4.396937585433321 +4.5471908537001 +4.7533262051837655 +4.845146974506639 +4.830804040442458 +4.677428238159029 +4.477153658161236 +4.365815996925577 +4.3367692831103835 +4.3367692831103835 +4.351006892703883 +4.351006892703883 +4.351006892703883 +4.360574686677896 +4.360574686677896 +4.3699279208395785 +4.383889436681951 +4.439444868219007 +4.543333818464117 +4.691713156761751 +4.8247089911549255 +4.914400272933426 +4.927710408327659 +4.937237907553747 +4.927710408327659 +4.8816267976576375 +4.752475355734676 +4.5798483268413275 +4.4210464010109 +4.368169508740839 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +0.6559621560518574 +0.4902956806169465 +0.2018996889437794 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8825278501371363 +2.194405655016403 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.2740761375503107 +2.311089856465691 +2.417643118526024 +4.2206086405597025 +4.197071138300812 +4.17835618067723 +4.172308385784067 +4.189445410195999 +4.189445410195999 +4.209330964305835 +4.209330964305835 +4.209330964305835 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.223061383739665 +4.223061383739665 +4.223061383739665 +4.223061383739665 +4.223061383739665 +4.223061383739665 +4.222982331221807 +4.243532542904225 +4.2788791060630365 +4.380505452216383 +4.459631613915226 +4.4569885833271 +4.397344032056895 +4.283023643208255 +4.2497299242386894 +4.2497299242386894 +4.2470628308633005 +4.384942201292258 +4.494343167426363 +4.519855048515984 +4.525148982297988 +4.452791429304008 +4.30404791709206 +4.253531640965672 +4.253531640965672 +4.3387810056485865 +4.508785647745265 +4.522944253053671 +4.360377731938609 +4.2549350594948505 +4.2549350594948505 +4.274291885177432 +4.274291885177432 +4.274291885177432 +4.401638244326036 +4.577601706931892 +4.590893017970352 +4.577601706931892 +4.4201516731586725 +4.280057633263803 +4.280057633263803 +4.469148351445398 +4.599002836703154 +4.494664026499109 +4.2970852712950425 +4.2970852712950425 +4.2970852712950425 +4.327610812997139 +4.58852136234472 +4.6356545149438695 +4.451863182214335 +4.536064520713749 +4.668188175579596 +4.652239127360683 +4.400860279484698 +4.313355124891885 +4.541142071059501 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.6705278718852306 +4.360278408571066 +4.331189168179998 +4.577606921643228 +4.740587601014347 +4.753120216906312 +4.673607649283862 +4.375380027466919 +4.331160119833364 +4.331160119833364 +4.331160119833364 +4.331160119833364 
+4.3145871600249 +4.464400344563128 +4.726826567571509 +4.726826567571509 +4.509108982480729 +4.340367332701628 +4.324253970781363 +4.321599429301051 +4.321599429301051 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.347162035888704 +4.347162035888704 +4.347162035888704 +4.405121922133547 +4.566145733226151 +4.771843886220461 +4.858049419420977 +4.830804040442458 +4.677428238159029 +4.4592965723509685 +4.365815996925577 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.351006892703883 +4.381142838860653 +4.4592965723509685 +4.590812073003308 +4.747524644977324 +4.84508484236849 +4.914400272933426 +4.914400272933426 +4.927710408327659 +4.927710408327659 +4.89873092730281 +4.791710163401963 +4.6149276187135 +4.453819126346629 +4.368169508740839 +4.354800864958 +4.354800864958 +4.354800864958 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.343755683862673 +3.3555162235739977 +0.9014987265478416 +0.3749445413935266 +0.14421518948927226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7522202707914554 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.264828130972697 +2.2960327368554108 +2.3307592583286696 +2.6001616363942115 +4.240432247167452 +4.197071138300812 +4.17835618067723 +4.172308385784067 +4.189445410195999 +4.189445410195999 +4.189445410195999 +4.192161408306018 +4.192161408306018 +4.192161408306018 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.223061383739665 +4.223061383739665 +4.223061383739665 +4.222982331221807 +4.222982331221807 +4.314109655050784 +4.421815150590111 +4.469240513255388 +4.435608687796262 +4.333865602892183 +4.2280247218333145 +4.230559509060905 +4.2280247218333145 +4.334884057988927 +4.47123123823328 +4.502694043727065 +4.519855048515984 +4.489566010408934 +4.3571598314339965 +4.253531640965672 +4.25 +4.3229225252815375 +4.489566010408934 +4.517386266312412 +4.393499185832402 +4.2549350594948505 +4.2549350594948505 +4.2549350594948505 +4.2549350594948505 +4.2549350594948505 +4.446692229204218 +4.570197478760316 +4.590893017970352 +4.55656589041628 +4.3517175586913135 +4.260910257904718 +4.313053758086822 +4.541557094775911 +4.577601706931892 +4.411631509689652 +4.278776197768125 +4.278776197768125 +4.278776197768125 +4.51854399108187 +4.6356545149438695 +4.562746078431617 +4.51854399108187 +4.648026828159334 +4.657957256979294 +4.463924095533918 +4.295872802376734 +4.4702902099390895 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.508225910052507 +4.295872802376734 +4.438734902106786 +4.69066493212302 +4.728275832763921 +4.715349243520801 +4.445436290543406 +4.331160119833364 +4.331160119833364 +4.331160119833364 +4.3145871600249 +4.3145871600249 +4.464400344563128 
+4.7124748910849155 +4.726826567571509 +4.464400344563128 +4.3145871600249 +4.311815282769911 +4.321599429301051 +4.321599429301051 +4.321599429301051 +4.321599429301051 +4.321599429301051 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.331449528132573 +4.3799934275166095 +4.553807499358993 +4.753526533936163 +4.843246627448329 +4.830804040442458 +4.65822873316879 +4.4592965723509685 +4.365815996925577 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.372291676433294 +4.477153658161236 +4.623573129679567 +4.771843886220461 +4.8584171544605805 +4.8848078319332995 +4.8946862347346505 +4.911393962819666 +4.914400272933426 +4.89873092730281 +4.811314015577191 +4.633974596215562 +4.470708196465817 +4.38210182753176 +4.354800864958 +4.354800864958 +4.354800864958 +4.354800864958 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.336426518579296 +0.6932492436805235 +0.547964844339119 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +1.4721776460987375 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2583256956715583 +2.2770293828519175 +2.3133893077813523 +2.339865608300966 +2.604089149770337 +4.1708710668561695 +4.197071138300812 +4.17835618067723 +4.17835618067723 +4.172308385784067 +4.172308385784067 +4.172308385784067 +4.172308385784067 +4.192161408306018 +4.192161408306018 +4.192161408306018 +4.206280547491993 +4.206280547491993 +4.206280547491993 +4.188330631831401 +4.188330631831401 +4.188330631831401 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.204412773112099 +4.222982331221807 +4.241708610495863 +4.365645075900559 +4.459631613915226 +4.4569885833271 +4.380505452216383 +4.260323041219008 +4.2280247218333145 +4.2280247218333145 +4.281840839476571 +4.430470736639508 +4.502694043727065 +4.502694043727065 +4.512159965648277 +4.404476728520448 +4.265623984992915 +4.2305425448815 +4.30404791709206 +4.479478935593598 +4.512159965648277 +4.390161428226321 +4.2549350594948505 +4.235636591185085 +4.235636591185085 +4.2549350594948505 +4.269458948305532 +4.492262563861891 +4.563123965563946 +4.583108031777054 +4.514317882790707 +4.293673850523379 +4.260910257904718 +4.387552875155975 +4.577601706931892 +4.541557094775911 +4.293673850523379 +4.252737371600075 +4.278776197768125 +4.419975864651762 +4.6226847512678555 +4.609159296939702 +4.58852136234472 +4.6356545149438695 +4.6356545149438695 +4.51854399108187 +4.301991565390525 +4.410395058116138 +4.666403422086015 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.638630209477494 +4.341633051398518 +4.331189168179998 +4.638630209477494 +4.716549695711502 +4.716549695711502 +4.52708414790585 +4.313355124891885 +4.32212166094401 +4.32212166094401 +4.3145871600249 +4.3145871600249 +4.452260404566443 +4.723400793471849 +4.726826567571509 
+4.464400344563128 +4.311815282769911 +4.29565309083286 +4.29565309083286 +4.311815282769911 +4.311815282769911 +4.321599429301051 +4.321599429301051 +4.305945600391937 +4.305945600391937 +4.305945600391937 +4.331449528132573 +4.331449528132573 +4.363362166374713 +4.515666105800818 +4.736141839889283 +4.81729793871423 +4.791122580013401 +4.644528994853105 +4.445120250365041 +4.34127725333279 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.3367692831103835 +4.351006892703883 +4.390836464365371 +4.507271327463094 +4.656063156626178 +4.790040730028535 +4.845146974506639 +4.858049419420977 +4.858049419420977 +4.869137689341459 +4.8848078319332995 +4.872529839105629 +4.80780959574163 +4.6482824413086865 +4.4837541190057655 +4.374952900097492 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.220535793049947 +2.6231863385468026 +0.7785997875515944 +0.40378354047257137 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17305757537147226 +2.406513659435264 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 
+2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.2444401175322657 +2.251535166886691 +2.264828130972697 +2.2935260049111688 +2.3218005945232756 +2.345459739590308 +2.535262654748501 +4.228958610384918 +4.181121458434199 +4.1626765496543685 +4.1626765496543685 +4.1626765496543685 +4.1555211100661475 +4.172308385784067 +4.172308385784067 +4.172308385784067 +4.192161408306018 +4.192161408306018 +4.192161408306018 +4.188330631831401 +4.188330631831401 +4.188330631831401 +4.188330631831401 +4.188330631831401 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.204412773112099 +4.204412773112099 +4.203881081227029 +4.203881081227029 +4.279174511318519 +4.423669530065321 +4.469240513255388 +4.435608687796262 +4.314109655050784 +4.208043257279148 +4.209015986244905 +4.245536640869366 +4.383033768797953 +4.490181602580803 +4.502694043727065 +4.507264102733968 +4.446948255186931 +4.282040951144447 +4.2305425448815 +4.30404791709206 +4.479478935593598 +4.525148982297988 +4.404476728520448 +4.25 +4.235636591185085 +4.235636591185085 +4.235636591185085 +4.308286843238249 +4.514260294902164 +4.563123965563946 +4.570197478760316 +4.4514998653577775 +4.260910257904718 +4.280057633263803 +4.486557454816782 +4.590893017970352 +4.4514998653577775 +4.241889569491942 +4.249171266666321 +4.321447907498091 +4.58852136234472 +4.6226847512678555 +4.6226847512678555 +4.6356545149438695 +4.6356545149438695 +4.571250454545515 +4.294017679194993 +4.346668293077823 +4.641235384144579 +4.679949111556313 +4.679949111556313 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.508225910052507 +4.295872802376734 +4.489286681787702 +4.7039874056290465 +4.716549695711502 +4.622018630619488 +4.331189168179998 +4.295872802376734 +4.278812254077182 +4.295872802376734 +4.305089596671972 +4.426558867884018 +4.723400793471849 +4.726826567571509 +4.46182495923458 
+4.311815282769911 +4.29565309083286 +4.29565309083286 +4.29565309083286 +4.29565309083286 +4.29565309083286 +4.29565309083286 +4.29565309083286 +4.305945600391937 +4.305945600391937 +4.305945600391937 +4.337781444744843 +4.49671038392666 +4.718351151603021 +4.81729793871423 +4.791122580013401 +4.644528994853105 +4.4269312816945705 +4.331449528132573 +4.316287496333247 +4.316287496333247 +4.316287496333247 +4.34127725333279 +4.405121922133547 +4.532028815955536 +4.677428238159029 +4.80780959574163 +4.845146974506639 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.797628303871802 +4.656063156626178 +4.495245211892045 +4.381142838860653 +4.321318784036765 +4.321318784036765 +4.321318784036765 +4.331811824420404 +4.342042743020706 +4.342042743020706 +4.32994289459005 +4.32994289459005 +4.32994289459005 +4.342042743020706 +4.342042743020706 +4.342042743020706 +4.343755683862673 +4.270836668788456 +2.6391154618309955 +0.9303337382556007 +0.4902956806169465 +0.2595829718925291 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.7522202707914554 +2.227587031250235 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 
+2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.2060580360966195 +2.199780343719392 +2.154609032791459 +2.091620099140353 +2.0249250210520437 +2.1226824881662285 +2.410142949609684 +3.3395423085271414 +4.206236197371835 +4.1626765496543685 +4.1626765496543685 +4.1626765496543685 +4.1626765496543685 +4.1555211100661475 +4.172308385784067 +4.17835618067723 +4.181121458434199 +4.181121458434199 +4.175265798540549 +4.192161408306018 +4.188330631831401 +4.188330631831401 +4.170559406447948 +4.170559406447948 +4.170559406447948 +4.188330631831401 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.184777003835199 +4.223061383739665 +4.347444600831109 +4.459631613915226 +4.4569885833271 +4.380505452216383 +4.241708610495863 +4.185396469740754 +4.204796272439868 +4.333865602892183 +4.4811159587853355 +4.502694043727065 +4.502694043727065 +4.47123123823328 +4.3387810056485865 +4.2305425448815 +4.282040951144447 +4.479478935593598 +4.525148982297988 +4.421450634493992 +4.246184195954533 +4.235636591185085 +4.235636591185085 +4.235636591185085 +4.34170489193806 +4.543029917126171 +4.563123965563946 +4.556828312189985 +4.364189297235156 +4.241889569491942 +4.313053758086822 +4.55656589041628 +4.577601706931892 +4.3468683991859995 +4.239333061763718 +4.258480407358802 +4.505917988700361 +4.613831733428877 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.61789817246824 +4.346668293077823 +4.28666792956462 +4.603447413405319 +4.657957256979294 +4.668188175579596 +4.679949111556313 +4.679949111556313 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.662029905531306 +4.341633051398518 +4.341633051398518 +4.654819004174287 +4.7039874056290465 +4.676655864461275 +4.400860279484698 +4.278812254077182 +4.278812254077182 +4.278812254077182 +4.278812254077182 +4.382065911779677 +4.6856931845997325 +4.715349243520801 +4.4834255156601905 +4.29565309083286 +4.29565309083286 +4.29565309083286 
+4.29565309083286 +4.29565309083286 +4.29565309083286 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.29565309083286 +4.311815282769911 +4.437966363085956 +4.673739406868355 +4.778506435350758 +4.778506435350758 +4.644528994853105 +4.4269312816945705 +4.331449528132573 +4.316287496333247 +4.316287496333247 +4.316287496333247 +4.35442503592916 +4.500816995645366 +4.66327376376416 +4.786659476922233 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.858049419420977 +4.815335102610012 +4.696574961377673 +4.507271327463094 +4.38850465887066 +4.321318784036765 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.321318784036765 +4.321318784036765 +4.321318784036765 +4.331811824420404 +4.32994289459005 +4.32994289459005 +4.32994289459005 +4.32994289459005 +4.32994289459005 +4.343755683862673 +4.342042743020706 +3.4128708719498393 +0.6611918524689804 +0.9220007075428525 +0.547964844339119 +0.2884240647434684 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.3461049112705705 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.38453231097115825 
+0.38453231097115825 +0.38453231097115825 +0.38453231097115825 +0.40378354047257137 +0.4326218123061114 +0.5191309563131474 +0.6632793931627319 +0.8647921722156005 +0.9443664684730466 +0.7931129431694348 +2.6231417127608503 +3.49049925704238 +4.229762156405545 +4.206980666498831 +4.18527631598984 +4.18527631598984 +4.1626765496543685 +4.1626765496543685 +4.1626765496543685 +4.1626765496543685 +4.181121458434199 +4.181121458434199 +4.175265798540549 +4.170559406447948 +4.170559406447948 +4.175265798540549 +4.175265798540549 +4.175265798540549 +4.170559406447948 +4.167287633500955 +4.167287633500955 +4.167287633500955 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.185803260524182 +4.279174511318519 +4.421815150590111 +4.469240513255388 +4.421815150590111 +4.296670760432378 +4.181718697742469 +4.185396469740754 +4.281840839476571 +4.442556880764622 +4.502694043727065 +4.502694043727065 +4.494343167426363 +4.369978307763559 +4.226705965117246 +4.262824069542097 +4.479478935593598 +4.525148982297988 +4.434554479488243 +4.243532542904225 +4.211117229499433 +4.216438603867498 +4.216438603867498 +4.393499185832402 +4.550140350585692 +4.563123965563946 +4.514260294902164 +4.288894789602504 +4.2230503108410335 +4.4201516731586725 +4.590893017970352 +4.514317882790707 +4.252475355022676 +4.220334472865379 +4.399245415273999 +4.590893017970352 +4.613831733428877 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.392622212842695 +4.275537753904143 +4.571250454545515 +4.6356545149438695 +4.6356545149438695 +4.657957256979294 +4.669446132552235 +4.679949111556313 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.534232616473268 +4.278812254077182 +4.522663592490754 +4.69281418415078 +4.7039874056290465 +4.522663592490754 +4.278812254077182 +4.262256704184564 +4.278812254077182 +4.278812254077182 +4.341633051398518 +4.638630209477494 +4.7039874056290465 +4.4702902099390895 +4.275657408429723 +4.285516381473171 +4.29565309083286 +4.29565309083286 
+4.29565309083286 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.375380027466919 +4.610962775441974 +4.7657943350103915 +4.7657943350103915 +4.655506333614664 +4.437966363085956 +4.305945600391937 +4.29089492712542 +4.30174537583588 +4.347162035888704 +4.445120250365041 +4.6112620879661055 +4.753526533936163 +4.804805898398897 +4.828385320427853 +4.828385320427853 +4.828385320427853 +4.843246627448329 +4.858049419420977 +4.815335102610012 +4.715635347795578 +4.5471908537001 +4.407435659740823 +4.3367692831103835 +4.321318784036765 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3193194762604055 +4.331637880230829 +4.331637880230829 +4.315788338256202 +3.3448636559069373 +1.3705756052118994 +0.8141402729636837 +0.7497229403733847 +0.5191309563131474 +0.3172647300558147 +0.14421518948927226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.230741499582245 +0.40378354047257137 +0.6056272403357319 +0.8557848105107277 +0.7567958781011281 +2.656497955214265 +3.5190043127715844 +4.373454032767738 +4.359130606419303 +4.330714981509336 +4.302784596518338 +4.281359252824272 +4.247576635145128 +4.206980666498831 +4.18527631598984 +4.165678958020959 +4.165678958020959 +4.158771858263004 +4.175265798540549 +4.175265798540549 +4.158771858263004 
+4.158771858263004 +4.158771858263004 +4.153071544848954 +4.167287633500955 +4.167287633500955 +4.170559406447948 +4.188330631831401 +4.170559406447948 +4.163998999246982 +4.2016895928872255 +4.347444600831109 +4.459631613915226 +4.459631613915226 +4.347444600831109 +4.2016895928872255 +4.166017143754156 +4.242051477075593 +4.412959269104636 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.415390784022273 +4.243532542904225 +4.262824069542097 +4.479478935593598 +4.525148982297988 +4.434554479488243 +4.224183288970262 +4.187731060706238 +4.1917640638603615 +4.2305425448815 +4.459189377254781 +4.550140350585692 +4.563123965563946 +4.459189377254781 +4.235636591185085 +4.2549350594948505 +4.508358881478538 +4.590893017970352 +4.399245415273999 +4.220334472865379 +4.308286843238249 +4.563841360958199 +4.590893017970352 +4.590893017970352 +4.613831733428877 +4.6226847512678555 +4.464612354157994 +4.257331764657746 +4.5461808736533715 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.645199135042 +4.657957256979294 +4.679949111556313 +4.679949111556313 +4.666403422086015 +4.375275290067039 +4.3634085827425615 +4.676655864461275 +4.69281418415078 +4.629658286286764 +4.323181495798052 +4.262256704184564 +4.262256704184564 +4.262256704184564 +4.313355124891885 +4.605035406704348 +4.7039874056290465 +4.489286681787702 +4.2589398463924955 +4.2589398463924955 +4.275657408429723 +4.285516381473171 +4.26937230025452 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.280067129658118 +4.33685742855189 +4.533871840011103 +4.7124748910849155 +4.753120216906312 +4.673607649283862 +4.46182495923458 +4.311815282769911 +4.2765310720585505 +4.29089492712542 +4.365563274055217 +4.528188482906883 +4.691710049093213 +4.791122580013401 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.81729793871423 +4.791122580013401 +4.718351151603021 +4.553807499358993 +4.416711565401162 +4.3367692831103835 +4.323160554031693 +4.321318784036765 
+4.321318784036765 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.310241109073139 +4.310241109073139 +4.334450971017986 +3.337756614473365 +2.633505214007222 +0.7101596369167549 +0.894993423848998 +0.8073046643997408 +0.6344548999581567 +0.4614592400945998 +0.2884240647434684 +0.14421518948927226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.17305757537147226 +0.3461049112705705 +0.5191309563131474 +0.6821398934982863 +0.950145549599057 +0.740608520590464 +2.6509483927138517 +3.466713592697833 +4.394440664701463 +4.3967083155925035 +4.395466391668603 +4.394440664701463 +4.382782218537319 +4.356991511156253 +4.330714981509336 +4.281359252824272 +4.229762156405545 +4.187847747763415 +4.158771858263004 +4.158771858263004 +4.158771858263004 +4.158771858263004 +4.158771858263004 +4.158771858263004 +4.153071544848954 +4.153071544848954 +4.153071544848954 +4.149583103528658 +4.167422873107778 +4.167422873107778 +4.163998999246982 +4.279174511318519 +4.421815150590111 +4.469240513255388 +4.407393740532426 +4.24013464324727 +4.162310183365068 +4.184777003835199 +4.3666145665109415 +4.477376686372026 +4.502694043727065 +4.502694043727065 +4.442556880764622 +4.2610653657318265 +4.262824069542097 +4.464382022636259 +4.525148982297988 +4.446948255186931 +4.243532542904225 +4.187731060706238 
+4.187731060706238 +4.246184195954533 +4.499344690879607 +4.550140350585692 +4.550140350585692 +4.373020992536642 +4.194387164274222 +4.3201913295651035 +4.570197478760316 +4.541557094775911 +4.2719433122175925 +4.239333061763718 +4.524127578822819 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.603682897163525 +4.539257872104618 +4.267819388224821 +4.529291803534183 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.657957256979294 +4.668188175579596 +4.5968888406633095 +4.304971199174973 +4.552289840148148 +4.69281418415078 +4.69281418415078 +4.444957838142898 +4.262256704184564 +4.262256704184564 +4.262256704184564 +4.278812254077182 +4.559466509233962 +4.7039874056290465 +4.501631338735831 +4.275657408429723 +4.2589398463924955 +4.2589398463924955 +4.2589398463924955 +4.2589398463924955 +4.2589398463924955 +4.26937230025452 +4.280067129658118 +4.29565309083286 +4.46182495923458 +4.690283055605783 +4.753120216906312 +4.706471321315166 +4.4834255156601905 +4.302198258940722 +4.250998675515501 +4.29565309083286 +4.412180874957363 +4.59029185398596 +4.734199167934266 +4.791447617972283 +4.791447617972283 +4.791447617972283 +4.81729793871423 +4.81729793871423 +4.804805898398897 +4.718351151603021 +4.572949016875158 +4.408981720886764 +4.316287496333247 +4.287899053243774 +4.30174537583588 +4.312526019392585 +4.310612764776552 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.310241109073139 +4.183067585707844 +3.3884368516118326 +2.633124237316123 +0.721251949749206 +0.890687515674526 +0.8647921722156005 +0.6920998310633086 +0.6056272403357319 +0.4614592400945998 +0.3461049112705705 +0.2018996889437794 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.0576866479940108 +0.14421518948927226 +0.2595829718925291 +0.40378354047257137 +0.547964844339119 +0.7108936078332695 +0.9134702960435828 +0.7706936374269509 +2.6169002176238445 +3.373718318227698 +4.347339821427706 +4.348018961012276 +4.373887843350183 +4.394440664701463 +4.404716270919072 +4.404716270919072 +4.3935208595585 +4.3815185041079445 +4.342147638371092 +4.2770241344525415 +4.202832528070241 +4.175265798540549 +4.158771858263004 +4.142857142857142 +4.142857142857142 +4.139252000516036 +4.149583103528658 +4.149583103528658 +4.149583103528658 +4.167422873107778 +4.149583103528658 +4.145249727007031 +4.203651089218543 +4.363078943678145 +4.469240513255388 +4.446713403848863 +4.29737398300434 +4.162310183365068 +4.162310183365068 +4.299316960299984 +4.4676065372348495 +4.490181602580803 +4.502694043727065 +4.4676065372348495 +4.281511118199585 +4.242051477075593 +4.464382022636259 +4.525148982297988 +4.446948255186931 +4.224183288970262 +4.168302313602947 +4.168302313602947 +4.298852288588964 +4.522944253053671 +4.550140350585692 +4.514260294902164 +4.265623984992915 +4.213656267826046 +4.459189377254781 +4.5756616620083115 +4.429161646579058 +4.220334472865379 +4.4514998653577775 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.571165324280725 +4.293673850523379 +4.494664026499109 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.470438427378001 +4.365786209121337 +4.652239127360683 +4.679949111556313 
+4.5968888406633095 +4.287058569715656 +4.262256704184564 +4.262256704184564 +4.2589398463924955 +4.501631338735831 +4.69281418415078 +4.559466509233962 +4.275657408429723 +4.242808832535492 +4.242808832535492 +4.2589398463924955 +4.2589398463924955 +4.2589398463924955 +4.242808832535492 +4.242808832535492 +4.360278408571066 +4.603299912056848 +4.740409145179578 +4.726826567571509 +4.571600036028576 +4.339552889545689 +4.265148157089352 +4.302198258940722 +4.4427961124456905 +4.639065325410318 +4.726826567571509 +4.764851533139823 +4.764851533139823 +4.778088299149454 +4.791447617972283 +4.778506435350758 +4.709360105415576 +4.566403774558057 +4.4269312816945705 +4.331449528132573 +4.287899053243774 +4.287899053243774 +4.287899053243774 +4.287899053243774 +4.285895083247516 +4.285895083247516 +4.285895083247516 +4.297243754933228 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.3084046300143655 +4.296239884869885 +4.321318784036765 +4.351006892703883 +4.326333543660183 +3.334370823290448 +0.659306673328599 +0.8372004512939295 +0.8647921722156005 +0.6920998310633086 +0.6056272403357319 +0.4902956806169465 +0.40378354047257137 +0.2884240647434684 +0.2018996889437794 +0.11537255792817191 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.08652970395641724 +0.17305757537147226 +0.2884240647434684 +0.40378354047257137 +0.547964844339119 +0.6632793931627319 +0.8647921722156005 +0.8372004512939295 
+0.6680677313924832 +3.320055084365214 +3.603891500879352 +4.255173391471447 +4.302784596518338 +4.3453595061404515 +4.382782218537319 +4.404716270919072 +4.4149539674021 +4.416211745185024 +4.394872548397378 +4.356613498206495 +4.291189724650094 +4.218259620984635 +4.1555211100661475 +4.139252000516036 +4.139252000516036 +4.132088428280674 +4.132088428280674 +4.132088428280674 +4.132088428280674 +4.149583103528658 +4.149583103528658 +4.167422873107778 +4.2986838154209295 +4.446713403848863 +4.4569885833271 +4.347517694966176 +4.182830894787021 +4.142890501858993 +4.260543986543284 +4.437940884635671 +4.490181602580803 +4.502694043727065 +4.4811159587853355 +4.316623615341484 +4.242051477075593 +4.460527625640681 +4.525148982297988 +4.431281148704458 +4.224183288970262 +4.16484708315636 +4.16484708315636 +4.369978307763559 +4.530776542855393 +4.550140350585692 +4.4547427227222975 +4.194387164274222 +4.28500654445743 +4.543029917126171 +4.543029917126171 +4.265623984992915 +4.361653407159187 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.577601706931892 +4.3468683991859995 +4.469148351445398 +4.613831733428877 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.6356545149438695 +4.6226847512678555 +4.373530165834871 +4.529291803534183 +4.657957256979294 +4.655021703819993 +4.413190127900948 +4.246305058254497 +4.242808832535492 +4.2589398463924955 +4.429521246676579 +4.679949111556313 +4.605035406704348 +4.283930830651066 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.275657408429723 +4.4702902099390895 +4.6870297623887245 +4.716549695711502 +4.630575200519795 +4.386252292638426 +4.265148157089352 +4.305089596671972 +4.4834255156601905 +4.673607649283862 +4.740409145179578 +4.753120216906312 +4.753120216906312 +4.753120216906312 +4.740409145179578 +4.706471321315166 +4.571600036028576 +4.430850873866957 +4.321599429301051 
+4.2629461847989285 +4.2629461847989285 +4.2629461847989285 +4.287899053243774 +4.287899053243774 +4.285895083247516 +4.285895083247516 +4.285895083247516 +4.285895083247516 +4.272808871106918 +4.272808871106918 +4.285895083247516 +4.324673792101699 +4.372291676433294 +4.441702784036244 +4.5471908537001 +4.457774122525462 +3.348727737271513 +0.6995574804335964 +0.9220194164042796 +0.7683375209644598 +0.6344548999581567 +0.5191309563131474 +0.3749445413935266 +0.2884240647434684 +0.2018996889437794 +0.14421518948927226 +0.08652970395641724 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.08652970395641724 +0.17305757537147226 +0.2595829718925291 +0.3749445413935266 +0.5191309563131474 +0.6344548999581567 +0.7683375209644598 +0.9650888789852514 +0.7249732392691373 +2.6491430356740824 +3.437664135829692 +4.154824223110502 +4.190487592151753 +4.253456513491267 +4.330714981509336 +4.3815185041079445 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.405288240837459 +4.365873551370795 +4.291189724650094 +4.200425193415516 +4.139252000516036 +4.139252000516036 +4.115142843788829 +4.132088428280674 +4.132088428280674 +4.132088428280674 +4.149583103528658 +4.132088428280674 +4.227703995652505 +4.406095101803691 +4.469240513255388 +4.407393740532426 +4.2243248754084695 +4.145249727007031 +4.203651089218543 +4.395780918368115 +4.490181602580803 +4.490181602580803 +4.477376686372026 
+4.351015773715063 +4.241708610495863 +4.442556880764622 +4.519855048515984 +4.434554479488243 +4.201961535726061 +4.145078211515794 +4.1822904867059885 +4.431281148704458 +4.525148982297988 +4.530776542855393 +4.354691823497583 +4.1752910088450745 +4.425098568663218 +4.563123965563946 +4.442260080149436 +4.28500654445743 +4.556828312189985 +4.583108031777054 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.415569757481522 +4.433633609264543 +4.590893017970352 +4.590893017970352 +4.613831733428877 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.573160800504121 +4.425150693598551 +4.6226847512678555 +4.6356545149438695 +4.5789535989891 +4.257331764657746 +4.242808832535492 +4.227383303293633 +4.341633051398518 +4.646852001521213 +4.629658286286764 +4.323181495798052 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.227383303293633 +4.341633051398518 +4.587726262875978 +4.716549695711502 +4.6870297623887245 +4.4702902099390895 +4.275657408429723 +4.292861927065423 +4.4702902099390895 +4.6804377029718225 +4.753120216906312 +4.764851533139823 +4.753120216906312 +4.753120216906312 +4.706471321315166 +4.592455460018538 +4.423839349817026 +4.302198258940722 +4.265148157089352 +4.250998675515501 +4.250998675515501 +4.2629461847989285 +4.2629461847989285 +4.2629461847989285 +4.2629461847989285 +4.272808871106918 +4.272808871106918 +4.285895083247516 +4.3143068154002675 +4.337781444744843 +4.40657002550727 +4.494233170364849 +4.623509128848177 +4.718351151603021 +4.7879891805359165 +4.49671038392666 +3.355910963566751 +0.7318749366141102 +0.9630837087124622 +0.7209149579373282 +0.6056272403357319 +0.4614592400945998 +0.3172647300558147 +0.230741499582245 +0.14421518948927226 +0.08652970395641724 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.11537255792817191 +0.2018996889437794 +0.2884240647434684 +0.4326218123061114 +0.547964844339119 +0.6632793931627319 +0.8456634862694745 +0.8051255375555195 +1.370689976024737 +3.361067436462658 +4.273095217912634 +4.137131613836097 +4.179270418973757 +4.234197004870354 +4.3173943306794955 +4.3815185041079445 +4.416211745185024 +4.426368701027112 +4.426792664882731 +4.405171091330308 +4.351449655609735 +4.24386640352576 +4.1626765496543685 +4.12374457827202 +4.115142843788829 +4.132088428280674 +4.115142843788829 +4.115142843788829 +4.132088428280674 +4.172308385784067 +4.348001132847382 +4.4569885833271 +4.43358416697286 +4.280458950753834 +4.126667610015392 +4.167422873107778 +4.347444600831109 +4.477376686372026 +4.490181602580803 +4.490181602580803 +4.3666145665109415 +4.242436409101377 +4.440598382711996 +4.507264102733968 +4.415390784022273 +4.1822904867059885 +4.145078211515794 +4.242051477075593 +4.489566010408934 +4.525148982297988 +4.503640969757196 +4.243532542904225 +4.246184195954533 +4.528831176637506 +4.536738078402639 +4.35787139154943 +4.514260294902164 +4.563123965563946 +4.570197478760316 +4.590893017970352 +4.590893017970352 +4.497832329201184 +4.433633609264543 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.599002836703154 +4.613831733428877 +4.6226847512678555 +4.5461808736533715 +4.555438561670694 +4.6356545149438695 +4.6356545149438695 +4.411631509689652 +4.19219040425837 +4.202371696128198 +4.257331764657746 +4.5799545547395955 +4.652239127360683 +4.372230331526471 
+4.227383303293633 +4.242808832535492 +4.242808832535492 +4.242808832535492 +4.227383303293633 +4.242808832535492 +4.4512597008547115 +4.671919701176808 +4.7039874056290465 +4.577606921643228 +4.341633051398518 +4.275657408429723 +4.4512597008547115 +4.638630209477494 +4.716549695711502 +4.728275832763921 +4.740587601014347 +4.7124748910849155 +4.610962775441974 +4.4427961124456905 +4.302198258940722 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.237732583564444 +4.237732583564444 +4.250998675515501 +4.280067129658118 +4.328474169847666 +4.4195207143370485 +4.487575113685434 +4.623509128848177 +4.718351151603021 +4.760933045899426 +4.791122580013401 +4.803471126886287 +4.656335710595208 +3.476393452796253 +2.633861646758893 +0.7714249580945625 +0.9029921131331431 +0.6723626437589605 +0.5382175776289602 +0.3749445413935266 +0.2595829718925291 +0.14421518948927226 +0.08652970395641724 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +0.7108936078332695 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 
+0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9379779678995588 +0.9630732565314721 +0.9769912611037048 +0.9295793432470756 +0.8753367724102397 +0.8285407428931473 +0.7853909093635885 +0.7450954734242421 +0.7219183786078611 +1.3086185082177466 +1.4674073676353294 +2.7441662409954692 +3.5383457622682686 +4.137439776966195 +4.113765550959549 +4.131105436808975 +4.1753404548542585 +4.253456513491267 +4.3453595061404515 +4.394872548397378 +4.416217764170109 +4.426792664882731 +4.416380983659691 +4.380974111341195 +4.291189724650094 +4.18527631598984 +4.12374457827202 +4.0990733560973345 +4.115142843788829 +4.115142843788829 +4.115142843788829 +4.132088428280674 +4.282391253599503 +4.43358416697286 +4.4569885833271 +4.3318824654794925 +4.149583103528658 +4.126667610015392 +4.2986838154209295 +4.462569174514068 +4.490181602580803 +4.490181602580803 +4.381564064933234 +4.296644023914582 +4.4811159587853355 +4.502694043727065 +4.399300493996371 +4.162560670484979 +4.145078211515794 +4.315731378555368 +4.512159965648277 +4.525148982297988 +4.431281148704458 +4.16484708315636 +4.407725684983931 +4.550140350585692 +4.48209627902764 +4.514260294902164 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.583108031777054 +4.549643730922412 +4.463995556498123 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.600699506003629 +4.59511905321376 +4.6226847512678555 +4.6226847512678555 +4.571250454545515 +4.241889569491942 +4.19219040425837 +4.19219040425837 +4.462167072541683 +4.645199135042 +4.436280735418166 +4.227383303293633 +4.227383303293633 +4.227383303293633 +4.227383303293633 +4.227383303293633 +4.287058569715656 +4.552289840148148 +4.69281418415078 +4.662029905531306 
+4.4512597008547115 +4.275657408429723 +4.391289528959374 +4.605035406704348 +4.716549695711502 +4.728275832763921 +4.7039874056290465 +4.622018630619488 +4.4702902099390895 +4.323181495798052 +4.2538624055405 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.250998675515501 +4.265148157089352 +4.305089596671972 +4.386252292638426 +4.4834255156601905 +4.59029185398596 +4.673607649283862 +4.738328274948345 +4.778506435350758 +4.778506435350758 +4.743407471224992 +4.66327376376416 +4.575166900574203 +4.463516460681111 +4.270280676361232 +3.0 +2.5298116246948874 +1.3307395744181507 +0.7128431035659979 +0.7405035296384099 +0.7763091941917066 +0.8285407428931473 +0.8867520004449787 +0.9499400819692401 +0.9717007050706465 +0.9379779678995588 +0.9379779678995588 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.9157781068691486 +0.7010944029779997 +0.11537255792817191 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +0.7128431035659979 +2.3410810950415235 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.301758598159419 +2.294952421195424 +2.27228354593505 +2.2503257854353 +2.2110819526399554 +2.152489662966116 +2.0691651155283464 +2.033646161431456 +2.101115010430744 +2.2274730204316917 +2.6027536568337393 +3.5438788910224837 +4.15206094808109 +4.124605143116053 +4.132471371425174 +4.131105436808975 +4.200740871072689 +4.302784596518338 +4.382954350785997 +4.416217764170109 +4.426792664882731 +4.427387113724792 +4.393549265162711 +4.308499281988651 +4.18527631598984 +4.12374457827202 +4.115142843788829 +4.115142843788829 +4.115142843788829 +4.115142843788829 +4.210991133064981 +4.3914638956194585 +4.4569885833271 +4.377925230742242 +4.185475408577178 +4.10650491814166 +4.244565779881304 +4.43358416697286 +4.486334337166265 +4.490181602580803 +4.4676065372348495 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.3666145665109415 +4.142789166892736 +4.145078211515794 +4.401672253049345 +4.525148982297988 +4.525148982297988 +4.315731378555368 +4.224183288970262 
+4.508785647745265 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.556828312189985 +4.549643730922412 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.613831733428877 +4.6226847512678555 +4.44374109684803 +4.174992929751723 +4.174992929751723 +4.335175833863043 +4.6226847512678555 +4.512109974050057 +4.209959269971465 +4.213340523077767 +4.200690125825689 +4.212804137539644 +4.212804137539644 +4.353245755530516 +4.629658286286764 +4.69281418415078 +4.587726262875978 +4.341633051398518 +4.323181495798052 +4.552289840148148 +4.69066493212302 +4.716549695711502 +4.676655864461275 +4.559466509233962 +4.37907711329853 +4.2589398463924955 +4.212804137539644 +4.212804137539644 +4.212804137539644 +4.227383303293633 +4.26937230025452 +4.339552889545689 +4.423839349817026 +4.533871840011103 +4.639065325410318 +4.7124748910849155 +4.740409145179578 +4.740409145179578 +4.7124748910849155 +4.673607649283862 +4.571600036028576 +4.46182495923458 +4.383281307781995 +4.324253970781363 +4.2744320876537865 +4.264794256022425 +3.3502799459558017 +2.323831802286236 +2.1124228235671483 +2.0395990526220777 +2.040817592526905 +2.1158475591049335 +2.1971107715186284 +2.2478906171684057 +2.2777796781485313 +2.294952421195424 +2.301758598159419 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 
+2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.312187166661021 +2.353889367864523 +0.9630732565314721 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6344548999581567 +2.353197272802601 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2743070608481237 +2.2503257854353 +2.224605730129962 +2.182689437410736 +2.1098134426472237 +2.015795365275975 +2.0860965787440415 +2.1511077696207925 +2.209217455389143 +2.532591650260483 +3.451852042322593 +4.18322211802512 +4.113765550959549 +4.109495134451669 +4.121129178702734 +4.154824223110502 +4.255173391471447 
+4.356991511156253 +4.416380983659691 +4.427387113724792 +4.427387113724792 +4.405171091330308 +4.32489895623564 +4.209232919086878 +4.109355017580312 +4.0990733560973345 +4.0990733560973345 +4.0990733560973345 +4.1555211100661475 +4.3488481899731894 +4.454624403616235 +4.406095101803691 +4.206813094023217 +4.10650491814166 +4.206813094023217 +4.406095101803691 +4.469240513255388 +4.486334337166265 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.490181602580803 +4.333419371494481 +4.123000235015652 +4.181718697742469 +4.475186563700894 +4.525148982297988 +4.489566010408934 +4.222982331221807 +4.384942201292258 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.570197478760316 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.5853224209629255 +4.274291885177432 +4.15856099578416 +4.2230503108410335 +4.571250454545515 +4.58852136234472 +4.260910257904718 +4.174992929751723 +4.174992929751723 +4.186050824078031 +4.202371696128198 +4.425946698109906 +4.652239127360683 +4.652239127360683 +4.463924095533918 +4.304971199174973 +4.463924095533918 +4.662029905531306 +4.704934473220487 +4.676655864461275 +4.520325741539703 +4.323181495798052 +4.212804137539644 +4.199235542692652 +4.199235542692652 +4.227383303293633 +4.266212842735925 +4.372230331526471 +4.501631338735831 +4.622018630619488 +4.701532155459308 +4.753120216906312 +4.753120216906312 +4.726826567571509 +4.656514879187194 +4.571600036028576 +4.4768097253305825 +4.404981570241521 +4.386252292638426 +4.3936790662814325 +4.4549954627911825 +4.452705607583619 +4.337340933525442 +2.6769755810662073 +2.283297061686561 +2.160109727281195 +2.1003810347274205 +2.0 +2.09890148419316 +2.1678160426511073 +2.221728616568317 +2.2503257854353 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.8361751761523477 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8795831949522626 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.224605730129962 +2.182689437410736 +2.1158475591049335 +2.0331706464402766 +2.0601496310576852 +2.135183545553083 +2.1910332398368633 +2.437345322120844 +3.3627313896351043 +4.2396401002233866 +4.122553981020209 +4.109495134451669 +4.121129178702734 +4.144376911506039 +4.223228363093585 +4.342804609758586 +4.405288240837459 +4.427387113724792 +4.427387113724792 +4.405171091330308 +4.32489895623564 +4.192810323252793 +4.109355017580312 +4.0990733560973345 +4.097112727520313 +4.113407259976061 +4.286106158638396 +4.442593054589512 +4.431848211731319 +4.244565779881304 +4.10650491814166 +4.166061373551728 +4.377925230742242 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.296670760432378 +4.103232625359826 +4.278622723585689 +4.512159965648277 +4.525148982297988 +4.4182170674180945 +4.242051477075593 +4.503640969757196 +4.525148982297988 +4.530776542855393 +4.5437863107195415 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.570197478760316 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.497832329201184 +4.163516600021028 +4.1475510772649535 +4.456666181535883 +4.6226847512678555 +4.368581807425848 +4.15856099578416 +4.174992929751723 +4.15856099578416 +4.201549923241762 +4.480408150189437 +4.645199135042 +4.588824563121978 +4.353245755530516 +4.353245755530516 +4.5799545547395955 +4.69281418415078 +4.679949111556313 +4.552289840148148 +4.315633110904785 +4.212804137539644 +4.199235542692652 +4.212804137539644 +4.287058569715656 
+4.410395058116138 +4.559466509233962 +4.654819004174287 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.728275832763921 +4.728397247583595 +4.7124748910849155 +4.690283055605783 +4.690283055605783 +4.706471321315166 +4.723400793471849 +4.76455519928248 +4.681845242735892 +4.411514045438016 +2.674267853333106 +2.2392507225367644 +2.155483910477752 +2.1003810347274205 +2.0 +2.1038052061323396 +2.17374086367226 +2.224605730129962 +2.2503257854353 +2.2743070608481237 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.317246029935818 +0.7785209922500709 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.7128431035659979 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.229462216113617 +2.1937629269159817 +2.1367106068549404 +2.0512707473844163 +2.0436562698390768 +2.1312372430148407 +2.170541996586407 +2.3420968789349415 +3.279704955838343 +3.6647777611972554 +4.1645865807695905 +4.113765550959549 +4.113175748504369 +4.122553981020209 +4.208892782922307 +4.327726831947551 +4.405288240837459 +4.427387113724792 +4.427387113724792 +4.405171091330308 +4.3117422270487165 +4.170693269388646 +4.082166585709781 +4.082166585709781 +4.082166585709781 +4.231867605852527 +4.418012974920095 +4.442593054589512 +4.284882423026124 +4.10650491814166 +4.13051993625085 +4.347517694966176 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.4676065372348495 +4.242436409101377 +4.1235014442649724 +4.381564064933234 +4.519855048515984 +4.512159965648277 +4.315731378555368 +4.384942201292258 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.530776542855393 +4.550140350585692 +4.550140350585692 +4.550140350585692 
+4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.583108031777054 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.37650047067259 +4.113710939482175 +4.286633084295397 +4.577601706931892 +4.505917988700361 +4.19219040425837 +4.15856099578416 +4.1431582649902525 +4.241889569491942 +4.529291803534183 +4.6356545149438695 +4.512109974050057 +4.299288615843937 +4.449325548723675 +4.65085686650051 +4.69281418415078 +4.5968888406633095 +4.372230331526471 +4.227383303293633 +4.212804137539644 +4.269511373603581 +4.40691351739986 +4.570132142342086 +4.662029905531306 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.728275832763921 +4.728275832763921 +4.75182931192929 +4.764851533139823 +4.778088299149454 +4.664294722906742 +3.563422141832019 +2.6121534849123687 +2.212819215323622 +2.1511077696207925 +2.0860965787440415 +2.015795365275975 +2.09890148419316 +2.17374086367226 +2.224605730129962 +2.255849667444121 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.4046939094161646 +0.40378354047257137 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17305757537147226 +1.4612614002956033 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.237685607857683 +2.202927022229015 +2.148386749763698 +2.0691651155283464 +2.02662213211285 +2.1140001744364625 +2.160109727281195 +2.2476747825223162 +2.6608724675715303 +3.5197459726697047 +4.2253855461188 +4.147219722065306 +4.11464846424741 +4.122553981020209 +4.210936623961439 +4.342804609758586 +4.416380983659691 +4.427387113724792 
+4.427387113724792 +4.393549265162711 +4.2770241344525415 +4.132106904228244 +4.082166585709781 +4.082166585709781 +4.17835618067723 +4.391267854158244 +4.4525225151227055 +4.332945081527638 +4.113407259976061 +4.10650491814166 +4.316206246689385 +4.4569885833271 +4.469240513255388 +4.469240513255388 +4.486334337166265 +4.490181602580803 +4.4538393189583685 +4.188330631831401 +4.163998999246982 +4.4538393189583685 +4.514890985676352 +4.473282043330933 +4.368049152440184 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.583108031777054 +4.563841360958199 +4.247524644265324 +4.15491515763151 +4.508358881478538 +4.563841360958199 +4.247524644265324 +4.129245250799048 +4.1431582649902525 +4.260910257904718 +4.539257872104618 +4.6226847512678555 +4.512109974050057 +4.3517175586913135 +4.539257872104618 +4.6356545149438695 +4.645199135042 +4.488865705516085 +4.248918520191973 +4.232147173910067 +4.353245755530516 +4.534232616473268 +4.646852001521213 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.7039874056290465 +4.7039874056290465 +4.7039874056290465 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.716549695711502 +4.7039874056290465 +4.6870297623887245 +4.649129891979821 +4.438823716960622 +3.474742796347404 +2.5304056759265983 +2.192769312606066 +2.1432617429014895 +2.0601496310576852 +2.0331706464402766 +2.1158475591049335 +2.182689437410736 +2.224605730129962 +2.255849667444121 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.4574341235805166 +0.17305757537147226 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4133322168253235 +2.3983024946691054 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.2478906171684057 +2.2110819526399554 +2.1584557623803704 +2.0866931654730516 +2.0111326748592737 +2.0960825945482244 +2.155483910477752 +2.2082431896030905 +2.5435740731531253 +3.3689649963112624 +4.285393434112448 +4.190487592151753 +4.122553981020209 +4.113175748504369 +4.221266037517113 +4.3549764147420245 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.367190816108014 +4.227753174233639 +4.094693968165638 +4.069462169076174 +4.122164405528604 +4.350012581677216 +4.4525225151227055 +4.350012581677216 +4.137792226867703 +4.088453787285082 +4.298762055403042 +4.4569885833271 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.486334337166265 +4.410905534283973 +4.126667610015392 +4.279174511318519 +4.490181602580803 +4.502694043727065 +4.4811159587853355 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.502706439240199 +4.15491515763151 +4.38052243010171 +4.590893017970352 +4.4313653771533 +4.129245250799048 +4.129245250799048 +4.267104421859435 +4.541557094775911 +4.613831733428877 +4.586658531412266 +4.47698040719883 +4.603447413405319 +4.6356545149438695 +4.609159296939702 +4.406415144859124 +4.274291885177432 +4.430539033190921 +4.588824563121978 +4.666403422086015 +4.679949111556313 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.69281418415078 +4.676655864461275 +4.638630209477494 +4.587726262875978 +4.520325741539703 +4.441583484616433 
+4.341633051398518 +4.234828115179514 +3.3943692655669393 +2.426725364121725 +2.1865790505141085 +2.135183545553083 +2.0601496310576852 +2.045597015308826 +2.1367106068549404 +2.1971107715186284 +2.237685607857683 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.7163871050283115 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8256255235064831 +2.326976269832184 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2690991488719328 +2.2503257854353 +2.224605730129962 +2.17374086367226 +2.1038052061323396 +2.02739461766745 +2.0711438279723438 +2.1432617429014895 +2.1763775028439323 +2.403163833643906 +3.2700437837185357 +4.327356979995596 +4.240845765526305 +4.144376911506039 +4.134202790326104 +4.240845765526305 +4.380386429263759 +4.427387113724792 +4.427387113724792 +4.416380983659691 +4.32489895623564 +4.14271795783301 +4.060376351998211 +4.107624531009424 +4.305505191501973 +4.44092500022344 +4.378043959316646 +4.1613904777964095 +4.082166585709781 +4.298762055403042 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.365645075900559 +4.126667610015392 +4.395780918368115 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.530776542855393 +4.5437863107195415 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.405511387167781 
+4.204366011250862 +4.5350396637789485 +4.549643730922412 +4.213656267826046 +4.113710939482175 +4.282757389600612 +4.55656589041628 +4.590893017970352 +4.590893017970352 +4.599002836703154 +4.6226847512678555 +4.6226847512678555 +4.6226847512678555 +4.5227409419139475 +4.512109974050057 +4.609159296939702 +4.6356545149438695 +4.648026828159334 +4.657957256979294 +4.668188175579596 +4.679949111556313 +4.666403422086015 +4.645399894198835 +4.5968888406633095 +4.531977395390569 +4.444957838142898 +4.353245755530516 +4.283930830651066 +4.242808832535492 +4.228811106557455 +4.157308012802116 +4.295952195528659 +3.3338712248557067 +2.3211831636053715 +2.1673562403753768 +2.1186411559003946 +2.0436562698390768 +2.063727005490863 +2.1367106068549404 +2.1971107715186284 +2.237685607857683 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.9045078427753417 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8121773597308399 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2743070608481237 +2.2503257854353 +2.224605730129962 +2.1885638406774586 +2.1323446946695777 +2.0512707473844163 +2.0436562698390768 +2.122787738844958 +2.1673562403753768 +2.301877100132785 +2.7408362706092446 +3.5730728061118344 +4.270567858673243 +4.157675480838209 +4.1656882766508945 +4.285908874252486 +4.405288240837459 +4.427387113724792 +4.427387113724792 +4.392079691227801 +4.247576635145128 
+4.0987968379282975 +4.084101723741918 +4.256447689150627 +4.429161646579058 +4.391267854158244 +4.18527631598984 +4.082166585709781 +4.317872846720725 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.282391253599503 +4.206280547491993 +4.4676065372348495 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.519855048515984 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.530776542855393 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.298852288588964 +4.387892251749726 +4.563123965563946 +4.387892251749726 +4.1183732023423625 +4.342653094159319 +4.571165324280725 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.613831733428877 +4.6226847512678555 +4.6226847512678555 +4.6356545149438695 +4.6356545149438695 +4.6356545149438695 +4.609159296939702 +4.571250454545515 +4.500777606857457 +4.392622212842695 +4.305664264019257 +4.248918520191973 +4.2101709130931395 +4.1840024052794655 +4.172922335496928 +4.200639673579342 +4.200639673579342 +4.141719569647896 +3.5468119914791725 +2.5827129410220424 +2.2487592108855945 +2.155483910477752 +2.1003810347274205 +2.0111326748592737 +2.0866931654730516 +2.152489662966116 +2.2110819526399554 +2.237685607857683 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.353197272802601 +0.6723626437589605 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.06854719057135394 +1.3237599323266105 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.237685607857683 +2.1971107715186284 +2.148386749763698 +2.0750586643658098 +2.019145534516432 +2.105457950100802 +2.160109727281195 +2.2350387770506743 +2.633162161009022 +3.4669981438100352 +4.283524110580276 +4.190487592151753 +4.208892782922307 +4.342804609758586 +4.427387113724792 +4.427387113724792 +4.416380983659691 +4.327726831947551 +4.15362856723562 +4.076735249363886 +4.221636875083904 +4.418012974920095 +4.405516093367723 +4.206980666498831 +4.094693968165638 +4.3488481899731894 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.446713403848863 +4.210991133064981 +4.331245648109325 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.519855048515984 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.5437863107195415 +4.536738078402639 +4.438213139075817 +4.536738078402639 +4.528831176637506 +4.241075969322648 +4.387892251749726 +4.570197478760316 +4.583108031777054 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.603682897163525 +4.613831733428877 +4.59511905321376 +4.512109974050057 +4.387552875155975 +4.2549350594948505 +4.165009881185526 +4.1431582649902525 +4.127470160894371 +4.141950580579023 +4.156753372551671 +4.172922335496928 +4.2160995736710225 +4.21602102689975 +4.240735018037144 +3.3857054816622654 +2.4425861609117265 +2.197496488944807 +2.1432617429014895 +2.0711438279723438 +2.0194533506514256 +2.09890148419316 +2.1678160426511073 +2.2168333461740244 +2.2503257854353 +2.2625311142457543 +2.2743070608481237 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.432451996876125 +0.3172647300558147 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.22140021244840558 +1.5281001994867136 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.2503257854353 +2.2110819526399554 +2.163970843013399 +2.09890148419316 +2.0112792435250872 +2.0860965787440415 +2.1511077696207925 +2.1927208376298393 +2.5255381483758743 +3.3582972295480533 +4.292846626911027 +4.213979226326154 +4.273346012383912 +4.394136322099213 +4.427387113724792 +4.427387113724792 +4.392079691227801 +4.221266037517113 +4.09250926319951 +4.192810323252793 +4.405516093367723 +4.418012974920095 +4.221636875083904 +4.119144903907343 +4.377713549555899 +4.4664508108639795 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.406095101803691 +4.227703995652505 +4.446713403848863 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.519855048515984 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.466517186829626 +4.4547427227222975 +4.563123965563946 +4.563123965563946 +4.563123965563946 
+4.570197478760316 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.590893017970352 +4.524127578822819 +4.37650047067259 +4.213656267826046 +4.1431582649902525 +4.1169613292012635 +4.107173483038939 +4.1169613292012635 +4.1151921680667005 +4.1415828455394195 +4.19219040425837 +4.308646412990123 +4.338128996657249 +4.353505525188759 +2.704765968797729 +2.3065407450626294 +2.1673562403753768 +2.122787738844958 +2.0436562698390768 +2.045597015308826 +2.1264928868542414 +2.182689437410736 +2.224605730129962 +2.2503257854353 +2.2743070608481237 +2.2796572735563765 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +1.4017247053402326 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5382175776289602 +2.384596050189881 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2743070608481237 +2.2503257854353 +2.224605730129962 +2.182689437410736 +2.1204808889673323 +2.0331706464402766 +2.066570901959439 +2.139334340219923 +2.1763775028439323 +2.424058950280588 +3.2807022858567167 +4.305299409549234 +4.275546918294798 +4.356613498206495 +4.416380983659691 +4.427387113724792 +4.416380983659691 +4.298727598411171 +4.113175748504369 
+4.166537627720674 +4.391323950112836 +4.429161646579058 +4.241940840533436 +4.155611178860544 +4.403952440507338 +4.4525225151227055 +4.4664508108639795 +4.469240513255388 +4.469240513255388 +4.363005348045478 +4.377662548191024 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.530776542855393 +4.536738078402639 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.563123965563946 +4.583108031777054 +4.590893017970352 +4.508358881478538 +4.323535752524507 +4.15491515763151 +4.099911233827896 +4.099911233827896 +4.088606037180334 +4.1053137652653495 +4.127470160894371 +4.201549923241762 +4.354374661974843 +4.5030960939417675 +4.451614189945248 +3.469949565112751 +2.5867012767877045 +2.221558662260198 +2.155483910477752 +2.1003810347274205 +2.0157495197110955 +2.0750586643658098 +2.148386749763698 +2.1971107715186284 +2.237685607857683 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.7569148146041229 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.9379779678995588 +2.2965955499850623 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.280529132403043 +2.2743070608481237 +2.2625311142457543 +2.237685607857683 +2.1937629269159817 +2.1323446946695777 +2.05698502564009 +2.0436562698390768 +2.122787738844958 +2.170541996586407 +2.3508959098624618 +3.260182981301824 +4.322349160759665 +4.346962517011327 +4.417897362922753 +4.427387113724792 +4.427387113724792 +4.353117274877057 +4.144376911506039 +4.154824223110502 +4.3786204442699175 +4.429161646579058 +4.256447689150627 +4.206980666498831 +4.430380826279388 +4.4525225151227055 +4.454624403616235 +4.4664508108639795 +4.469240513255388 +4.4569885833271 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.519855048515984 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.530776542855393 +4.550140350585692 +4.550140350585692 +4.550140350585692 +4.563123965563946 +4.563123965563946 +4.512329152144901 +4.317592641512847 +4.128740395675791 +4.099911233827896 +4.099911233827896 +4.088606037180334 +4.099911233827896 +4.15491515763151 +4.321414521587279 +4.488817061485224 +4.59511905321376 +4.447806976940592 +3.337450436572998 +2.442590054821959 +2.1828331778914043 +2.1432617429014895 +2.0758977822449705 +2.015795365275975 +2.1038052061323396 +2.1678160426511073 +2.2110819526399554 +2.2503257854353 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2965955499850623 +0.9630732565314721 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7647373658980348 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.237685607857683 +2.2079729546579676 +2.152489662966116 +2.0750586643658098 +2.032562994261107 +2.1186411559003946 +2.1621169737505186 +2.306811880582651 +3.2449725942218595 +3.6216779421851166 +4.416217764170109 +4.428862183491362 +4.427387113724792 +4.393549265162711 +4.208892782922307 +4.144376911506039 +4.365873551370795 +4.428170372024604 +4.291189724650094 +4.289528505696501 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.486334337166265 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.519855048515984 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.5437863107195415 +4.550140350585692 +4.536738078402639 +4.404476728520448 +4.145078211515794 +4.057266954196868 +4.057266954196868 +4.072289591672341 +4.101269072697191 
+4.227912298396953 +4.415569757481522 +4.565650682473864 +4.590893017970352 +4.408635309066533 +3.275551551235999 +2.340876918824316 +2.1673562403753768 +2.122787738844958 +2.0436562698390768 +2.045597015308826 +2.1264928868542414 +2.182689437410736 +2.224605730129962 +2.2503257854353 +2.2743070608481237 +2.2796572735563765 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.3772600626078293 +0.547964844339119 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +1.3905844909148 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.2503257854353 +2.2110819526399554 +2.152489662966116 +2.081518910697424 +2.0157495197110955 +2.105457950100802 +2.160109727281195 +2.2777873993509488 +2.7666474563244394 +3.610811338896543 +4.416217764170109 +4.428862183491362 +4.416380983659691 +4.2664459887944055 +4.15362856723562 +4.3549764147420245 +4.427387113724792 +4.338149361251756 +4.36476001222331 
+4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.489566010408934 +4.280004643579906 +4.057266954196868 +4.044951545574025 +4.044951545574025 +4.073248530971775 +4.201961535726061 +4.421450634493992 +4.565584671587924 +4.583108031777054 +4.405809921245526 +2.7324485083436683 +2.2768529630361805 +2.160109727281195 +2.105457950100802 +2.019145534516432 +2.0750586643658098 +2.152489662966116 +2.1971107715186284 +2.237685607857683 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.5354439345570259 +0.24055939460771647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3172647300558147 +2.4372360353739517 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2743070608481237 +2.2503257854353 +2.2110819526399554 +2.1678160426511073 +2.1038052061323396 +2.015795365275975 +2.0860965787440415 +2.1570620246091714 +2.2777873993509488 +2.7666474563244394 +3.6254519576508906 +4.428211004156933 +4.428862183491362 +4.3117422270487165 +4.1753404548542585 +4.3549764147420245 +4.427387113724792 +4.405239266363791 +4.429161646579058 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.4525225151227055 +4.4664508108639795 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.507264102733968 +4.525148982297988 +4.525148982297988 +4.525148982297988 +4.443596648081142 +4.162310183365068 +4.020098689803449 +4.020098689803449 +4.044951545574025 +4.145078211515794 +4.387892251749726 +4.544787027092619 +4.563123965563946 +4.397484734887035 +2.719898488397276 +2.2623212663256167 +2.163555712820396 +2.1003810347274205 +2.0 +2.092668508103671 +2.1678160426511073 +2.2110819526399554 +2.2503257854353 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.334894959804494 +0.06854719057135394 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6148520064161902 +2.3452889562727264 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2764066145123847 +2.2743070608481237 +2.2503257854353 +2.224605730129962 +2.182689437410736 +2.1098134426472237 +2.015795365275975 +2.0860965787440415 +2.1570620246091714 +2.2948898705815313 +3.2382221208941813 +4.374914026316517 +4.430458479623737 +4.342804609758586 +4.284364652204422 +4.416380983659691 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.44092500022344 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.486334337166265 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.502694043727065 +4.502694043727065 +4.519855048515984 +4.415390784022273 +4.103232625359826 +4.020098689803449 +4.020098689803449 +4.040649006191388 +4.278622723585689 +4.494288594971252 +4.564735477435103 +4.405058310646361 +3.2627492086774588 +2.2732041608987807 +2.1665875672999495 +2.1003810347274205 +2.0 +2.09890148419316 +2.17374086367226 +2.224605730129962 +2.2503257854353 
+2.2743070608481237 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.8013943068221954 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.9237561989163847 +2.2965955499850623 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.224605730129962 +2.182689437410736 +2.1098134426472237 +2.015795365275975 +2.0960825945482244 +2.1594910626532013 +2.424058950280588 +3.4033593703152687 +4.403607540711773 +4.417897362922753 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.44092500022344 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 
+4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.486334337166265 +4.490181602580803 +4.490181602580803 +4.490181602580803 +4.502694043727065 +4.395780918368115 +4.0712481374985465 +4.01960399960008 +4.020098689803449 +4.142890501858993 +4.415390784022273 +4.532451043151799 +4.434404132986627 +3.2997923574897783 +2.3214444725064065 +2.160109727281195 +2.1003810347274205 +2.0 +2.09890148419316 +2.1678160426511073 +2.224605730129962 +2.255849667444121 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.312187166661021 +0.8256255235064831 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7238182390859009 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.224605730129962 +2.182689437410736 +2.1038052061323396 +2.0157495197110955 +2.1312372430148407 +2.193652390301639 +2.7386357871628335 +4.377148473974314 +4.428211004156933 +4.426792664882731 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.439493703582002 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.472314528156556 +4.490181602580803 +4.490181602580803 +4.395780918368115 +4.073910026193216 +4.041787974963054 +4.028346058775132 +4.279174511318519 +4.491440034664813 +4.518215987060842 +3.475078474720889 +2.464962024504153 +2.1594258087242073 +2.105457950100802 +2.0 +2.09890148419316 +2.1678160426511073 +2.224605730129962 +2.255849667444121 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.395732822205279 +0.44213125973792344 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17305757537147226 +1.4351488673587052 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.221728616568317 +2.152489662966116 +2.05698502564009 +2.0758977822449705 +2.1570620246091714 +2.474079002171915 +3.550701921872128 +4.428211004156933 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.44092500022344 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.4664508108639795 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.421815150590111 +4.0990733560973345 +4.057108257194596 +4.0712481374985465 +4.363548074929932 +4.508911515906129 +4.410566038220873 +2.7304609129191033 +2.197713610869064 +2.122787738844958 +2.02662213211285 +2.092668508103671 +2.17374086367226 +2.224605730129962 +2.255849667444121 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.4797210190180161 +0.17305757537147226 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.38453231097115825 +2.4107914727036284 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.280529132403043 +2.2743070608481237 +2.2503257854353 +2.1971107715186284 +2.1098134426472237 +2.0111326748592737 +2.1312372430148407 +2.3009344856470513 +3.4092698519760596 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.44092500022344 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.469240513255388 +4.469240513255388 +4.4569885833271 
+4.184104289765669 +4.07241892641154 +4.097112727520313 +4.410905534283973 +4.508911515906129 +3.602433982163511 +2.481687647876782 +2.1536364274697855 +2.066570901959439 +2.063727005490863 +2.152489662966116 +2.224605730129962 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.7028916044659059 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7683375209644598 +2.3363209725409417 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.224605730129962 +2.152489662966116 +2.045597015308826 +2.1003810347274205 +2.2242764561854735 +3.3279088601467954 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.426792664882731 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.439493703582002 +4.44092500022344 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.454624403616235 +4.469240513255388 +4.316214637717017 +4.062586990861675 +4.1088251299545835 +4.405105971950175 +4.487951122439035 +3.4996046386563355 +2.326661896488758 +2.1312372430148407 +2.0111326748592737 +2.1204808889673323 +2.1971107715186284 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.8753367724102397 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.841423246189211 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.280529132403043 +2.2743070608481237 +2.237685607857683 +2.182689437410736 +2.081518910697424 +2.077355219671662 +2.2023661849891183 +3.332339341975471 +4.405992259746327 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.426792664882731 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.439493703582002 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.4525225151227055 +4.418012974920095 +4.085785441124662 +4.096141743038126 +4.406095101803691 +4.486334337166265 +3.462924881186887 +2.2667210007255236 +2.109810149006344 +2.038449217047763 +2.163970843013399 +2.224605730129962 +2.2625311142457543 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.3410810950415235 +0.7108936078332695 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.6982561460305812 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2503257854353 +2.1971107715186284 +2.09890148419316 +2.0625720845185223 +2.223400939437198 +3.4153326189907474 +4.416217764170109 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.44092500022344 +4.44092500022344 +4.4525225151227055 +4.275317971489297 +4.090813886481587 +4.363885490652064 +4.482790556910679 +3.526665827705376 +2.2838577347402804 +2.105457950100802 +2.05698502564009 +2.179096074810225 +2.237685607857683 +2.2743070608481237 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.4253402630005265 +0.3557324805227413 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.19262923125003173 +1.505818593254138 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.2743070608481237 +2.2625311142457543 +2.2110819526399554 +2.1038052061323396 +2.077355219671662 +2.3136782418210164 +3.572769228786013 +4.405568769612216 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.428170372024604 +4.417257564352126 +4.143145871693728 +4.275317971489297 +4.467975145367171 +4.37888240499102 +2.3841071414305532 +2.105457950100802 +2.05698502564009 +2.182689437410736 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.4240069420453962 +0.11537255792817191 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4997231449871 +2.395732822205279 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2625311142457543 +2.1971107715186284 +2.0866931654730516 +2.0960825945482244 +2.5253339155275762 +4.365929836233456 +4.405992259746327 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.416211745185024 +4.426792664882731 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.353117274877057 +4.221266037517113 +4.429161646579058 +4.410753479930216 +2.61433650252718 +2.105135466890179 +2.05698502564009 +2.182689437410736 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.7425722277764768 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8825278501371363 +2.3071218036676138 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2503257854353 +2.182689437410736 +2.0512707473844163 +2.115923216982885 +3.2172127875054515 +4.395466391668603 +4.394440664701463 +4.404716270919072 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.427387113724792 +4.3549764147420245 +4.393549265162711 +4.440985698726188 +3.2651300009437985 +2.1173813288353065 +2.0331706464402766 +2.182689437410736 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2965955499850623 +0.9650712301005928 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7818913621104633 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2478906171684057 +2.163970843013399 +2.024242880540455 +2.224950265230117 +3.532962738921775 +4.405992259746327 
+4.395466391668603 +4.394440664701463 +4.404716270919072 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.416217764170109 +4.427387113724792 +4.427387113724792 +4.440985698726188 +3.6030567214020692 +2.2879955242179557 +2.0263674134864393 +2.152489662966116 +2.237685607857683 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.366575084333111 +0.5860800426151513 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.06854719057135394 +1.368306154887538 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.224605730129962 +2.1204808889673323 +2.0711438279723438 +2.668617332320399 +4.385295485648465 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.394440664701463 +4.404716270919072 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.416211745185024 +4.426792664882731 +4.4149076540614445 +2.7461677393350667 +2.0758977822449705 +2.1204808889673323 +2.224605730129962 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.4449570708859434 +0.2693452599052222 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2884240647434684 +1.5465893039290308 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.280529132403043 +2.2625311142457543 +2.1971107715186284 +2.0581063231035372 +2.1682995337015143 +3.47930344235551 +4.395736408546402 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.394440664701463 +4.404716270919072 +4.404716270919072 +4.416211745185024 +4.428211004156933 +3.5561228267406797 +2.2092482937679585 +2.054713117792171 +2.1971107715186284 +2.2625311142457543 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.3571681680736565 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5767970611512538 +2.353889367864523 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.237685607857683 +2.152489662966116 +2.0353944423380357 +2.7020283218669165 +4.386125747786783 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.394440664701463 
+4.404716270919072 +4.404716270919072 +4.405992259746327 +2.7731637760483516 +2.0436562698390768 +2.148386749763698 +2.237685607857683 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.7763091941917066 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.9650712301005928 +2.2965955499850623 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2110819526399554 +2.071215874545916 +2.250843516412312 +3.6020738028972716 +4.386876885101057 +4.387147043291084 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.405992259746327 +4.3666498419454385 +2.298157327804374 +2.0581063231035372 +2.2110819526399554 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.3071218036676138 +0.8825278501371363 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7359825258205799 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2503257854353 +2.152489662966116 +2.046265922153117 +3.233043394124209 +4.388327680595662 +4.387147043291084 +4.387147043291084 +4.385615758860399 +4.395466391668603 +4.395466391668603 +4.395466391668603 +4.407428326716359 +3.286739248517872 +2.0558405821318013 +2.1367106068549404 +2.2503257854353 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.384596050189881 +0.48061248750569696 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +1.412865539386467 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2625311142457543 +2.1971107715186284 +2.040107581316896 +2.6193909090739607 +4.369061359433033 +4.377747634560744 +4.377747634560744 +4.387147043291084 +4.387147043291084 +4.385615758860399 +4.395466391668603 +4.385295485648465 +2.6466510922053246 +2.0355882720041394 +2.1971107715186284 +2.2625311142457543 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.494678486305875 
+0.2018996889437794 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3461049112705705 +2.419971038201888 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.2350331725121304 +2.104241453393035 +2.2875914460808904 +4.359769301852005 +4.378973407053157 +4.377747634560744 +4.377747634560744 +4.377747634560744 +4.387147043291084 +4.386876885101057 +4.3680123619119815 +2.302239888941486 +2.104241453393035 +2.224605730129962 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.6951452758865653 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7108936078332695 +2.3452889562727264 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2503257854353 +2.152489662966116 +2.1063363848303007 +3.427842944333051 +4.380396058849706 +4.379738179877462 +4.377747634560744 +4.377747634560744 +4.377747634560744 +4.388327680595662 +3.414657216759596 +2.099288576514305 +2.152489662966116 +2.2503257854353 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +0.8495621167939733 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8704047159223176 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2625311142457543 +2.1971107715186284 +2.02739461766745 +3.2154644369067613 +4.380396058849706 +4.370810227989025 +4.379738179877462 +4.379738179877462 +4.377747634560744 +4.388327680595662 +3.181556897644701 +2.02739461766745 +2.1971107715186284 +2.2625311142457543 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.326976269832184 +0.7584815939613518 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.7077485255828968 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2743070608481237 +2.224605730129962 +2.075416544172951 +2.81649189746334 +4.363184059378072 +4.370810227989025 +4.370810227989025 +4.370810227989025 +4.379738179877462 +4.378757756023827 +2.7134039666737695 +2.0783683466684364 +2.224605730129962 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.41660567849633 +0.39419237910002547 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +1.4017247053402326 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.2838303016383614 +2.237685607857683 +2.104241453393035 +2.6666666666666665 +4.356449779411873 +4.364768579242243 +4.361976979184238 +4.370810227989025 +4.370810227989025 +4.367924906446838 +2.4944548376163715 +2.120374153703379 +2.237685607857683 +2.2743070608481237 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 
+2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +2.285659673582315 +1.3460310727738563 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.9533025720683259 +1.4574341235805166 +1.494678486305875 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 
+1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.494678486305875 +1.531709664562058 +2.4127829664478506 +2.6018431061745493 +4.347339821427706 +4.356449779411873 +4.364768579242243 +4.364768579242243 +4.370810227989025 +3.5824983806496986 +2.4769229983066006 +2.4218410027163504 +1.5242987993988644 +1.494678486305875 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 
+1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.4908650045033593 +1.494678486305875 +1.4908650045033593 +0.6951452758865653 +0.4614592400945998 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.22140021244840558 
+0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 
+0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.230741499582245 +0.3172647300558147 +0.48061248750569696 +0.766750836091826 +4.395454975696837 +4.356449779411873 +4.356449779411873 +4.356449779411873 +4.364768579242243 +3.3901628360753846 +0.8935964092522806 +0.451806660765417 +0.2884240647434684 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 
+0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.22140021244840558 +0.2018996889437794 +0.06854719057135394 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.2018996889437794 +0.7831407078010928 +3.5526195691168794 +4.3586769260880995 +4.3586769260880995 +4.356449779411873 +4.356449779411873 +3.3626279895276285 +0.6695114446410386 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.19262923125003173 +0.7805011123938099 +3.518119533002176 +4.354329469367648 +4.3617847327443595 +4.3586769260880995 +4.356449779411873 +3.358640744665906 +0.6054987150990874 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7778914672149311 +3.476402583504967 +4.34704458715414 +4.354329469367648 +4.354329469367648 +4.362650556348973 +3.356648080755124 +0.519387512494303 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7804767966616 +3.45435772355868 +4.350730623451057 +4.34704458715414 
+4.354329469367648 +4.355194967601397 +3.3620486854417333 +0.4614592400945998 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7754850383014231 +3.4222154472822863 +4.3433457326275935 +4.3433457326275935 +4.350730623451057 +4.355194967601397 +0.6146514861726686 +0.40378354047257137 +0.14421518948927226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7688158416003859 +3.413519709439906 +4.3433457326275935 +4.3433457326275935 +4.3433457326275935 +4.350730623451057 +0.626196231396948 +0.3749445413935266 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7796188342366239 +3.403334004770977 +4.347234887382945 +4.3433457326275935 +4.3433457326275935 +4.3433457326275935 +0.6381535476845652 +0.3172647300558147 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.336579979066304 +0.5670150445251751 +0.5762468110472865 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 
+0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5670150445251751 +0.5762468110472865 +0.451806660765417 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7796188342366239 +3.4025832397127886 +4.340961913960934 +4.340961913960934 +4.347234887382945 +4.3433457326275935 +0.6471225330532464 +0.30777693354095703 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5285140666175412 +0.8495621167939733 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8268653780234594 +0.5382175776289602 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 
+0.7796188342366239 +3.401876210713215 +4.334910196094188 +4.340961913960934 +4.340961913960934 +4.340961913960934 +0.6567269107919813 +0.30777693354095703 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.963774036650098 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.5958076617353987 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7551530895194016 +3.3881187366326473 +4.339190019517164 +4.339190019517164 +4.334910196094188 +4.340961913960934 +0.6633429124174155 +0.30777693354095703 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.230741499582245 +0.8674260761271935 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8268653780234594 +0.39419237910002547 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7551530895194016 +3.3872926401828645 +4.339190019517164 +4.339190019517164 +4.339190019517164 +4.339190019517164 +0.6701438322619957 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.40378354047257137 
+0.8268653780234594 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8829731209921494 +0.21178330071085139 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7796188342366239 +3.3978458064751607 +4.337967397763282 +4.333620133481415 +4.339190019517164 +4.339190019517164 +0.6749865177929246 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6336557862849546 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9550413197418148 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7796188342366239 +3.3978458064751607 +4.332854430148258 +4.332854430148258 +4.333620133481415 +4.333620133481415 +0.6826826421251191 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.8613593131169246 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.7683375209644598 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7674288466362427 +3.4006038179412155 +4.332854430148258 +4.332854430148258 +4.332854430148258 +4.332854430148258 +0.6826826421251191 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1255917803265003 +0.9342501145827482 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.54752294636826 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7749854823300453 +3.415365057716329 +4.33723332223392 +4.332854430148258 +4.332854430148258 +4.332854430148258 +0.6914960532608827 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2693452599052222 +0.8529267406541052 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8394299183804375 +0.336579979066304 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.780827995344489 +3.4192368324649047 +4.332550398821635 +4.332550398821635 +4.33723332223392 +4.332854430148258 +0.6944175558639092 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.48061248750569696 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8995403851261718 +0.16387931614117957 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7794752654872443 +3.4429921340343563 +4.327833805985841 +4.327833805985841 +4.332550398821635 +4.332550398821635 +0.6944175558639092 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6909830056250525 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9157781068691486 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7821067592393272 +3.4597708579452107 +4.332371753085499 +4.332371753085499 +4.327833805985841 +4.327833805985841 +0.7037607950418554 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0576866479940108 +0.9157781068691486 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.7195984021833732 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7805011123938099 +3.484915360681428 +4.328341712918101 +4.332371753085499 +4.332371753085499 +4.332371753085499 +0.7070144357115327 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16387931614117957 +0.8995403851261718 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.48061248750569696 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7692280381806267 +3.533052287049287 +3.6675623664868677 +4.328341712918101 +4.328341712918101 +4.332371753085499 +0.7070144357115327 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.336579979066304 +0.8394299183804375 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8495621167939733 +0.2789778070382427 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7637294987872902 +3.5905083598818424 +3.6675623664868677 +3.6675623664868677 +3.6675623664868677 +4.328341712918101 +0.7110276306660097 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5382175776289602 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9332803587042555 +0.1351645434877229 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.17305757537147226 +0.7564047449203359 +3.63037851640973 +3.668556167091508 +3.668556167091508 +3.6675623664868677 +3.6675623664868677 +0.7210686438917335 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7766676551803027 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8706366067951148 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.22140021244840558 +0.7354188586773578 +3.6311669204486057 +3.6590433640953237 +3.6590433640953237 +3.668556167091508 +3.668556167091508 +0.7256446863795958 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.9600775485324995 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.6336557862849546 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.230741499582245 +0.722595819752951 +3.6604290860517175 +3.659819236392028 +3.6590433640953237 +3.6590433640953237 +3.63037851640973 +0.7334579454226846 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2018996889437794 +0.8829731209921494 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8268653780234594 +0.4229997026740895 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.2595829718925291 +0.7062695533929109 +3.6571830662978924 +3.6604290860517175 +3.659819236392028 +3.659819236392028 +3.6311669204486057 +0.7464068573479388 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.38453231097115825 +0.8268653780234594 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8674260761271935 +0.230741499582245 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.2884240647434684 +0.6891242947821885 +3.6571830662978924 +3.6571830662978924 +3.6604290860517175 +3.6604290860517175 +3.6311669204486057 +0.7519710559558561 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6049589697638957 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9615285957602238 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.2884240647434684 +0.6754834543527535 +3.6480580099102515 +3.6480580099102515 +3.6571830662978924 +3.6571830662978924 +3.628330373201324 +0.7519710559558561 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.833333333333333 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.7970093430472787 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.2884240647434684 +0.6635370822318256 +3.648332150527765 +3.6480580099102515 +3.6480580099102515 +3.6476277987965524 +3.628330373201324 +0.7368127772852731 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.11537255792817191 +0.9517446316409384 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.5762468110472865 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.30777693354095703 +0.6530041104745514 +3.6390096082606376 +3.648332150527765 +3.648332150527765 +3.6480580099102515 +3.6480580099102515 +0.7285441512181112 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2501845927102355 +0.8529267406541052 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8394299183804375 +0.3557324805227413 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.30777693354095703 +0.6436597632191985 +3.6390096082606376 +3.6390096082606376 +3.6390096082606376 +3.648332150527765 +3.648332150527765 +0.7201444745328986 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.451806660765417 +0.8268653780234594 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8829731209921494 +0.17305757537147226 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.30777693354095703 +0.6362446198120244 +3.638996173543899 +3.6390096082606376 +3.6390096082606376 +3.6390096082606376 +3.6390096082606376 +0.7062695533929109 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6623325614087925 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9406793596180201 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.3172647300558147 +0.6273903943262429 +3.638996173543899 +3.638996173543899 +3.638996173543899 +3.6390096082606376 +3.6390096082606376 +0.7122437955064962 
+0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.8889745195479124 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.7396285443968873 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.3172647300558147 +0.6204364114904433 +3.6352246502145382 +3.6352246502145382 +3.638996173543899 +3.638996173543899 +3.6390096082606376 +0.7201444745328986 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14421518948927226 +0.916900139649794 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.5094164061306206 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.3172647300558147 +2.62636182814419 +3.6293327884224067 +3.6256567937371553 +3.6352246502145382 +3.6352246502145382 +3.638996173543899 +0.7237507092058575 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.30777693354095703 +0.8495621167939733 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8394299183804375 +0.29813748831171427 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.11537255792817191 +0.36554510004184326 +2.635943082963963 +3.6256567937371553 +3.6256567937371553 +3.6256567937371553 +3.6256567937371553 +3.6076203497319868 +0.7396915102064616 +0.2884240647434684 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5094164061306206 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.916900139649794 +0.14421518948927226 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.11537255792817191 +0.42320293884874616 +2.643908914151852 +3.6161878900604445 +3.6122361466389306 +3.6256567937371553 +3.6256567937371553 +3.5986166028485815 +0.7553270992585563 +0.2595829718925291 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7396285443968873 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8980412833157345 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.14421518948927226 +0.48086904368685257 +3.345563419756308 +3.615777007111271 +3.611846835730545 +3.6122361466389306 +3.6122361466389306 +3.5768044292407 +0.773399350476982 +0.24055939460771647 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.9480916413381557 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.6623325614087925 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.14421518948927226 +0.519387512494303 +3.339814797789736 +3.60640522163187 +3.611352922417542 +3.611846835730545 +3.6122361466389306 +3.522512506538295 +0.7852235592687276 +0.22140021244840558 +0.08652970395641724 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.19262923125003173 +0.896381476420494 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8268653780234594 +0.44213125973792344 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.14421518948927226 +0.5386918553337177 
+3.3398686007305654 +3.60640522163187 +3.602431634147373 +3.602431634147373 +3.611846835730545 +3.4725591053650846 +0.7981582714728965 +0.2018996889437794 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3557324805227413 +0.8268653780234594 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8674260761271935 +0.2501845927102355 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.548193339234583 +3.337722952345149 +3.6058675054218323 +3.602431634147373 +3.602431634147373 +3.602431634147373 +3.433949486604032 +0.7952518509254771 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5762468110472865 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.9517446316409384 +0.10651401691436746 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.548193339234583 +3.339222710596468 +3.605239693322749 +3.60192095283249 +3.60192095283249 +3.602431634147373 +3.411934280089485 +0.8019432589378992 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 
+0.8050731528963935 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.814533543275509 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.519387512494303 +2.6581767679401853 +3.596668967777341 +3.6013224079591977 +3.6013224079591977 +3.60192095283249 +3.397900324664534 +0.8007101677278259 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.10651401691436746 +0.9644359211176852 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.6049589697638957 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.4902956806169465 +2.647548190681089 +3.596050400837252 +3.592098998426612 +3.5926851884767963 +3.6013224079591977 +3.38524471697017 +0.7977398853553179 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.230741499582245 +0.8674260761271935 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8268653780234594 +0.38453231097115825 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.48086904368685257 +2.644890284803525 +3.5831072087996483 +3.592098998426612 +3.592098998426612 +3.5926851884767963 +3.3797180321147944 +0.7977398853553179 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4229997026740895 +0.8268653780234594 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8829731209921494 +0.2018996889437794 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.48086904368685257 +2.650346155851089 +3.591440647082618 +3.591440647082618 +3.591440647082618 +3.592098998426612 +3.366298608599595 +0.7978190722169973 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5094164061306206 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 
+0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.8151569303072108 +0.833333333333333 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.17305757537147226 +0.48086904368685257 +2.644890284803525 +3.5824858983856354 +3.587337166476254 +3.587337166476254 +3.5831072087996483 +3.355478394593713 +0.7843371560037582 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17305757537147226 +0.7481662154254112 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 
+0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.8980412833157345 +0.7195984021833732 +0.19262923125003173 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.14421518948927226 +0.4326218123061114 +1.376450103746838 +3.5824858983856354 +3.5824858983856354 +3.5831072087996483 +3.591440647082618 +3.326998084168252 +0.670764537079803 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 
+0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.11537255792817191 +0.3461049112705705 +0.7816423580346736 +3.3121934427792192 +3.305953106889765 +2.6831376611757842 +2.6193992969677624 +0.7804689843874018 
+0.4326218123061114 +0.17305757537147226 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08652970395641724 +0.2595829718925291 +0.5191309563131474 +0.8648354565122771 +0.963094151909706 +0.9134702960435828 +0.7497229403733847 +0.5191309563131474 +0.2884240647434684 +0.11537255792817191 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.14421518948927226 +0.2884240647434684 +0.451806660765417 +0.5191309563131474 +0.5191309563131474 +0.4326218123061114 +0.3172647300558147 +0.17305757537147226 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0576866479940108 +0.11537255792817191 +0.2018996889437794 +0.230741499582245 +0.230741499582245 +0.17305757537147226 +0.11537255792817191 +0.0576866479940108 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.02884340803556551 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.0576866479940108 +0.02884340803556551 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 \ No newline at end of file diff --git a/examples/heat-equation/data/source.mtx b/examples/heat-equation/data/source.mtx new file mode 100644 index 00000000000..15b759340be --- /dev/null +++ b/examples/heat-equation/data/source.mtx @@ -0,0 +1,65538 @@ +%%MatrixMarket matrix array real general +65536 1 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09803921568627451 +0.3058823529411765 +0.40784313725490196 +0.3058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.33725490196078434 +0.4549019607843137 +0.611764705882353 +0.7215686274509804 +0.7686274509803922 +0.8549019607843137 +0.9215686274509803 +0.9725490196078431 +1.0 +0.9686274509803922 +0.7176470588235294 +0.023529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0196078431372549 +0.3176470588235294 +0.4627450980392157 +0.5098039215686274 +0.5411764705882353 +0.5607843137254902 +0.5607843137254902 +0.5529411764705883 +0.5333333333333333 +0.5098039215686274 
+0.48627450980392156 +0.4392156862745098 +0.36470588235294116 +0.16470588235294117 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16862745098039217 +0.4196078431372549 +0.5215686274509804 +0.6235294117647059 +0.7529411764705882 +0.7372549019607844 +0.5137254901960784 +0.14901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.47058823529411764 +0.6627450980392157 +0.7686274509803922 +0.8549019607843137 +0.9333333333333333 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09411764705882353 +0.42745098039215684 +0.6588235294117647 +0.8274509803921568 +0.9450980392156862 +0.9725490196078431 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9803921568627451 +0.9568627450980393 +0.9490196078431372 +0.8745098039215686 +0.7843137254901961 +0.6470588235294118 +0.5215686274509804 +0.30196078431372547 +0.07450980392156863 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.18823529411764706 +0.4235294117647059 
+0.5607843137254902 +0.6980392156862745 +0.796078431372549 +0.9176470588235294 +0.9568627450980393 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8313725490196079 +0.047058823529411764 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.8705882352941177 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0196078431372549 +0.3568627450980392 +0.7215686274509804 +0.9411764705882353 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.9764705882352941 +0.9607843137254902 +0.6 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.03529411764705882 +0.8588235294117647 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8745098039215686 +0.06274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.8705882352941177 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.3843137254901961 +0.803921568627451 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.13725490196078433 +0.9176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8588235294117647 +0.03137254901960784 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.8705882352941177 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.21176470588235294 +0.7137254901960784 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6078431372549019 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.24705882352941178 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8509803921568627 +0.01568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8549019607843137 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.42745098039215684 +0.8980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9686274509803922 +0.8274509803921568 +0.6784313725490196 +0.6039215686274509 +0.592156862745098 +0.6 +0.6352941176470588 +0.7490196078431373 +0.8666666666666667 +0.9803921568627451 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8470588235294118 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.27450980392156865 +0.6745098039215687 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.5529411764705883 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9254901960784314 +0.6470588235294118 +0.27450980392156865 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3568627450980392 +0.7058823529411765 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.807843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.23137254901960785 +0.6313725490196078 +0.8862745098039215 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.5686274509803921 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.8117647058823529 +0.25882352941176473 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5019607843137255 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4627450980392157 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.8549019607843137 +0.7098039215686275 +0.30980392156862746 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.13333333333333333 +0.9294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5411764705882353 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.7490196078431373 +0.09803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8352941176470589 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.38823529411764707 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20392156862745098 +0.8 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9607843137254902 +0.8627450980392157 +0.6392156862745098 +0.3215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3843137254901961 
+0.9568627450980393 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.792156862745098 +0.09803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4588235294117647 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.2196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.43529411764705883 +0.8352941176470589 +0.996078431372549 +0.9647058823529412 +0.8156862745098039 +0.5725490196078431 +0.28627450980392155 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1568627450980392 +0.8627450980392157 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.23529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.01568627450980392 +0.8117647058823529 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +0.09803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.07450980392156863 +0.34901960784313724 +0.26666666666666666 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6705882352941176 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.44313725490196076 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +0.9529411764705882 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.08235294117647059 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7019607843137254 +0.8313725490196079 +0.8313725490196079 +0.8313725490196079 +0.8313725490196079 +0.7372549019607844 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.01568627450980392 +0.023529411764705882 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7450980392156863 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5137254901960784 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.00784313725490196 +0.00784313725490196 +0.00784313725490196 +0.00784313725490196 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.19215686274509805 +0.5372549019607843 +0.788235294117647 +0.7176470588235294 +0.42745098039215684 +0.09411764705882353 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2235294117647059 +0.5333333333333333 +0.7764705882352941 +0.7529411764705882 +0.5333333333333333 +0.07058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.07450980392156863 +0.3176470588235294 +0.6 +0.7137254901960784 +0.8470588235294118 +0.8666666666666667 +0.8666666666666667 +0.7843137254901961 +0.6470588235294118 +0.3254901960784314 +0.03529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09803921568627451 +0.30980392156862746 +0.5490196078431373 +0.6784313725490196 +0.7647058823529411 +0.8627450980392157 +0.8666666666666667 +0.8666666666666667 +0.8666666666666667 +0.8313725490196079 +0.7098039215686275 +0.6392156862745098 +0.3843137254901961 +0.17254901960784313 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.26666666666666666 +0.6078431372549019 +0.7411764705882353 +0.8627450980392157 +0.8666666666666667 +0.8666666666666667 +0.8666666666666667 +0.5176470588235295 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.027450980392156862 +0.25882352941176473 
+0.4980392156862745 +0.6745098039215687 +0.7803921568627451 +0.8666666666666667 +0.8705882352941177 +0.8745098039215686 +0.8705882352941177 +0.8588235294117647 +0.7372549019607844 +0.6549019607843137 +0.4 +0.1803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.24705882352941178 +0.9450980392156862 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9333333333333333 +0.1843137254901961 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.25098039215686274 +0.5803921568627451 +0.7843137254901961 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +0.5882352941176471 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3333333333333333 +0.6509803921568628 +0.8156862745098039 +0.9450980392156862 +1.0 +1.0 +1.0 +0.9725490196078431 +0.32941176470588235 +0.0 +0.0 +0.0 +0.0 +0.33725490196078434 +0.7019607843137254 +0.8745098039215686 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.8313725490196079 +0.4980392156862745 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.2627450980392157 +0.7843137254901961 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.803921568627451 +0.8 +0.3803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.10196078431372549 +0.49411764705882355 +0.7529411764705882 +0.8980392156862745 
+0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9254901960784314 +0.7843137254901961 +0.5058823529411764 +0.07450980392156863 +0.0 +0.13725490196078433 +0.7058823529411765 +0.9568627450980393 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.29411764705882354 +0.6705882352941176 +0.8509803921568627 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9294117647058824 +0.788235294117647 +0.5294117647058824 +0.11764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6313725490196078 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7843137254901961 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3333333333333333 +0.5647058823529412 +0.796078431372549 +0.9529411764705882 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1411764705882353 +0.4627450980392157 +0.6745098039215687 +0.8627450980392157 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.3411764705882353 +0.0 +0.0 +0.27450980392156865 +0.7176470588235294 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.7137254901960784 +0.11372549019607843 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.3568627450980392 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.06666666666666667 +0.5607843137254902 +0.8705882352941177 +1.0 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.8392156862745098 +0.4196078431372549 +0.7529411764705882 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +0.7333333333333333 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8666666666666667 +0.5058823529411764 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8784313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5490196078431373 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.30980392156862746 +0.596078431372549 +0.8313725490196079 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3254901960784314 +0.7529411764705882 +0.9372549019607843 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.3411764705882353 +0.00392156862745098 +0.5490196078431373 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7607843137254902 +0.058823529411764705 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.3568627450980392 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3176470588235294 +0.8196078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.996078431372549 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.050980392156862744 +0.6039215686274509 +0.9568627450980393 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.7607843137254902 +0.17647058823529413 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2823529411764706 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.30196078431372547 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3137254901960784 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6039215686274509 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.3843137254901961 +0.7568627450980392 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.3568627450980392 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.0 +0.39215686274509803 +0.9294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.8313725490196079 +0.7215686274509804 +0.7098039215686275 +0.7725490196078432 +0.9215686274509803 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16862745098039217 +0.7803921568627451 +0.9921568627450981 +1.0 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9568627450980393 +0.8627450980392157 +0.803921568627451 +0.8352941176470589 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.21176470588235294 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9137254901960784 +0.13333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.35294117647058826 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6039215686274509 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8627450980392157 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.3568627450980392 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.36470588235294116 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.396078431372549 +0.00784313725490196 +0.0 +0.0 +0.0 +0.1803921568627451 +0.6980392156862745 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.0 +0.2 +0.8509803921568627 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.7411764705882353 +0.27450980392156865 +0.023529411764705882 +0.0 +0.0 +0.2 +0.6705882352941176 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.8352941176470589 +0.13333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7058823529411765 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8705882352941177 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3411764705882353 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6039215686274509 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.29411764705882354 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.25882352941176473 +0.8274509803921568 +0.9019607843137255 +0.9882352941176471 +0.9921568627450981 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9921568627450981 +0.9882352941176471 +0.9686274509803922 +0.8784313725490196 +0.8431372549019608 +0.36470588235294116 +0.0 +0.0 +0.1607843137254902 +0.8745098039215686 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9450980392156862 +0.30980392156862746 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.03137254901960784 +0.7372549019607844 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6196078431372549 +0.0 +0.0 +0.0 +0.09803921568627451 +0.792156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6392156862745098 +0.03137254901960784 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.615686274509804 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6980392156862745 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.12941176470588237 +0.8392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.43529411764705883 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.8862745098039215 +0.6980392156862745 +0.5647058823529412 +0.5098039215686274 +0.5764705882352941 +0.6627450980392157 +0.8627450980392157 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5294117647058824 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.01568627450980392 +0.09803921568627451 +0.1803921568627451 +0.2549019607843137 +0.41568627450980394 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8352941176470589 +0.4235294117647059 +0.27450980392156865 +0.2 +0.1568627450980392 +0.06666666666666667 +0.011764705882352941 +0.0 +0.0 +0.0 +0.615686274509804 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.21176470588235294 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.792156862745098 +0.7411764705882353 +0.7411764705882353 +0.7411764705882353 +0.7411764705882353 +0.7411764705882353 +0.44313725490196076 +0.0 +0.0 +0.0 +0.6549019607843137 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8156862745098039 +0.08235294117647059 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.058823529411764705 +0.8313725490196079 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+1.0 +1.0 +1.0 +0.984313725490196 +0.43137254901960786 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.054901960784313725 +0.8980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7647058823529411 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.24705882352941178 +0.8745098039215686 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3764705882352941 +0.8431372549019608 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.44313725490196076 +0.050980392156862744 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0392156862745098 +0.5882352941176471 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.615686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.11764705882352941 +0.7568627450980392 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8156862745098039 +0.19607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.06666666666666667 +0.8784313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.3686274509803922 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.7490196078431373 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5450980392156862 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3843137254901961 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.44313725490196076 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5450980392156862 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2196078431372549 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7372549019607844 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.35294117647058826 +0.9254901960784314 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.12156862745098039 +0.6941176470588235 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.7490196078431373 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7254901960784313 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.058823529411764705 +0.7333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.807843137254902 +0.1450980392156863 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.37254901960784315 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9529411764705882 +0.24705882352941178 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6196078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6705882352941176 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.8 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9098039215686274 +0.10196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16862745098039217 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.3843137254901961 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3137254901960784 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.42745098039215684 +0.9568627450980393 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6274509803921569 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5529411764705883 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.792156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.01568627450980392 +0.6862745098039216 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.796078431372549 +0.12941176470588237 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6431372549019608 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9450980392156862 +0.2196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5411764705882353 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.788235294117647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.32941176470588235 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7294117647058823 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.8 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7176470588235294 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7450980392156863 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.611764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4470588235294118 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.26666666666666666 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8235294117647058 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.6549019607843137 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7607843137254902 +0.11372549019607843 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7019607843137254 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9607843137254902 +0.27450980392156865 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4745098039215686 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5098039215686274 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6431372549019608 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8588235294117647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3254901960784314 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7725490196078432 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1803921568627451 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.06666666666666667 +0.0 +0.0 +0.0 +0.5882352941176471 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.7215686274509804 +0.11764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.34509803921568627 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5098039215686274 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0784313725490196 +0.8862745098039215 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.35294117647058826 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4588235294117647 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.19607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.023529411764705882 +0.4196078431372549 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.4627450980392157 +0.37254901960784315 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1411764705882353 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.9215686274509803 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8980392156862745 +0.058823529411764705 +0.0 +0.0 +0.5411764705882353 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.7058823529411765 +0.08627450980392157 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7019607843137254 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4627450980392157 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6549019607843137 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2549019607843137 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.19607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.4117647058823529 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.30980392156862746 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8509803921568627 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20392156862745098 +0.9333333333333333 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.9921568627450981 +0.8784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.08627450980392157 +0.8941176470588236 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9450980392156862 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8705882352941177 +0.03137254901960784 +0.0 +0.49019607843137253 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9411764705882353 +0.16470588235294117 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.592156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6901960784313725 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5254901960784314 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4470588235294118 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9333333333333333 +0.06274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.24705882352941178 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5019607843137255 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20784313725490197 +0.9450980392156862 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8980392156862745 +0.058823529411764705 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20784313725490197 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.058823529411764705 +0.8745098039215686 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.8352941176470589 +0.00784313725490196 +0.4588235294117647 +0.9607843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6745098039215687 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.32941176470588235 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8941176470588236 +0.13333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0784313725490196 +0.8862745098039215 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.23529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9176470588235294 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20784313725490197 +0.9647058823529412 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6313725490196078 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.03137254901960784 +0.8941176470588236 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9254901960784314 +0.16470588235294117 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20784313725490197 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8235294117647058 +0.41568627450980394 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9686274509803922 +0.37254901960784315 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.00392156862745098 +0.8313725490196079 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5843137254901961 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5843137254901961 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7803921568627451 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6862745098039216 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1607843137254902 +0.9294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8235294117647058 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.30196078431372547 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.20784313725490197 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.9137254901960784 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8352941176470589 +0.12549019607843137 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5254901960784314 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.5098039215686274 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4470588235294118 +0.9450980392156862 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.9647058823529412 +0.3568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7568627450980392 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9019607843137255 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.13725490196078433 +0.9137254901960784 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6901960784313725 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1411764705882353 +0.8470588235294118 +0.9372549019607843 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.788235294117647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.615686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0196078431372549 +0.7568627450980392 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.8117647058823529 +0.5882352941176471 +0.5019607843137255 +0.5372549019607843 +0.7568627450980392 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.6196078431372549 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.807843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.13725490196078433 +0.9176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6941176470588235 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4627450980392157 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6941176470588235 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.12156862745098039 +0.3803921568627451 +0.4627450980392157 +0.6235294117647059 +0.7254901960784313 +0.9254901960784314 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.6235294117647059 +0.3607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9450980392156862 +0.33725490196078434 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2 +0.8313725490196079 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.6588235294117647 +0.0392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7843137254901961 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9215686274509803 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17254901960784313 +0.9372549019607843 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.23921568627450981 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8627450980392157 +0.023529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7215686274509804 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.803921568627451 +0.058823529411764705 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.1607843137254902 +0.7490196078431373 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9333333333333333 +0.5647058823529412 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9372549019607843 +0.09019607843137255 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.21176470588235294 +0.9647058823529412 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4588235294117647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.8431372549019608 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9568627450980393 +0.23137254901960785 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9294117647058824 +0.396078431372549 +0.9019607843137255 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.5686274509803921 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0196078431372549 +0.4745098039215686 +0.9254901960784314 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.7176470588235294 +0.24705882352941178 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6588235294117647 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.21176470588235294 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.25098039215686274 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.3215686274509804 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5647058823529412 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5254901960784314 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9529411764705882 +0.4 +0.0 +0.5411764705882353 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9254901960784314 +0.23137254901960785 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.6352941176470588 +0.9882352941176471 +1.0 +1.0 +1.0 +0.996078431372549 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.9921568627450981 +0.9568627450980393 +0.807843137254902 +0.5686274509803921 +0.2235294117647059 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5568627450980392 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.36470588235294116 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9176470588235294 +0.07058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16470588235294117 +0.9176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8 +0.023529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.4980392156862745 +0.0 +0.0 +0.08235294117647059 +0.8745098039215686 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7568627450980392 +0.023529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5568627450980392 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +0.6627450980392157 +0.4549019607843137 +0.5764705882352941 +0.6352941176470588 +0.6549019607843137 +0.6627450980392157 +0.6666666666666666 +0.6509803921568628 +0.6235294117647059 +0.5333333333333333 +0.34901960784313724 +0.1568627450980392 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.32941176470588235 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4980392156862745 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.44313725490196076 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.792156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6784313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.32941176470588235 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8274509803921568 +0.00784313725490196 +0.0 +0.0 +0.0 +0.5019607843137255 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.49019607843137253 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3411764705882353 +0.9529411764705882 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.38823529411764707 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.15294117647058825 +0.9176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7019607843137254 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6274509803921569 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.592156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7137254901960784 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.8313725490196079 +0.00392156862745098 +0.0 +0.0 +0.0 +0.023529411764705882 +0.8313725490196079 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9058823529411765 +0.21176470588235294 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.027450980392156862 +0.8392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +0.2980392156862745 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00392156862745098 +0.7686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9294117647058824 +0.18823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5882352941176471 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +0.37254901960784315 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7176470588235294 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.0196078431372549 +0.0 +0.0 +0.0 +0.0 +0.4549019607843137 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7098039215686275 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.45098039215686275 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.47058823529411764 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.48627450980392156 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9647058823529412 +0.2980392156862745 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.11764705882352941 +0.9215686274509803 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.023529411764705882 +0.7803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8274509803921568 +0.09411764705882353 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.8196078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8745098039215686 +0.03529411764705882 +0.0 +0.0 +0.0 +0.0 +0.01568627450980392 +0.7764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.45098039215686275 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7137254901960784 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8509803921568627 +0.18823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.10196078431372549 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.596078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4549019607843137 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9411764705882353 +0.26666666666666666 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.25882352941176473 +0.8941176470588236 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.7411764705882353 +0.06274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.47058823529411764 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6274509803921569 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.047058823529411764 +0.8666666666666667 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8784313725490196 +0.03529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.37254901960784315 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8627450980392157 +0.12941176470588237 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.6274509803921569 +0.4980392156862745 +0.44313725490196076 +0.4392156862745098 +0.4392156862745098 +0.4392156862745098 +0.4392156862745098 +0.4392156862745098 +0.4392156862745098 +0.4392156862745098 +0.43529411764705883 +0.41568627450980394 +0.3686274509803922 +0.23137254901960785 +0.08235294117647059 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5529411764705883 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.1607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.00784313725490196 +0.7490196078431373 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3803921568627451 +0.9372549019607843 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.7725490196078432 +0.23529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.03137254901960784 +0.49411764705882355 +0.9529411764705882 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6352941176470588 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.050980392156862744 +0.8705882352941177 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.01568627450980392 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8784313725490196 +0.03529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.7450980392156863 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6666666666666666 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9882352941176471 +0.9803921568627451 +0.9725490196078431 +0.9647058823529412 
+0.9098039215686274 +0.803921568627451 +0.6313725490196078 +0.3803921568627451 +0.06666666666666667 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0392156862745098 +0.7843137254901961 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6627450980392157 +0.011764705882352941 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4549019607843137 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.803921568627451 +0.07058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4235294117647059 +0.9254901960784314 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9333333333333333 +0.6392156862745098 +0.25098039215686274 +0.07058823529411765 +0.011764705882352941 +0.00392156862745098 +0.00784313725490196 +0.054901960784313725 +0.18823529411764706 +0.5254901960784314 +0.8313725490196079 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.14901960784313725 +0.34901960784313724 +0.611764705882353 +0.8627450980392157 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8549019607843137 +0.6 +0.34509803921568627 +0.17254901960784313 +0.047058823529411764 +0.0 +0.0 +0.0 +0.10196078431372549 +0.25882352941176473 +0.5450980392156862 +0.807843137254902 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8862745098039215 +0.611764705882353 +0.33725490196078434 +0.12941176470588237 +0.00392156862745098 +0.0 +0.00392156862745098 +0.11372549019607843 +0.2627450980392157 +0.5450980392156862 +0.7176470588235294 +0.9764705882352941 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.6784313725490196 +0.49019607843137253 +0.23529411764705882 +0.09803921568627451 +0.00392156862745098 +0.0 +0.0 +0.08235294117647059 +0.2549019607843137 +0.5490196078431373 +0.7529411764705882 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+1.0 +0.9607843137254902 +0.6431372549019608 +0.3843137254901961 +0.1607843137254902 +0.01568627450980392 +0.0 +0.12156862745098039 +0.4666666666666667 +0.8235294117647058 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9686274509803922 +0.6392156862745098 +0.34509803921568627 +0.1607843137254902 +0.00784313725490196 +0.0 +0.0 +0.0 +0.7098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.8784313725490196 +0.5137254901960784 +0.03529411764705882 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.27450980392156865 +0.8980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.6470588235294118 +0.08235294117647059 +0.0 +0.0 +0.0 +0.0392156862745098 +0.4666666666666667 +0.9333333333333333 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8666666666666667 +0.19607843137254902 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.33725490196078434 +0.8823529411764706 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.8784313725490196 +0.8352941176470589 +0.8156862745098039 +0.8313725490196079 +0.8705882352941177 +0.9529411764705882 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.5450980392156862 +0.9294117647058824 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9490196078431372 +0.7098039215686275 +0.0 +0.0 +0.1607843137254902 +0.8823529411764706 +0.984313725490196 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9019607843137255 +0.26666666666666666 +0.0 +0.36470588235294116 +0.9019607843137255 +0.984313725490196 +0.996078431372549 +1.0 +1.0 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.9803921568627451 +0.8901960784313725 +0.25098039215686274 +0.0 +0.07450980392156863 +0.807843137254902 +0.9803921568627451 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9372549019607843 +0.592156862745098 +0.0 +0.7686274509803922 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9372549019607843 +0.42745098039215684 +0.0 +0.0 +0.0 +0.5019607843137255 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.7372549019607844 +0.09019607843137255 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.3215686274509804 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.8666666666666667 +0.6862745098039216 +0.6392156862745098 +0.6784313725490196 +0.8431372549019608 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8588235294117647 +0.22745098039215686 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2 +0.7411764705882353 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9490196078431372 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.6392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8196078431372549 +0.0 +0.0 +0.21568627450980393 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.3333333333333333 +0.0 +0.4470588235294118 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 
+0.996078431372549 +0.3137254901960784 +0.0 +0.10980392156862745 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7215686274509804 +0.0 +0.8274509803921568 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.12549019607843137 +0.8705882352941177 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7215686274509804 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2784313725490196 +0.8352941176470589 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.7450980392156863 +0.17254901960784313 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4745098039215686 +0.8588235294117647 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9568627450980393 +0.8823529411764706 +0.792156862745098 +0.7568627450980392 +0.7254901960784313 +0.7137254901960784 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8196078431372549 +0.0 +0.0 +0.21568627450980393 +0.9803921568627451 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.3333333333333333 +0.0 +0.4470588235294118 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.3137254901960784 +0.0 +0.10980392156862745 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7215686274509804 +0.0 +0.8274509803921568 +1.0 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.5333333333333333 +0.0 +0.0 +0.0 +0.0 +0.2549019607843137 +0.7843137254901961 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.4235294117647059 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09411764705882353 +0.6313725490196078 +0.9411764705882353 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8901960784313725 +0.5137254901960784 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.050980392156862744 +0.5490196078431373 +0.8196078431372549 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9921568627450981 +0.9411764705882353 +0.8509803921568627 +0.7529411764705882 +0.6078431372549019 +0.47058823529411764 +0.1803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8196078431372549 +0.0 +0.0 +0.21176470588235294 +0.984313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.32941176470588235 +0.0 +0.4470588235294118 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.30980392156862746 +0.0 +0.10588235294117647 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7176470588235294 +0.0 +0.8274509803921568 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.5254901960784314 +0.0 +0.0 +0.0 +0.0 +0.0 +0.25882352941176473 +0.8470588235294118 +1.0 +1.0 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.792156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.30196078431372547 +0.6784313725490196 +0.8941176470588236 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.8509803921568627 +0.592156862745098 +0.13333333333333333 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.30980392156862746 +0.6196078431372549 +0.7764705882352941 +0.8823529411764706 +0.9294117647058824 +0.9607843137254902 +0.996078431372549 +1.0 +1.0 +0.9686274509803922 +0.9372549019607843 +0.9058823529411765 +0.8196078431372549 +0.7372549019607844 +0.615686274509804 +0.403921568627451 +0.1450980392156863 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.2784313725490196 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.3843137254901961 +0.0 +0.0 +0.054901960784313725 +0.4745098039215686 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.4823529411764706 +0.11764705882352941 +0.0 +0.1803921568627451 +0.49019607843137253 +0.49411764705882355 
+0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.4823529411764706 +0.10980392156862745 +0.0 +0.01568627450980392 +0.43137254901960786 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.30196078431372547 +0.0 +0.38823529411764707 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.48627450980392156 +0.1843137254901961 +0.0 +0.0 +0.0 +0.0 +0.4117647058823529 +0.9215686274509803 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.615686274509804 +0.4470588235294118 +0.4666666666666667 +0.47843137254901963 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.49411764705882355 +0.5215686274509804 +0.6666666666666666 +0.7411764705882353 +0.8627450980392157 +0.9882352941176471 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9333333333333333 +0.18823529411764706 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.07058823529411765 +0.4235294117647059 +0.6980392156862745 +0.788235294117647 +0.8784313725490196 +0.9215686274509803 +0.9333333333333333 +0.9450980392156862 +0.9450980392156862 +0.9333333333333333 +0.9215686274509803 +0.8666666666666667 
+0.7764705882352941 +0.6431372549019608 +0.35294117647058826 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.15294117647058825 +0.2784313725490196 +0.3686274509803922 +0.3843137254901961 +0.3764705882352941 +0.2980392156862745 +0.1843137254901961 +0.054901960784313725 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.396078431372549 +0.9490196078431372 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9411764705882353 +0.1803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.023529411764705882 +0.4627450980392157 +0.9294117647058824 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.33725490196078434 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.10196078431372549 +0.17647058823529413 +0.21568627450980393 +0.2196078431372549 +0.1803921568627451 +0.09803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.23529411764705882 +0.8901960784313725 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8588235294117647 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.5058823529411764 +0.9921568627450981 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.41568627450980394 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6627450980392157 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.023529411764705882 +0.8823529411764706 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4470588235294118 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.14901960784313725 +0.9098039215686274 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7647058823529411 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.4235294117647059 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.4392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.792156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7058823529411765 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.3411764705882353 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6784313725490196 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8941176470588236 +0.00784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7019607843137254 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9294117647058824 +0.17647058823529413 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9764705882352941 +0.37254901960784315 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.7686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.7490196078431373 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6823529411764706 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.8117647058823529 +0.09803921568627451 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.09411764705882353 +0.8980392156862745 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9725490196078431 +0.3568627450980392 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.45098039215686275 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.7058823529411765 +0.11372549019607843 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6078431372549019 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.6941176470588235 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.16862745098039217 +0.9137254901960784 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.8627450980392157 +0.4392156862745098 +0.07058823529411765 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.011764705882352941 +0.18823529411764706 +0.6588235294117647 +0.9686274509803922 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.807843137254902 +0.12156862745098039 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.6392156862745098 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9882352941176471 +0.8901960784313725 +0.7764705882352941 +0.6196078431372549 +0.6 +0.6 +0.6039215686274509 +0.615686274509804 +0.7372549019607844 +0.8431372549019608 +0.9490196078431372 +0.996078431372549 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.792156862745098 +0.15294117647058825 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.12156862745098039 +0.8 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9803921568627451 +0.6392156862745098 +0.09019607843137255 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.17647058823529413 +0.7529411764705882 +0.9882352941176471 
+1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.984313725490196 +0.796078431372549 +0.3764705882352941 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.47843137254901963 +0.8 +0.9725490196078431 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.996078431372549 +0.9176470588235294 +0.7215686274509804 +0.4196078431372549 +0.00392156862745098 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.33725490196078434 +0.6509803921568628 +0.8 +0.9098039215686274 +0.9568627450980393 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +1.0 +0.9607843137254902 +0.9254901960784314 +0.8235294117647058 +0.7254901960784313 +0.4823529411764706 +0.10196078431372549 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.050980392156862744 +0.2549019607843137 +0.43137254901960786 +0.49019607843137253 +0.5803921568627451 +0.7019607843137254 +0.7215686274509804 +0.7215686274509804 +0.7058823529411765 +0.596078431372549 +0.49411764705882355 +0.43529411764705883 +0.2784313725490196 +0.0784313725490196 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 
+0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 diff --git a/examples/heat-equation/doc/builds-on b/examples/heat-equation/doc/builds-on new file mode 100644 index 00000000000..896db74e274 --- /dev/null +++ b/examples/heat-equation/doc/builds-on @@ -0,0 +1 @@ +simple-solver three-pt-stencil-solver diff --git a/examples/heat-equation/doc/intro.dox b/examples/heat-equation/doc/intro.dox new file mode 100644 index 00000000000..e57d2052bf3 --- /dev/null +++ b/examples/heat-equation/doc/intro.dox @@ -0,0 +1,40 @@ + +

Introduction

+This example solves a 2D heat conduction equation + +$ + u : [0, d]^2 \rightarrow R\\ + \partial_t u = \alpha \Delta u + f +$ + +with Dirichlet boundary conditions and given initial condition and +constant-in-time source function f. + +The partial differential equation (PDE) is solved with a finite difference +spatial discretization on an equidistant grid: For `n` inner grid points +per direction and grid distance $h = 1/(n+1)$ we write + +$ + u_{i,j}' = + \alpha\frac{u_{i-1,j}+u_{i+1,j}+u_{i,j-1}+u_{i,j+1}-4u_{i,j}}{h^2}+f_{i,j} +$ + +We then build an implicit Euler integrator by discretizing with time step $\tau$ + +$ + \frac{u_{i,j}^{k+1} - u_{i,j}^k}{\tau} = + \alpha\frac{u_{i-1,j}^{k+1}+u_{i+1,j}^{k+1} + +u_{i,j-1}^{k+1}+u_{i,j+1}^{k+1}-4u_{i,j}^{k+1}}{h^2} + +f_{i,j} +$ + +and solve the resulting linear system for $ u_{\cdot}^{k+1}$ using Ginkgo's CG +solver preconditioned with an incomplete Cholesky factorization for each time +step, occasionally writing the resulting grid values into a video file using +OpenCV and a custom color mapping. + +The intention of this example is to provide a mini-app showing matrix assembly, +vector initialization, solver setup and the use of Ginkgo in a more complex +setting. + +

About the example

\ No newline at end of file diff --git a/examples/heat-equation/doc/kind b/examples/heat-equation/doc/kind new file mode 100644 index 00000000000..c1d9154931a --- /dev/null +++ b/examples/heat-equation/doc/kind @@ -0,0 +1 @@ +techniques diff --git a/examples/heat-equation/doc/results.dox b/examples/heat-equation/doc/results.dox new file mode 100644 index 00000000000..6bcba24ed7a --- /dev/null +++ b/examples/heat-equation/doc/results.dox @@ -0,0 +1,5 @@ +

Results

+The program will generate a video file named heat.mp4 and output the timestamp +of each generated frame. + +

Comments about programming and debugging

diff --git a/examples/heat-equation/doc/short-intro b/examples/heat-equation/doc/short-intro new file mode 100644 index 00000000000..9ba4bd2def0 --- /dev/null +++ b/examples/heat-equation/doc/short-intro @@ -0,0 +1 @@ +The heat equation example. diff --git a/examples/heat-equation/doc/tooltip b/examples/heat-equation/doc/tooltip new file mode 100644 index 00000000000..4783754312d --- /dev/null +++ b/examples/heat-equation/doc/tooltip @@ -0,0 +1 @@ +Assemble a 2D finite difference matrix. Use it to build an implicit Euler integrator. diff --git a/examples/heat-equation/heat-equation.cpp b/examples/heat-equation/heat-equation.cpp new file mode 100644 index 00000000000..60ca7f53eb3 --- /dev/null +++ b/examples/heat-equation/heat-equation.cpp @@ -0,0 +1,221 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +/**************************************************************** +This example solves a 2D heat conduction equation + + u : [0, d]^2 \rightarrow R\\ + \partial_t u = \alpha \Delta u + f + +with Dirichlet boundary conditions and given initial condition and +constant-in-time source function f. + +The partial differential equation (PDE) is solved with a finite difference +spatial discretization on an equidistant grid: For `n` inner grid points +per direction and grid distance $h = 1/(n+1)$ we write + + u_{i,j}' = \alpha (u_{i-1,j} + u_{i+1,j} + u_{i,j-1} + u_{i,j+1} + - 4 u_{i,j}) / h^2 + + f_{i,j} + +We then build an implicit Euler integrator by discretizing with time step $\tau$ + + (u_{i,j}^{k+1} - u_{i,j}^k) / \tau = + \alpha (u_{i-1,j}^{k+1} + u_{i+1,j}^{k+1} + + u_{i,j-1}^{k+1} + u_{i,j+1}^{k+1} - 4 u_{i,j}^{k+1}) / h^2 + + f_{i,j} + +and solve the resulting linear system for $ u_{\cdot}^{k+1}$ using Ginkgo's CG +solver preconditioned with an incomplete Cholesky factorization for each time +step, occasionally writing the resulting grid values into a video file using +OpenCV and a custom color mapping. + +The intention of this example is to provide a mini-app showing matrix assembly, +vector initialization, solver setup and the use of Ginkgo in a more complex +setting. 
+***************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +// This function implements a simple Ginkgo-themed clamped color mapping for +// values in the range [0,5]. +void set_val(unsigned char *data, double value) +{ + // RGB values for the 6 colors used for values 0, 1, ..., 5 + // We will interpolate linearly between these values. + double col_r[] = {255, 221, 129, 201, 249, 255}; + double col_g[] = {255, 220, 130, 161, 158, 204}; + double col_b[] = {255, 220, 133, 93, 24, 8}; + value = std::max(0.0, value); + auto i = std::max(0, std::min(4, int(value))); + auto d = std::max(0.0, std::min(1.0, value - i)); + // OpenCV uses BGR instead of RGB by default, revert indices + data[2] = static_cast(col_r[i + 1] * d + col_r[i] * (1 - d)); + data[1] = static_cast(col_g[i + 1] * d + col_g[i] * (1 - d)); + data[0] = static_cast(col_b[i + 1] * d + col_b[i] * (1 - d)); +} + + +// Initialize video output with given dimension and FPS (frames per seconds) +std::pair build_output(int n, double fps) +{ + cv::Size videosize{n, n}; + auto output = + std::make_pair(cv::VideoWriter{}, cv::Mat{videosize, CV_8UC3}); + auto fourcc = cv::VideoWriter::fourcc('a', 'v', 'c', '1'); + output.first.open("heat.mp4", fourcc, fps, videosize); + return output; +} + + +// Write the current frame to video output using the above color mapping +void output_timestep(std::pair &output, int n, + const double *data) +{ + for (int i = 0; i < n; i++) { + auto row = output.second.ptr(i); + for (int j = 0; j < n; j++) { + set_val(&row[3 * j], data[i * n + j]); + } + } + output.first.write(output.second); +} + + +int main(int argc, char *argv[]) +{ + using mtx = gko::matrix::Csr<>; + using vec = gko::matrix::Dense<>; + + // Problem parameters: + // simulation length + auto t0 = 5.0; + // diffusion factor + auto diffusion = 0.0005; + // scaling factor for heat source + auto source_scale = 2.5; + // Simulation parameters: + // 
inner grid points per discretization direction + auto n = 256; + // number of simulation steps per second + auto steps_per_sec = 500; + // number of video frames per second + auto fps = 25; + // number of grid points + auto n2 = n * n; + // grid point distance (ignoring boundary points) + auto h = 1.0 / (n + 1); + auto h2 = h * h; + // time step size for the simulation + auto tau = 1.0 / steps_per_sec; + + // create a CUDA executor with an associated OpenMP host executor + auto exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + // load heat source and initial state vectors + std::ifstream source_stream("data/source.mtx"); + std::ifstream initial_stream("data/initial.mtx"); + auto source = gko::read(source_stream, exec); + auto in_vector = gko::read(initial_stream, exec); + // create output vector with initial guess for + auto out_vector = in_vector->clone(); + // create scalar for source update + auto tau_source_scalar = gko::initialize({source_scale * tau}, exec); + // create stencil matrix as shared_ptr for solver + auto stencil_matrix = gko::share(mtx::create(exec)); + // assemble matrix + gko::matrix_data<> mtx_data{gko::dim<2>(n2, n2)}; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + auto c = i * n + j; + auto c_val = diffusion * tau * 4.0 / h2 + 1.0; + auto off_val = -diffusion * tau / h2; + // for each grid point: insert 5 stencil points + // with Dirichlet boundary conditions, i.e. 
with zero boundary value + if (i > 0) { + mtx_data.nonzeros.emplace_back(c, c - n, off_val); + } + if (j > 0) { + mtx_data.nonzeros.emplace_back(c, c - 1, off_val); + } + mtx_data.nonzeros.emplace_back(c, c, c_val); + if (j < n - 1) { + mtx_data.nonzeros.emplace_back(c, c + 1, off_val); + } + if (i < n - 1) { + mtx_data.nonzeros.emplace_back(c, c + n, off_val); + } + } + } + stencil_matrix->read(mtx_data); + // prepare video output + auto output = build_output(n, fps); + // build CG solver on stencil with incomplete Cholesky preconditioner + // stopping at 1e-10 relative accuracy + auto solver = + gko::solver::Cg<>::build() + .with_preconditioner(gko::preconditioner::Ic<>::build().on(exec)) + .with_criteria(gko::stop::RelativeResidualNorm<>::build() + .with_tolerance(1e-10) + .on(exec)) + .on(exec) + ->generate(stencil_matrix); + // time stamp of the last output frame (initialized to a sentinel value) + double last_t = -t0; + // execute implicit Euler method: for each timestep, solve stencil system + for (double t = 0; t < t0; t += tau) { + // if enough time has passed, output the next video frame + if (t - last_t > 1.0 / fps) { + last_t = t; + std::cout << t << std::endl; + output_timestep( + output, n, + gko::make_temporary_clone(exec->get_master(), in_vector.get()) + ->get_const_values()); + } + // add heat source contribution + in_vector->add_scaled(gko::lend(tau_source_scalar), gko::lend(source)); + // execute Euler step + solver->apply(gko::lend(in_vector), gko::lend(out_vector)); + // swap input and output + std::swap(in_vector, out_vector); + } +} diff --git a/examples/ilu-preconditioned-solver/CMakeLists.txt b/examples/ilu-preconditioned-solver/CMakeLists.txt index 873dff6ace4..eb35d1f4254 100644 --- a/examples/ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ilu-preconditioned-solver/CMakeLists.txt @@ -1,6 +1,15 @@ +cmake_minimum_required(VERSION 3.9) +project(ilu-preconditioned-solver) + +# We only need to find Ginkgo if we build this example 
stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(ilu-preconditioned-solver ilu-preconditioned-solver.cpp) -target_link_libraries(ilu-preconditioned-solver ginkgo) -target_include_directories(ilu-preconditioned-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(ilu-preconditioned-solver Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index 55d2946a123..1b9e7b5f53b 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -55,23 +56,36 @@ int main(int argc, char *argv[]) // Print version information std::cout << gko::version_info::get() << std::endl; - // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + // Figure out where to run the code + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = gko::share(gko::read(std::ifstream("data/A.mtx"), exec)); auto b = gko::read(std::ifstream("data/b.mtx"), exec); @@ -103,7 +117,7 @@ int main(int argc, char *argv[]) gmres::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(1000u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() 
.with_reduction_factor(reduction_factor) .on(exec)) .with_generated_preconditioner(gko::share(ilu_preconditioner)) @@ -116,7 +130,7 @@ int main(int argc, char *argv[]) ilu_gmres->apply(gko::lend(b), gko::lend(x)); // Print solution - std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, gko::lend(x)); // Calculate residual @@ -126,6 +140,6 @@ int main(int argc, char *argv[]) A->apply(gko::lend(one), gko::lend(x), gko::lend(neg_one), gko::lend(b)); b->compute_norm2(gko::lend(res)); - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, gko::lend(res)); } diff --git a/examples/inverse-iteration/CMakeLists.txt b/examples/inverse-iteration/CMakeLists.txt index 62c7fc725d0..a9ec2edfcca 100644 --- a/examples/inverse-iteration/CMakeLists.txt +++ b/examples/inverse-iteration/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(inverse-iteration) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(inverse-iteration inverse-iteration.cpp) -target_link_libraries(inverse-iteration ginkgo) -target_include_directories(inverse-iteration PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(inverse-iteration Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 68b1029d554..4025c743ba1 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -60,22 +61,35 @@ int main(int argc, char *argv[]) std::cout << std::scientific << std::setprecision(8) << std::showpos; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + auto this_exec = exec->get_master(); // linear system solver parameters @@ -108,7 +122,7 @@ int main(int argc, char *argv[]) .with_criteria(gko::stop::Iteration::build() .with_max_iters(system_max_iterations) .on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(system_residual_goal) .on(exec)) .on(exec) diff --git a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt index dd77e163e59..00d7af61a04 100644 --- a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(ir-ilu-preconditioned-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(ir-ilu-preconditioned-solver ir-ilu-preconditioned-solver.cpp) -target_link_libraries(ir-ilu-preconditioned-solver ginkgo) -target_include_directories(ir-ilu-preconditioned-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(ir-ilu-preconditioned-solver Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/ir-ilu-preconditioned-solver/doc/results.dox b/examples/ir-ilu-preconditioned-solver/doc/results.dox 
index 37ddff94b2e..d42b40af236 100644 --- a/examples/ir-ilu-preconditioned-solver/doc/results.dox +++ b/examples/ir-ilu-preconditioned-solver/doc/results.dox @@ -27,7 +27,7 @@ Solution (x): 0.0121141 0.0123025 GMRES iteration count: 8 -GMRES execution time [ms]: 3.89406 +GMRES execution time [ms]: 0.377673 Residual norm sqrt(r^T r): %%MatrixMarket matrix array real general 1 1 diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 704794fd638..aab893c6efa 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -57,24 +58,36 @@ int main(int argc, char *argv[]) // Print version information std::cout << gko::version_info::get() << std::endl; - // Figure out where to run the code and how many block-Jacobi sweeps to use - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if ((argc == 2 || argc == 3) && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if ((argc == 2 || argc == 3) && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if ((argc == 2 || argc == 3) && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { - std::cerr << "Usage: " << argv[0] << " [executor] [sweeps]" - << std::endl; + // Figure out where to run the code + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } - unsigned int sweeps = (argc == 3) ? atoi(argv[2]) : 5u; + + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + const unsigned int sweeps = argc == 3 ? 
std::atoi(argv[2]) : 5u; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid // Read data auto A = gko::share(gko::read(std::ifstream("data/A.mtx"), exec)); @@ -130,7 +143,7 @@ int main(int argc, char *argv[]) const RealValueType reduction_factor{1e-12}; auto iter_stop = gko::stop::Iteration::build().with_max_iters(1000u).on(exec); - auto tol_stop = gko::stop::ResidualNormReduction::build() + auto tol_stop = gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec); @@ -165,10 +178,10 @@ int main(int argc, char *argv[]) time += std::chrono::duration_cast(toc - tic); } - std::cout << "Using " << sweeps << " block-Jacobi sweeps. 
\n"; + std::cout << "Using " << sweeps << " block-Jacobi sweeps.\n"; // Print solution - std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, gko::lend(x)); // Calculate residual @@ -182,6 +195,6 @@ int main(int argc, char *argv[]) << "\n"; std::cout << "GMRES execution time [ms]: " << static_cast(time.count()) / 100000000.0 << "\n"; - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, gko::lend(res)); } diff --git a/examples/iterative-refinement/CMakeLists.txt b/examples/iterative-refinement/CMakeLists.txt index a21b54d2a96..112f70634cf 100644 --- a/examples/iterative-refinement/CMakeLists.txt +++ b/examples/iterative-refinement/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(iterative-refinement) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(iterative-refinement iterative-refinement.cpp) -target_link_libraries(iterative-refinement ginkgo) -target_include_directories(iterative-refinement PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(iterative-refinement Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/iterative-refinement/doc/results.dox b/examples/iterative-refinement/doc/results.dox index d15a5ca9f81..ebdbaa49b98 100644 --- a/examples/iterative-refinement/doc/results.dox +++ b/examples/iterative-refinement/doc/results.dox @@ -12,7 +12,7 @@ Final residual norm sqrt(r^T r): 1 1 4.23821e-11 IR iteration count: 24 -IR execution time [ms]: 14.9084 +IR execution time [ms]: 0.794962 @endcode diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index e0a7131f5d7..20a3514316e 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ 
b/examples/iterative-refinement/iterative-refinement.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -56,22 +57,35 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); // Create RHS and initial guess as 1 @@ -99,7 +113,7 @@ int main(int argc, char *argv[]) RealValueType outer_reduction_factor{1e-12}; auto iter_stop = gko::stop::Iteration::build().with_max_iters(max_iters).on(exec); - auto tol_stop = gko::stop::ResidualNormReduction::build() + auto tol_stop = gko::stop::ResidualNorm::build() .with_reduction_factor(outer_reduction_factor) .on(exec); @@ -115,7 +129,7 @@ int main(int argc, char *argv[]) .with_solver( cg::build() .with_criteria( - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(inner_reduction_factor) .on(exec)) .on(exec)) @@ -138,9 +152,9 @@ int main(int argc, char *argv[]) A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res)); - std::cout << "Initial residual norm sqrt(r^T r): \n"; + std::cout << "Initial residual norm sqrt(r^T r):\n"; write(std::cout, lend(initres)); - std::cout << "Final residual norm sqrt(r^T r): \n"; + std::cout << "Final residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); // Print solver statistics diff --git a/examples/minimal-cuda-solver/CMakeLists.txt b/examples/minimal-cuda-solver/CMakeLists.txt index 60a26db0c40..cd04ddb68de 100644 --- a/examples/minimal-cuda-solver/CMakeLists.txt +++ b/examples/minimal-cuda-solver/CMakeLists.txt @@ -1,6 +1,15 
@@ +cmake_minimum_required(VERSION 3.9) +project(minimal-cuda-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(minimal-cuda-solver minimal-cuda-solver.cpp) -target_link_libraries(minimal-cuda-solver ginkgo) -target_include_directories(minimal-cuda-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(minimal-cuda-solver Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp index 1b47f712766..7c8e9c77f96 100644 --- a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp +++ b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ int main() .with_preconditioner(gko::preconditioner::Jacobi<>::build().on(gpu)) .with_criteria( gko::stop::Iteration::build().with_max_iters(20u).on(gpu), - gko::stop::ResidualNormReduction<>::build() + gko::stop::ResidualNorm<>::build() .with_reduction_factor(1e-15) .on(gpu)) .on(gpu); diff --git a/examples/mixed-precision-ir/CMakeLists.txt b/examples/mixed-precision-ir/CMakeLists.txt index e4f81ef2c55..242b525d2af 100644 --- a/examples/mixed-precision-ir/CMakeLists.txt +++ b/examples/mixed-precision-ir/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(mixed-precision-ir) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(mixed-precision-ir mixed-precision-ir.cpp) -target_link_libraries(mixed-precision-ir ginkgo) -target_include_directories(mixed-precision-ir PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(mixed-precision-ir Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/mixed-precision-ir/doc/kind b/examples/mixed-precision-ir/doc/kind index c1d9154931a..082f7497da3 100644 --- a/examples/mixed-precision-ir/doc/kind +++ b/examples/mixed-precision-ir/doc/kind @@ -1 +1 @@ -techniques +mixed-precision diff --git a/examples/mixed-precision-ir/doc/results.dox b/examples/mixed-precision-ir/doc/results.dox index 6cbe205c826..481cd38b641 100644 --- a/examples/mixed-precision-ir/doc/results.dox +++ b/examples/mixed-precision-ir/doc/results.dox @@ -12,7 +12,7 @@ Final residual norm sqrt(r^T r): 1 1 1.22728e-10 MPIR iteration count: 25 -MPIR execution time [ms]: 14.2256 +MPIR execution time [ms]: 0.846559 @endcode diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 
e05ee0aa674..499924fcfe9 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -64,22 +65,35 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); // Create RHS and initial guess as 1 @@ -113,7 +127,7 @@ int main(int argc, char *argv[]) // Create inner solver auto inner_solver = cg::build() - .with_criteria(gko::stop::ResidualNormReduction::build() + .with_criteria(gko::stop::ResidualNorm::build() .with_reduction_factor(inner_reduction_factor) .on(exec), gko::stop::Iteration::build() @@ -168,9 +182,9 @@ int main(int argc, char *argv[]) A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res_vec)); - std::cout << "Initial residual norm sqrt(r^T r): \n"; + std::cout << "Initial residual norm sqrt(r^T r):\n"; write(std::cout, lend(initres_vec)); - std::cout << "Final residual norm sqrt(r^T r): \n"; + std::cout << "Final residual norm sqrt(r^T r):\n"; write(std::cout, lend(res_vec)); // Print solver statistics diff --git a/examples/mixed-spmv/CMakeLists.txt b/examples/mixed-spmv/CMakeLists.txt new file mode 100644 index 00000000000..044b3a38bb8 --- /dev/null +++ b/examples/mixed-spmv/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(mixed-spmv) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + +add_executable(mixed-spmv mixed-spmv.cpp) +target_link_libraries(mixed-spmv Ginkgo::ginkgo) + +# Copy the data 
files to the execution directory +configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/mixed-spmv/build.sh b/examples/mixed-spmv/build.sh new file mode 100755 index 00000000000..3137d5656c1 --- /dev/null +++ b/examples/mixed-spmv/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# set up script: the Ginkgo build directory is the only, mandatory argument +if [ $# -ne 1 ]; then + echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY" + exit 1 +fi +BUILD_DIR=$1 +THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) + +source "${THIS_DIR}/../build-setup.sh" + +# build (paths quoted; CXX/LINK_FLAGS stay unquoted so multi-word values split) +${CXX} -std=c++14 -o "${THIS_DIR}/mixed-spmv" "${THIS_DIR}/mixed-spmv.cpp" \ + -I"${THIS_DIR}/../../include" -I"${BUILD_DIR}/include" \ + -L"${THIS_DIR}" ${LINK_FLAGS} diff --git a/examples/mixed-spmv/data/A.mtx b/examples/mixed-spmv/data/A.mtx new file mode 100644 index 00000000000..affdafd027d --- /dev/null +++ b/examples/mixed-spmv/data/A.mtx @@ -0,0 +1,68 @@ +%%MatrixMarket matrix coordinate real symmetric +%------------------------------------------------------------------------------- +% UF Sparse Matrix Collection, Tim Davis +% http://www.cise.ufl.edu/research/sparse/matrices/Oberwolfach/LF10 +% name: Oberwolfach/LF10 +% [Oberwolfach: linear 1D beam] +% id: 1438 +% date: 2004 +% author: J. Lienemann, A. Greiner, J. Korvink +% ed: E. 
Rudnyi +% fields: name title A id notes aux date author ed kind +% aux: M E B C +% kind: model reduction problem +%------------------------------------------------------------------------------- +% notes: +% Primary matrix in this model reduction problem is the Oberwolfach K matrix +%------------------------------------------------------------------------------- +18 18 50 +1 1 3.5344800000000003 +2 1 -477.1548 +3 1 1.7672400000000001 +2 2 171775.728 +4 2 -85887.864 +5 2 477.1548 +3 3 7.068960000000001 +4 3 -477.1548 +5 3 1.7672400000000001 +4 4 171775.728 +6 4 -85887.864 +7 4 477.1548 +5 5 7.068960000000001 +6 5 -477.1548 +7 5 1.7672400000000001 +6 6 171775.728 +8 6 -85887.864 +9 6 477.1548 +7 7 7.068960000000001 +8 7 -477.1548 +9 7 1.7672400000000001 +8 8 171775.728 +10 8 -85887.864 +11 8 477.1548 +9 9 7.068960000000001 +10 9 -477.1548 +11 9 1.7672400000000001 +10 10 171775.728 +12 10 -85887.864 +13 10 477.1548 +11 11 7.068960000000001 +12 11 -477.1548 +13 11 1.7672400000000001 +12 12 171775.728 +14 12 -85887.864 +15 12 477.1548 +13 13 7.068960000000001 +14 13 -477.1548 +15 13 1.7672400000000001 +14 14 171775.728 +16 14 -85887.864 +17 14 477.1548 +15 15 7.068960000000001 +16 15 -477.1548 +17 15 1.7672400000000001 +16 16 171775.728 +18 16 477.1548 +17 17 7.068960000000001 +18 17 1.7672400000000001 +18 18 3.5344800000000003 diff --git a/examples/mixed-spmv/doc/builds-on b/examples/mixed-spmv/doc/builds-on new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/examples/mixed-spmv/doc/builds-on @@ -0,0 +1 @@ + diff --git a/examples/mixed-spmv/doc/intro.dox b/examples/mixed-spmv/doc/intro.dox new file mode 100644 index 00000000000..1e2d4744bb7 --- /dev/null +++ b/examples/mixed-spmv/doc/intro.dox @@ -0,0 +1,19 @@ + +

Introduction

+This mixed SpMV example demonstrates how to use mixed precision in Ginkgo. This example is meant for you to understand +how Ginkgo works with data of different precisions. We encourage you to play with the code, +change the parameters and see what is best suited for your purposes. + +

About the example

+Each example has the following sections: +
    +
  1. Introduction: This gives an overview of the example and mentions + any interesting aspects in the example that might help the reader. +
  2. The commented program: This section is intended for you to + understand the details of the example so that you can play with it and understand + Ginkgo and its features better. +
  3. Results: This section shows the results of the code when run. Though the + results may not be completely the same, you can expect the behaviour to be similar. +
  4. The plain program: This is the complete code without any comments to have + a complete overview of the code. +
diff --git a/examples/mixed-spmv/doc/kind b/examples/mixed-spmv/doc/kind new file mode 100644 index 00000000000..082f7497da3 --- /dev/null +++ b/examples/mixed-spmv/doc/kind @@ -0,0 +1 @@ +mixed-precision diff --git a/examples/mixed-spmv/doc/results.dox b/examples/mixed-spmv/doc/results.dox new file mode 100644 index 00000000000..60baf566562 --- /dev/null +++ b/examples/mixed-spmv/doc/results.dox @@ -0,0 +1,18 @@ +

Results

+The following is the expected result (omp): + +@code{.cpp} + +High Precision time(s): 9.8980000000e-07 +High Precision result norm: 2.5547848401e+05 +Low Precision time(s): 9.8890000000e-07 +Low Precision relative error: 5.5253439244e-08 +Hp * Lp -> Hp time(s): 1.3829000000e-06 +Hp * Lp -> Hp relative error: 1.6328092846e-08 +Lp * Lp -> Hp time(s): 1.3761000000e-06 +Lp * Lp -> Hp relative error: 2.5540873856e-08 +Lp * Hp -> Hp time(s): 1.3761000000e-06 +Lp * Hp -> Hp relative error: 3.7166469483e-08 +@endcode + +

Comments about programming and debugging

diff --git a/examples/mixed-spmv/doc/short-intro b/examples/mixed-spmv/doc/short-intro new file mode 100644 index 00000000000..daf3b4d36bd --- /dev/null +++ b/examples/mixed-spmv/doc/short-intro @@ -0,0 +1 @@ +The mixed spmv example. diff --git a/examples/mixed-spmv/doc/tooltip b/examples/mixed-spmv/doc/tooltip new file mode 100644 index 00000000000..7db892e5a27 --- /dev/null +++ b/examples/mixed-spmv/doc/tooltip @@ -0,0 +1 @@ +Mixed precision apply. Read a matrix from a file and generate right hand side randomly. diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp new file mode 100644 index 00000000000..7a6e6a8b542 --- /dev/null +++ b/examples/mixed-spmv/mixed-spmv.cpp @@ -0,0 +1,287 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +// @sect3{Include files} + +// This is the main ginkgo header file. +#include + +// Add the fstream header to read data from files. +#include +// Add the C++ iostream header to output information to the console. +#include +// Add the STL map header for the executor selection. +#include +// Add the string manipulation header to handle strings. +#include +// Add the chrono header for timing. +#include +// Add the random header to generate random vectors. +#include + +namespace { + + +/** + * Generate a random value. + * + * @tparam ValueType type of the value + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param value_dist distribution the value is drawn from + * @param gen a random engine + * + * @return a value of type ValueType drawn from value_dist + */ +template +typename std::enable_if::value, ValueType>::type +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +{ + return value_dist(gen); +} + +/** + * Overload for complex types: both parts are drawn from value_dist. + * + * @copydoc get_rand_value + */ +template +typename std::enable_if::value, ValueType>::type +get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +{ + return ValueType(value_dist(gen), value_dist(gen)); +} + +/** + * Time the apply operation A->apply(b, x). It runs 2 warm-up applies and + * returns the average time over 10 timed applies. 
+ * + * @return the average time of one apply in seconds + */ +double timing(std::shared_ptr exec, + std::shared_ptr A, + std::shared_ptr b, + std::shared_ptr x) +{ + int warmup = 2; + int rep = 10; + for (int i = 0; i < warmup; i++) { + A->apply(lend(b), lend(x)); + } + double total_sec = 0; + for (int i = 0; i < rep; i++) { + // always apply into a fresh clone of x + auto xx = x->clone(); + // synchronize to make sure data is already on device + exec->synchronize(); + auto start = std::chrono::steady_clock::now(); + A->apply(lend(b), lend(xx)); + // synchronize to make sure the operation is done + exec->synchronize(); + auto stop = std::chrono::steady_clock::now(); + // get the duration in seconds + std::chrono::duration duration_time = stop - start; + total_sec += duration_time.count(); + if (i + 1 == rep) { + // copy the result of the last repetition back to x + x->copy_from(lend(xx)); + } + } + + return total_sec / rep; +} + + +} // namespace + + +int main(int argc, char *argv[]) +{ + // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense + // with one column/one row. The advantage of this concept is that using + // multiple vectors is now a natural extension of adding columns/rows as + // necessary. + using HighPrecision = double; + using RealValueType = gko::remove_complex; + using LowPrecision = float; + using IndexType = int; + using hp_vec = gko::matrix::Dense; + using lp_vec = gko::matrix::Dense; + using real_vec = gko::matrix::Dense; + // The gko::matrix::Ell class is used here, but any other matrix class such + // as gko::matrix::Coo, gko::matrix::Hybrid, gko::matrix::Csr or + // gko::matrix::Sellp could also be used. + // Note: the behavior depends on the GINKGO_MIXED_PRECISION flag and the + // actual implementation of the different matrix formats. + using hp_mtx = gko::matrix::Ell; + using lp_mtx = gko::matrix::Ell; + + // Print the ginkgo version information. 
+ std::cout << gko::version_info::get() << std::endl; + + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl; + std::exit(-1); + } + + // @sect3{Where do you want to run your operation?} + // The gko::Executor class is one of the cornerstones of Ginkgo. Currently, + // we have support for + // a gko::OmpExecutor, which uses OpenMP multi-threading in most of its + // kernels, a gko::ReferenceExecutor, a single threaded specialization of + // the OpenMP executor and a gko::CudaExecutor which runs the code on an + // NVIDIA GPU if available (plus the hip and dpcpp entries below). + // @note With the help of C++, you see that you only ever need to change the + // executor and all the other functions/routines within Ginkgo should + // automatically work and run on the executor without any other changes. + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + + // @sect3{Preparing your data and transfer to the proper device.} + // Read the matrix using the @ref read function and set the right hand side + // randomly. + // @note Ginkgo uses C++ smart pointers to automatically manage memory. To + // this end, we use our own object ownership transfer functions that under + // the hood call the required smart pointer functions to manage object + // ownership. The gko::share, gko::give and gko::lend are the functions + // that you would need to use. 
+ + // read the matrix in both HighPrecision and LowPrecision. + auto hp_A = share(gko::read(std::ifstream("data/A.mtx"), exec)); + auto lp_A = share(gko::read(std::ifstream("data/A.mtx"), exec)); + // set up shortcuts for the dimensions of A, b and x + auto A_dim = hp_A->get_size(); + auto b_dim = gko::dim<2>{A_dim[1], 1}; + auto x_dim = gko::dim<2>{A_dim[0], b_dim[1]}; + auto host_b = hp_vec::create(exec->get_master(), b_dim); + // fill the b vector with some random data + std::ranlux48 rand_engine(32); + auto dist = std::uniform_real_distribution(0.0, 1.0); + for (int i = 0; i < host_b->get_size()[0]; i++) { + host_b->at(i, 0) = get_rand_value(dist, rand_engine); + } + // copy the data from host to device + auto hp_b = share(hp_vec::create(exec)); + auto lp_b = share(lp_vec::create(exec)); + hp_b->copy_from(lend(host_b)); + lp_b->copy_from(lend(hp_b)); + + // create the result vectors x for the different precision combinations + auto hp_x = share(hp_vec::create(exec, x_dim)); + auto lp_x = share(lp_vec::create(exec, x_dim)); + auto hplp_x = share(hp_x->clone()); + auto lplp_x = share(hp_x->clone()); + auto lphp_x = share(hp_x->clone()); + + // @sect3{Measure the time of apply} + // We measure the time of apply for the different precision combinations. + + // Hp * Hp -> Hp + auto hp_sec = timing(exec, hp_A, hp_b, hp_x); + // Lp * Lp -> Lp + auto lp_sec = timing(exec, lp_A, lp_b, lp_x); + // Hp * Lp -> Hp + auto hplp_sec = timing(exec, hp_A, lp_b, hplp_x); + // Lp * Lp -> Hp + auto lplp_sec = timing(exec, lp_A, lp_b, lplp_x); + // Lp * Hp -> Hp + auto lphp_sec = timing(exec, lp_A, hp_b, lphp_x); + + + // Measure the error of the results. + // neg_one is an object that represents the number -1.0 which allows for a + // uniform interface when computing on any device. To compute the difference + // to hp_x, all you need to do is call the add_scaled method, which here is + // an axpy and equivalent to the BLAS axpy routine. Finally, you compute + // the Euclidean 2-norm with the compute_norm2 function. 
+ auto neg_one = gko::initialize({-1.0}, exec); + auto hp_x_norm = gko::initialize({0.0}, exec->get_master()); + auto lp_diff_norm = gko::initialize({0.0}, exec->get_master()); + auto hplp_diff_norm = gko::initialize({0.0}, exec->get_master()); + auto lplp_diff_norm = gko::initialize({0.0}, exec->get_master()); + auto lphp_diff_norm = gko::initialize({0.0}, exec->get_master()); + auto lp_diff = hp_x->clone(); + auto hplp_diff = hp_x->clone(); + auto lplp_diff = hp_x->clone(); + auto lphp_diff = hp_x->clone(); + + hp_x->compute_norm2(lend(hp_x_norm)); + lp_diff->add_scaled(lend(neg_one), lend(lp_x)); + lp_diff->compute_norm2(lend(lp_diff_norm)); + hplp_diff->add_scaled(lend(neg_one), lend(hplp_x)); + hplp_diff->compute_norm2(lend(hplp_diff_norm)); + lplp_diff->add_scaled(lend(neg_one), lend(lplp_x)); + lplp_diff->compute_norm2(lend(lplp_diff_norm)); + lphp_diff->add_scaled(lend(neg_one), lend(lphp_x)); + lphp_diff->compute_norm2(lend(lphp_diff_norm)); + exec->synchronize(); + + std::cout.precision(10); + std::cout << std::scientific; + std::cout << "High Precision time(s): " << hp_sec << std::endl; + std::cout << "High Precision result norm: " << hp_x_norm->at(0) + << std::endl; + std::cout << "Low Precision time(s): " << lp_sec << std::endl; + std::cout << "Low Precision relative error: " + << lp_diff_norm->at(0) / hp_x_norm->at(0) << "\n"; + std::cout << "Hp * Lp -> Hp time(s): " << hplp_sec << std::endl; + std::cout << "Hp * Lp -> Hp relative error: " + << hplp_diff_norm->at(0) / hp_x_norm->at(0) << "\n"; + std::cout << "Lp * Lp -> Hp time(s): " << lplp_sec << std::endl; + std::cout << "Lp * Lp -> Hp relative error: " + << lplp_diff_norm->at(0) / hp_x_norm->at(0) << "\n"; + std::cout << "Lp * Hp -> Hp time(s): " << lplp_sec << std::endl; + std::cout << "Lp * Hp -> Hp relative error: " + << lphp_diff_norm->at(0) / hp_x_norm->at(0) << "\n"; +} diff --git a/examples/nine-pt-stencil-solver/CMakeLists.txt b/examples/nine-pt-stencil-solver/CMakeLists.txt index 
9902887e5c5..b9a3fc8421e 100644 --- a/examples/nine-pt-stencil-solver/CMakeLists.txt +++ b/examples/nine-pt-stencil-solver/CMakeLists.txt @@ -1,3 +1,10 @@ +cmake_minimum_required(VERSION 3.9) +project(nine-pt-stencil-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(nine-pt-stencil-solver nine-pt-stencil-solver.cpp) -target_link_libraries(nine-pt-stencil-solver ginkgo) -target_include_directories(nine-pt-stencil-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(nine-pt-stencil-solver Ginkgo::ginkgo) diff --git a/examples/nine-pt-stencil-solver/doc/results.dox b/examples/nine-pt-stencil-solver/doc/results.dox index 352c92c070a..f9973a3d79e 100644 --- a/examples/nine-pt-stencil-solver/doc/results.dox +++ b/examples/nine-pt-stencil-solver/doc/results.dox @@ -1,22 +1,10 @@

Results

- -The expected output of the relative error at K=10 should be +The expected output should be @code{.cpp} -0.00150263 0.00676184 0.0210368 0.0488355 0.0946657 0.163035 0.258452 0.385425 0.54846 0.752066 -0.00676184 0.012021 0.026296 0.0540947 0.0999249 0.168295 0.263712 0.390684 0.553719 0.757325 -0.0210368 0.026296 0.040571 0.0683697 0.1142 0.18257 0.277987 0.404959 0.567994 0.7716 -0.0488354 0.0540947 0.0683697 0.0961683 0.141998 0.210368 0.305785 0.432757 0.595793 0.799399 -0.0946656 0.0999248 0.1142 0.141998 0.187829 0.256198 0.351615 0.478588 0.641623 0.845229 -0.163035 0.168295 0.182569 0.210368 0.256198 0.324568 0.419985 0.546957 0.709993 0.913599 -0.258452 0.263711 0.277987 0.305785 0.351615 0.419985 0.515402 0.642374 0.80541 1.00902 -0.385424 0.390684 0.404959 0.432757 0.478588 0.546957 0.642374 0.769346 0.932382 1.13599 -0.54846 0.553719 0.567994 0.595793 0.641623 0.709992 0.805409 0.932382 1.09542 1.29902 -0.752066 0.757325 0.7716 0.799399 0.845229 0.913599 1.00902 1.13599 1.29902 1.50263 - -The average relative error is 1.4283e-07 -The runtime is 5.784994 ms +The average relative error is 6.35715e-06 +The runtime is 167.320520 ms @endcode diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index 9f573ac79d3..cf3d360c11f 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -238,6 +238,11 @@ void solve_system(const std::string &executor_string, return gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; // executor where Ginkgo will perform the computation @@ -280,7 +285,7 @@ void solve_system(const std::string &executor_string, cg::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(dp_2).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .with_preconditioner(bj::build().on(exec)) @@ -294,17 +299,23 @@ void solve_system(const std::string &executor_string, int main(int argc, char *argv[]) { - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " DISCRETIZATION_POINTS [executor]" - << " [stencil_alpha] [stencil_beta] [stencil_gamma]" - << std::endl; - std::exit(-1); - } using ValueType = double; using IndexType = int; - const int discretization_points = argc >= 2 ? std::atoi(argv[1]) : 100; - const auto executor_string = argc >= 3 ? argv[2] : "reference"; + // Print version information + std::cout << gko::version_info::get() << std::endl; + + if (argc == 2 && std::string(argv[1]) == "--help") { + std::cerr + << "Usage: " << argv[0] + << " [executor] [DISCRETIZATION_POINTS] [alpha] [beta] [gamma]" + << std::endl; + std::exit(-1); + } + + const auto executor_string = argc >= 2 ? argv[1] : "reference"; + const IndexType discretization_points = + argc >= 3 ? std::atoi(argv[2]) : 100; const ValueType alpha_c = argc >= 4 ? std::atof(argv[3]) : default_alpha; const ValueType beta_c = argc >= 5 ? std::atof(argv[4]) : default_beta; const ValueType gamma_c = argc >= 6 ? 
std::atof(argv[5]) : default_gamma; @@ -312,7 +323,7 @@ int main(int argc, char *argv[]) // clang-format off std::array coefs{ gamma_c, beta_c, gamma_c, - beta_c, alpha_c, beta_c, + beta_c, alpha_c, beta_c, gamma_c, beta_c, gamma_c}; // clang-format on @@ -354,7 +365,8 @@ int main(int argc, char *argv[]) .count()) * 1e-6; - print_solution(dp, u.data()); + // Uncomment to print the solution + // print_solution(dp, u.data()); std::cout << "The average relative error is " << calculate_error(dp, u.data(), correct_u) / static_cast>(dp_2) diff --git a/examples/papi-logging/CMakeLists.txt b/examples/papi-logging/CMakeLists.txt index c378a0bd543..d7db0ef89fa 100644 --- a/examples/papi-logging/CMakeLists.txt +++ b/examples/papi-logging/CMakeLists.txt @@ -1,8 +1,19 @@ -if (GINKGO_HAVE_PAPI_SDE) - add_executable(papi-logging papi-logging.cpp) - target_link_libraries(papi-logging ginkgo PAPI::PAPI) - target_include_directories(papi-logging PRIVATE ${PROJECT_SOURCE_DIR}) - configure_file(data/A.mtx data/A.mtx COPYONLY) - configure_file(data/b.mtx data/b.mtx COPYONLY) - configure_file(data/x0.mtx data/x0.mtx COPYONLY) +cmake_minimum_required(VERSION 3.9) +project(papi-logging) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) endif() + +if (NOT GINKGO_HAVE_PAPI_SDE) + message(FATAL_ERROR "This example needs Ginkgo built with PAPI support") +endif() + +add_executable(papi-logging papi-logging.cpp) +target_link_libraries(papi-logging Ginkgo::ginkgo PAPI::PAPI) + +# Copy the data files to the execution directory +configure_file(data/A.mtx data/A.mtx COPYONLY) +configure_file(data/b.mtx data/b.mtx COPYONLY) +configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 5d8019cc519..6aff2772980 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -138,23 +139,36 @@ int main(int argc, char *argv[]) // Print version information std::cout << gko::version_info::get() << std::endl; - // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + // Figure out where to run the code + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); auto b = gko::read(std::ifstream("data/b.mtx"), exec); @@ -166,7 +180,7 @@ int main(int argc, char *argv[]) cg::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec); diff --git a/examples/par-ilu-convergence/CMakeLists.txt b/examples/par-ilu-convergence/CMakeLists.txt new file mode 100644 index 00000000000..b5e0c8492e5 --- /dev/null +++ b/examples/par-ilu-convergence/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(par-ilu-convergence) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + +add_executable(par-ilu-convergence par-ilu-convergence.cpp) +target_link_libraries(par-ilu-convergence Ginkgo::ginkgo) + +# Copy the data files to the execution directory +configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/par-ilu-convergence/build.sh b/examples/par-ilu-convergence/build.sh new file mode 100755 index 00000000000..6af53f00387 --- /dev/null +++ b/examples/par-ilu-convergence/build.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Build the par-ilu-convergence example against a Ginkgo build directory. 
+# Usage: build.sh GINKGO_BUILD_DIRECTORY + +# set up script +if [ $# -ne 1 ]; then + echo "Usage: $0 
GINKGO_BUILD_DIRECTORY" >&2 + exit 1 +fi +BUILD_DIR=$1 +THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd ) + +source "${THIS_DIR}/../build-setup.sh" + +# build +# CXX and LINK_FLAGS (presumably set by build-setup.sh) stay unquoted on +# purpose so that multi-word values split into separate arguments +${CXX} -std=c++14 -o "${THIS_DIR}/par-ilu-convergence" \ + "${THIS_DIR}/par-ilu-convergence.cpp" \ + -I"${THIS_DIR}/../../include" -I"${BUILD_DIR}/include" \ + -L"${THIS_DIR}" ${LINK_FLAGS} diff --git a/examples/par-ilu-convergence/data/A.mtx b/examples/par-ilu-convergence/data/A.mtx new file mode 100644 index 00000000000..c67437da567 --- /dev/null +++ b/examples/par-ilu-convergence/data/A.mtx @@ -0,0 +1,114 @@ +%%MatrixMarket matrix coordinate integer symmetric +%------------------------------------------------------------------------------- +% UF Sparse Matrix Collection, Tim Davis +% http://www.cise.ufl.edu/research/sparse/matrices/JGD_Trefethen/Trefethen_20b +% name: JGD_Trefethen/Trefethen_20b +% [Diagonal matrices with primes, Nick Trefethen, Oxford Univ.] +% id: 2203 +% date: 2008 +% author: N. Trefethen +% ed: J.-G. Dumas +% fields: name title A id date author ed kind notes +% kind: combinatorial problem +%------------------------------------------------------------------------------- +% notes: +% Diagonal matrices with primes, Nick Trefethen, Oxford Univ. +% From Jean-Guillaume Dumas' Sparse Integer Matrix Collection, +% http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/simc.html +% +% Problem 7 of the Hundred-dollar, Hundred-digit Challenge Problems, +% SIAM News, vol 35, no. 1. +% +% 7. Let A be the 20,000 x 20,000 matrix whose entries are zero +% everywhere except for the primes 2, 3, 5, 7, . . . , 224737 along the +% main diagonal and the number 1 in all the positions A(i,j) with +% |i-j| = 1,2,4,8, . . . ,16384. What is the (1,1) entry of inv(A)? 
+% +% http://www.siam.org/news/news.php?id=388 +% +% Filename in JGD collection: Trefethen/trefethen_20__19_minor.sms +%------------------------------------------------------------------------------- +19 19 83 +1 1 3 +2 1 1 +3 1 1 +5 1 1 +9 1 1 +17 1 1 +2 2 5 +3 2 1 +4 2 1 +6 2 1 +10 2 1 +18 2 1 +3 3 7 +4 3 1 +5 3 1 +7 3 1 +11 3 1 +19 3 1 +4 4 11 +5 4 1 +6 4 1 +8 4 1 +12 4 1 +5 5 13 +6 5 1 +7 5 1 +9 5 1 +13 5 1 +6 6 17 +7 6 1 +8 6 1 +10 6 1 +14 6 1 +7 7 19 +8 7 1 +9 7 1 +11 7 1 +15 7 1 +8 8 23 +9 8 1 +10 8 1 +12 8 1 +16 8 1 +9 9 29 +10 9 1 +11 9 1 +13 9 1 +17 9 1 +10 10 31 +11 10 1 +12 10 1 +14 10 1 +18 10 1 +11 11 37 +12 11 1 +13 11 1 +15 11 1 +19 11 1 +12 12 41 +13 12 1 +14 12 1 +16 12 1 +13 13 43 +14 13 1 +15 13 1 +17 13 1 +14 14 47 +15 14 1 +16 14 1 +18 14 1 +15 15 53 +16 15 1 +17 15 1 +19 15 1 +16 16 59 +17 16 1 +18 16 1 +17 17 61 +18 17 1 +19 17 1 +18 18 67 +19 18 1 +19 19 71 diff --git a/examples/par-ilu-convergence/doc/builds-on b/examples/par-ilu-convergence/doc/builds-on new file mode 100644 index 00000000000..369aa997770 --- /dev/null +++ b/examples/par-ilu-convergence/doc/builds-on @@ -0,0 +1 @@ +simple-solver diff --git a/examples/par-ilu-convergence/doc/intro.dox b/examples/par-ilu-convergence/doc/intro.dox new file mode 100644 index 00000000000..adc036224d4 --- /dev/null +++ b/examples/par-ilu-convergence/doc/intro.dox @@ -0,0 +1,7 @@ + +

Introduction

+ +

About the example

+ +This example can be used to inspect the convergence behavior of parallel +incomplete factorizations. \ No newline at end of file diff --git a/examples/par-ilu-convergence/doc/kind b/examples/par-ilu-convergence/doc/kind new file mode 100644 index 00000000000..53a96d5771f --- /dev/null +++ b/examples/par-ilu-convergence/doc/kind @@ -0,0 +1 @@ +preconditioners diff --git a/examples/par-ilu-convergence/doc/results.dox b/examples/par-ilu-convergence/doc/results.dox new file mode 100644 index 00000000000..e0456cc9d2a --- /dev/null +++ b/examples/par-ilu-convergence/doc/results.dox @@ -0,0 +1,28 @@ +

Results

+This is the expected output: + +@code{.cpp} + +Usage: executable [] [] [] [fill-in-limit] + +@endcode + +When specifying an executor: + +@code{.cpp} + +Reading data/A.mtx +1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14; +2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; +10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16; + +@endcode + +

Comments about programming and debugging

diff --git a/examples/par-ilu-convergence/doc/short-intro b/examples/par-ilu-convergence/doc/short-intro new file mode 100644 index 00000000000..c8f8281379e --- /dev/null +++ b/examples/par-ilu-convergence/doc/short-intro @@ -0,0 +1 @@ +The ParILU convergence example. diff --git a/examples/par-ilu-convergence/doc/tooltip b/examples/par-ilu-convergence/doc/tooltip new file mode 100644 index 00000000000..0f788aaa603 --- /dev/null +++ b/examples/par-ilu-convergence/doc/tooltip @@ -0,0 +1 @@ +Compute runtimes and residuals for different ParILU(T)/IC(T) iteration counts. diff --git a/examples/par-ilu-convergence/par-ilu-convergence.cpp b/examples/par-ilu-convergence/par-ilu-convergence.cpp new file mode 100644 index 00000000000..b72ea5e667a --- /dev/null +++ b/examples/par-ilu-convergence/par-ilu-convergence.cpp @@ -0,0 +1,209 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +#include + + +#include +#include +#include +#include +#include +#include + + +const std::map()>> + executors{ + {"reference", [] { return gko::ReferenceExecutor::create(); }}, + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); + }}, + {"dpcpp", [] { + return gko::DpcppExecutor::create(0, gko::OmpExecutor::create()); + }}}; + + +template +auto try_generate(Function fun) -> decltype(fun()) +{ + decltype(fun()) result; + try { + result = fun(); + } catch (const gko::Error &err) { + std::cerr << "Error: " << err.what() << '\n'; + std::exit(-1); + } + return result; +} + + +template +double compute_ilu_residual_norm( + const gko::matrix::Csr *residual, + const gko::matrix::Csr *mtx) +{ + gko::matrix_data residual_data; + gko::matrix_data mtx_data; + residual->write(residual_data); + mtx->write(mtx_data); + residual_data.ensure_row_major_order(); + mtx_data.ensure_row_major_order(); + auto it = mtx_data.nonzeros.begin(); + double residual_norm{}; + for (auto entry : residual_data.nonzeros) { + auto ref_row = it->row; + auto ref_col = it->column; + if (entry.row == ref_row && entry.column == ref_col) { + residual_norm += gko::squared_norm(entry.value); + ++it; + } + } + return std::sqrt(residual_norm); +} + + +int 
main(int argc, char *argv[]) +{ + using ValueType = double; + using IndexType = int; + + // print usage message + if (argc < 2 || executors.find(argv[1]) == executors.end()) { + std::cerr << "Usage: executable" + << " [] " + "[] " + "[] []\n"; + return -1; + } + + // generate executor based on first argument + auto exec = try_generate([&] { return executors.at(argv[1])(); }); + + // set matrix and preconditioner name with default values + std::string matrix = argc < 3 ? "data/A.mtx" : argv[2]; + std::string precond = argc < 4 ? "parilu" : argv[3]; + int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]); + int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]); + double limit = argc < 7 ? 2 : std::stod(argv[6]); + + // load matrix file into Csr format + auto mtx = gko::share(try_generate([&] { + std::ifstream mtx_stream{matrix}; + if (!mtx_stream) { + throw GKO_STREAM_ERROR("Unable to open matrix file"); + } + std::cerr << "Reading " << matrix << std::endl; + return gko::read>(mtx_stream, + exec); + })); + + std::shared_ptr factory; + std::function set_iterations; + if (precond == "parilu") { + factory = + gko::factorization::ParIlu::build().on(exec); + set_iterations = [&](int it) { + gko::as::Factory>( + factory) + ->get_parameters() + .iterations = it; + }; + } else if (precond == "paric") { + factory = + gko::factorization::ParIc::build().on(exec); + set_iterations = [&](int it) { + gko::as::Factory>( + factory) + ->get_parameters() + .iterations = it; + }; + } else if (precond == "parilut") { + factory = gko::factorization::ParIlut::build() + .with_fill_in_limit(limit) + .on(exec); + set_iterations = [&](int it) { + gko::as::Factory>( + factory) + ->get_parameters() + .iterations = it; + }; + } else if (precond == "parict") { + factory = gko::factorization::ParIct::build() + .with_fill_in_limit(limit) + .on(exec); + set_iterations = [&](int it) { + gko::as::Factory>( + factory) + ->get_parameters() + .iterations = it; + }; + } + auto one = 
gko::initialize>({1.0}, exec); + auto minus_one = + gko::initialize>({-1.0}, exec); + for (int it = 1; it <= max_iterations; ++it) { + set_iterations(it); + std::cout << it << ';'; + std::vector times; + std::vector residuals; + for (int rep = 0; rep < num_repetitions; ++rep) { + auto tic = std::chrono::high_resolution_clock::now(); + auto result = + gko::as>(factory->generate(mtx)); + exec->synchronize(); + auto toc = std::chrono::high_resolution_clock::now(); + auto residual = gko::clone(exec, mtx); + result->get_operators()[0]->apply(lend(one), + lend(result->get_operators()[1]), + lend(minus_one), lend(residual)); + times.push_back( + std::chrono::duration_cast(toc - tic) + .count()); + residuals.push_back( + compute_ilu_residual_norm(lend(residual), lend(mtx))); + } + for (auto el : times) { + std::cout << el << ';'; + } + for (auto el : residuals) { + std::cout << el << ';'; + } + std::cout << '\n'; + } +} diff --git a/examples/performance-debugging/CMakeLists.txt b/examples/performance-debugging/CMakeLists.txt index f44e29ad39f..97cf82cb0ff 100644 --- a/examples/performance-debugging/CMakeLists.txt +++ b/examples/performance-debugging/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(performance-debugging) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(performance-debugging performance-debugging.cpp) -target_link_libraries(performance-debugging ginkgo) -target_include_directories(performance-debugging PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(performance-debugging Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index 573346531cf..902062c06fd 100644 --- a/examples/performance-debugging/performance-debugging.cpp 
+++ b/examples/performance-debugging/performance-debugging.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -369,18 +369,36 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc > 1 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc > 1 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else { - print_usage(argv[0]); + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; + std::exit(-1); } + // Figure out where to run the code + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read the input matrix file directory std::string input_mtx = "data/A.mtx"; if (argc == 3) { @@ -408,7 +426,7 @@ int main(int argc, char *argv[]) auto solver_factory = solver::build() .with_criteria( - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec), gko::stop::Iteration::build().with_max_iters(max_iters).on( diff --git a/examples/poisson-solver/CMakeLists.txt b/examples/poisson-solver/CMakeLists.txt index 945808cc4c6..fcbc831a6e5 100644 --- a/examples/poisson-solver/CMakeLists.txt +++ b/examples/poisson-solver/CMakeLists.txt @@ -1,3 +1,10 @@ +cmake_minimum_required(VERSION 3.9) +project(poisson-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(poisson-solver poisson-solver.cpp) -target_link_libraries(poisson-solver ginkgo) -target_include_directories(poisson-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(poisson-solver Ginkgo::ginkgo) diff --git a/examples/poisson-solver/doc/results.dox b/examples/poisson-solver/doc/results.dox index 5e093c1ac2f..7d123881371 100644 --- a/examples/poisson-solver/doc/results.dox +++ b/examples/poisson-solver/doc/results.dox @@ -3,29 +3,8 @@ This is the expected output: @code{.cpp} -0 -0.00010798 -0.000863838 -0.00291545 -0.0069107 
-0.0134975 -0.0233236 -0.037037 -0.0552856 -0.0787172 -0.10798 -0.143721 -0.186589 -0.237231 -0.296296 -0.364431 -0.442285 -0.530504 -0.629738 -0.740633 -0.863838 -1 -The average relative error is 1.87318e-15 +Solve complete. +The average relative error is 2.52236e-11 @endcode diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index 9124b13e352..2d04a84a7d3 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -122,16 +122,19 @@ int main(int argc, char *argv[]) using cg = gko::solver::Cg; using bj = gko::preconditioner::Jacobi; - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " DISCRETIZATION_POINTS [executor]" - << std::endl; + // Print version information + std::cout << gko::version_info::get() << std::endl; + + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] + << " [executor] [DISCRETIZATION_POINTS]" << std::endl; std::exit(-1); } // Get number of discretization points + const auto executor_string = argc >= 2 ? argv[1] : "reference"; const unsigned int discretization_points = - argc >= 2 ? std::atoi(argv[1]) : 100; - const auto executor_string = argc >= 3 ? argv[2] : "reference"; + argc >= 3 ? 
std::atoi(argv[2]) : 100; // Figure out where to run the code std::map()>> @@ -147,6 +150,11 @@ int main(int argc, char *argv[]) return gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; // executor where Ginkgo will perform the computation @@ -177,7 +185,7 @@ int main(int argc, char *argv[]) .with_criteria(gko::stop::Iteration::build() .with_max_iters(discretization_points) .on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .with_preconditioner(bj::build().on(exec)) @@ -185,8 +193,9 @@ int main(int argc, char *argv[]) ->generate(clone(exec, matrix)) // copy the matrix to the executor ->apply(lend(rhs), lend(u)); - print_solution(u0, u1, lend(u)); - std::cout << "The average relative error is " + // Uncomment to print the solution + // print_solution(u0, u1, lend(u)); + std::cout << "Solve complete.\nThe average relative error is " << calculate_error(discretization_points, lend(u), correct_u) / static_cast>( discretization_points) diff --git a/examples/preconditioned-solver/CMakeLists.txt b/examples/preconditioned-solver/CMakeLists.txt index 0bcbf2eeced..bd2fcd3cb60 100644 --- a/examples/preconditioned-solver/CMakeLists.txt +++ b/examples/preconditioned-solver/CMakeLists.txt @@ -1,6 +1,14 @@ +cmake_minimum_required(VERSION 3.9) +project(preconditioned-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() add_executable(preconditioned-solver preconditioned-solver.cpp) -target_link_libraries(preconditioned-solver ginkgo) -target_include_directories(preconditioned-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(preconditioned-solver Ginkgo::ginkgo) + +# Copy the data files to the execution 
directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index 142614c73b0..ce01f0c0cc5 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -56,22 +57,36 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + // Figure out where to run the code + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); auto b = gko::read(std::ifstream("data/b.mtx"), exec); @@ -83,7 +98,7 @@ int main(int argc, char *argv[]) cg::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) // Add preconditioner, these 2 lines are the only @@ -97,7 +112,7 @@ int main(int argc, char *argv[]) solver->apply(lend(b), lend(x)); // Print solution - std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, lend(x)); // Calculate residual @@ -107,6 +122,6 @@ int main(int argc, char *argv[]) A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res)); - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); } diff --git a/examples/preconditioner-export/CMakeLists.txt b/examples/preconditioner-export/CMakeLists.txt index 3e35167bc9e..8195bf8a969 100644 --- a/examples/preconditioner-export/CMakeLists.txt +++ b/examples/preconditioner-export/CMakeLists.txt @@ -1,4 +1,13 @@ +cmake_minimum_required(VERSION 3.9) +project(preconditioner-export) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 
REQUIRED) +endif() + add_executable(preconditioner-export preconditioner-export.cpp) -target_link_libraries(preconditioner-export ginkgo) -target_include_directories(preconditioner-export PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(preconditioner-export Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) diff --git a/examples/preconditioner-export/doc/results.dox b/examples/preconditioner-export/doc/results.dox index 66e257949ed..6287655ea57 100644 --- a/examples/preconditioner-export/doc/results.dox +++ b/examples/preconditioner-export/doc/results.dox @@ -3,7 +3,7 @@ This is the expected output: @code{.cpp} -Usage: ./preconditioner-export [] [] +Usage: ./preconditioner-export [] [] Jacobi parameters: [] [] [] ParILU parameters: [] ParILUT parameters: [] [] diff --git a/examples/preconditioner-export/preconditioner-export.cpp b/examples/preconditioner-export/preconditioner-export.cpp index 866b0e051f9..6d048e0e9d9 100644 --- a/examples/preconditioner-export/preconditioner-export.cpp +++ b/examples/preconditioner-export/preconditioner-export.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -49,9 +50,14 @@ const std::map()>> return gko::CudaExecutor::create( 0, gko::ReferenceExecutor::create()); }}, - {"hip", [] { + {"hip", + [] { return gko::HipExecutor::create( 0, gko::ReferenceExecutor::create()); + }}, + {"dpcpp", [] { + return gko::DpcppExecutor::create( + 0, gko::ReferenceExecutor::create()); }}}; @@ -81,8 +87,8 @@ int main(int argc, char *argv[]) { // print usage message if (argc < 2 || executors.find(argv[1]) == executors.end()) { - std::cerr << "Usage: " << argv[0] - << " [] " + std::cerr << "Usage: executable" + << " [] " "[]\n"; std::cerr << "Jacobi parameters: [] [] " diff --git a/examples/simple-solver-logging/CMakeLists.txt b/examples/simple-solver-logging/CMakeLists.txt index f28413db303..1d5e75912e0 100644 --- a/examples/simple-solver-logging/CMakeLists.txt +++ b/examples/simple-solver-logging/CMakeLists.txt @@ -1,6 +1,15 @@ +cmake_minimum_required(VERSION 3.9) +project(simple-solver-logging) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(simple-solver-logging simple-solver-logging.cpp) -target_link_libraries(simple-solver-logging ginkgo) -target_include_directories(simple-solver-logging PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(simple-solver-logging Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/simple-solver-logging/doc/results.dox b/examples/simple-solver-logging/doc/results.dox index e77175e5f46..c7cf3f0aee8 100644 --- a/examples/simple-solver-logging/doc/results.dox +++ b/examples/simple-solver-logging/doc/results.dox @@ -3,96 +3,679 @@ This is the expected output: @code{.cpp} -[LOG] >>> apply started on A LinOp[gko::solver::Cg,0x562525b9cad0] with b 
LinOp[gko::matrix::Dense,0x562525b9d670] and x LinOp[gko::matrix::Dense,0x562525b9dca0] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c350] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c0f0] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[152] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2a30] with Bytes[152] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[152] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2c30] with Bytes[152] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[152] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e30] with Bytes[152] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[152] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3030] with Bytes[152] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3010] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3210] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on 
Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3390] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3510] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[1] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3690] with Bytes[1] -[LOG] >>> Operation[gko::solver::cg::initialize_operation const*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7fffc24b0d30] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::solver::cg::initialize_operation const*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7fffc24b0d30] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0a80] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0a80] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[2] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36d0] with Bytes[2] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at 
Location[0x562525ba4360] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba43a0] with Bytes[8] -[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0710] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0710] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x562525b99ec0] to Executor[gko::ReferenceExecutor,0x562525b99ec0] from Location[0x562525ba2a30] to Location[0x562525ba2c30] with Bytes[152] -[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x562525b99ec0] to Executor[gko::ReferenceExecutor,0x562525b99ec0] from Location[0x562525ba2a30] to Location[0x562525ba2c30] with Bytes[152] -[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0a80] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0a80] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> iteration 0 completed with solver LinOp[gko::solver::Cg,0x562525b9cad0] with residual LinOp[gko::matrix::Dense,0x562525b9e470], solution LinOp[gko::matrix::Dense,0x562525b9dca0] and residual_norm LinOp[gko::LinOp const*,0] -[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNormReduction,0x562525ba42a0] at iteration 0 with ID 1 and finalized set to 1 -[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0800] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> 
Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0800] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7fffc24b0a00] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7fffc24b0a00] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNormReduction,0x562525ba42a0] at iteration 0 with ID 1 and finalized set to 1. It changed one RHS 0, stopped the iteration process 0 -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[152] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4790] with Bytes[152] - -. -. -. -. -. -. 
- -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4830] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4830] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4790] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4790] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36d0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36d0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba43a0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba43a0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4360] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba4360] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3690] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3690] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3390] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3390] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3510] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3510] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3210] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3210] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3010] -[LOG] >>> free 
completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3010] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3030] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba3030] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e30] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e30] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2c30] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2c30] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2a30] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2a30] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c0f0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c0f0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c350] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9c350] -[LOG] >>> apply completed on A LinOp[gko::solver::Cg,0x562525b9cad0] with b LinOp[gko::matrix::Dense,0x562525b9d670] and x LinOp[gko::matrix::Dense,0x562525b9dca0] -Last memory copied was of size 98 FROM executor 0x562525b99ec0 pointer 562525b9de80 TO executor 0x562525b99ec0 pointer 562525ba5170 +[LOG] >>> apply started on A LinOp[gko::solver::Cg,0x2142d60] with b LinOp[gko::matrix::Dense,0x2142140] and x LinOp[gko::matrix::Dense,0x2143450] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2142280] with Bytes[8] +[LOG] >>> 
allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143410] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21480a0] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482f0] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21484d0] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21486b0] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148010] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a60] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482b0] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a40] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[1] +[LOG] >>> allocation completed on 
Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147c90] with Bytes[1] +[LOG] >>> Operation[gko::solver::cg::initialize_operation const*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::initialize_operation const*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14aa0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14aa0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[2] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148ee0] with Bytes[2] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148e50] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147ce0] with Bytes[8] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14a20] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, 
gko::matrix::Dense*>,0x7ffd93d14a20] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 0 completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 0 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on 
Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 0 with ID 1 and finalized set to 1. It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149550] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149550] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149550] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149730] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149730] with Bytes[152] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 1 completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp 
const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 1 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 1 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149980] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149980] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149980] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b80] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149b80] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149b80] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149550] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149550] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 2 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 2 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 2 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149290] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149290] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149290] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149690] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149690] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149690] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b80] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b80] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149980] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149980] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 3 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 3 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 3 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149ae0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149ae0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149ae0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149690] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149690] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149290] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149290] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 4 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 4 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 4 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149200] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149200] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149310] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149310] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149310] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149ae0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149ae0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 5 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 5 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 5 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cc0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149cc0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149cc0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149310] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149310] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 6 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 6 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 6 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149450] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149450] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21494f0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21494f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21494f0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cc0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cc0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 7 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 7 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 7 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149730] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149730] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21497d0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21497d0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21497d0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21494f0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21494f0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 8 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 8 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 8 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149200] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149200] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21492a0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21492a0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21492a0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21497d0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21497d0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149730] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 9 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 9 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 9 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149620] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149620] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21496c0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21496c0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21496c0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21492a0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21492a0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149200] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 10 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 10 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 10 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149450] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149450] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149760] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149760] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149760] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21496c0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21496c0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 11 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 11 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 11 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149860] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149860] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149860] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149900] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149900] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149900] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149760] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149760] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149450] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 12 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 12 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 12 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499a0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21499a0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21499a0] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21493d0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21493d0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21493d0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149900] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149900] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149860] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149860] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 13 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 13 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 13 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149490] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149490] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149490] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149580] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149580] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149580] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21493d0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21493d0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499a0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499a0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 14 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 14 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 14 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b50] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149b50] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149b50] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499c0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21499c0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x21499c0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149580] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149580] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149490] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149490] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 15 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 15 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 15 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149a70] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149a70] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149a70] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149340] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149340] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149340] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499c0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21499c0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b50] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b50] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 16 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 16 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 16 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149970] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149970] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149970] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b10] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149b10] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149b10] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149340] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149340] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149a70] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149a70] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 17 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 17 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 17 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149780] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149780] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149780] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149890] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149890] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b10] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149b10] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149970] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149970] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 18 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 18 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 18 with ID 1 and finalized set to 1. 
It changed one RHS 0, stopped the iteration process 0 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149620] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149620] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cf0] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149cf0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149cf0] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149780] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149780] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_1_operation*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> 
Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::spmv_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14b80] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::solver::cg::step_2_operation*&, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::matrix::Dense*, gko::Array*>,0x7ffd93d14ef0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x21482f0] with Bytes[152] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_dot_operation const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14c50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> iteration 19 
completed with solver LinOp[gko::solver::Cg,0x2142d60] with residual LinOp[gko::matrix::Dense,0x2147b30], solution LinOp[gko::matrix::Dense,0x2143450] and residual_norm LinOp[gko::LinOp const*,0] +[LOG] >>> check started for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 19 with ID 1 and finalized set to 1 +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14ad0] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::stop::residual_norm::residual_norm_operation const*&, gko::matrix::Dense*, double&, unsigned char&, bool&, gko::Array*&, gko::Array*, bool*, bool*&>,0x7ffd93d14b90] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> check completed for stop::Criterion[gko::stop::ResidualNorm,0x2148db0] at iteration 19 with ID 1 and finalized set to 1. 
It changed one RHS 1, stopped the iteration process 1 +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149890] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x21480a0] to Location[0x2149890] with Bytes[152] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[152] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149340] with Bytes[152] +[LOG] >>> copy started from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149340] with Bytes[152] +[LOG] >>> copy completed from Executor[gko::ReferenceExecutor,0x21400d0] to Executor[gko::ReferenceExecutor,0x21400d0] from Location[0x2143e90] to Location[0x2149340] with Bytes[152] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cf0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149cf0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149620] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148ee0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148ee0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147ce0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147ce0] +[LOG] >>> free started on 
Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148e50] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148e50] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147c90] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2147c90] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482b0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482b0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a40] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a40] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a60] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148a60] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148010] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2148010] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21486b0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21486b0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21484d0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21484d0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482f0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21482f0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21480a0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x21480a0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143410] +[LOG] >>> free completed on 
Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143410] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2142280] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2142280] +[LOG] >>> apply completed on A LinOp[gko::solver::Cg,0x2142d60] with b LinOp[gko::matrix::Dense,0x2142140] and x LinOp[gko::matrix::Dense,0x2143450] +Last memory copied was of size 98 FROM executor 0x21400d0 pointer 2143e90 TO executor 0x21400d0 pointer 2149340 Residual = [ 8.1654e-19 -1.51449e-17 @@ -136,36 +719,36 @@ Solution (x): 0.0107016 0.0121141 0.0123025 -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba5720] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e10] with Bytes[8] -[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x562525b99ec0] with Bytes[8] -[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36b0] with Bytes[8] -[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0d10] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7fffc24b0d10] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0d60] started on Executor[gko::ReferenceExecutor,0x562525b99ec0] -[LOG] >>> 
Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7fffc24b0d60] completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149bb0] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149870] with Bytes[8] +[LOG] >>> allocation started on Executor[gko::ReferenceExecutor,0x21400d0] with Bytes[8] +[LOG] >>> allocation completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149500] with Bytes[8] +[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14e50] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::csr::advanced_spmv_operation const*, gko::matrix::Csr const*, gko::matrix::Dense const*, gko::matrix::Dense const*, gko::matrix::Dense*>,0x7ffd93d14e50] completed on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14f70] started on Executor[gko::ReferenceExecutor,0x21400d0] +[LOG] >>> Operation[gko::matrix::dense::compute_norm2_operation const*, gko::matrix::Dense*>,0x7ffd93d14f70] completed on Executor[gko::ReferenceExecutor,0x21400d0] Residual norm sqrt(r^T r): %%MatrixMarket matrix array real general 1 1 2.10788e-15 -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36b0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba36b0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e10] -[LOG] >>> free 
completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba2e10] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba5720] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525ba5720] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9de80] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9de80] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9dde0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9dde0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9d1a0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9d1a0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9d3c0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9d3c0] -[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9dfb0] -[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x562525b99ec0] at Location[0x562525b9dfb0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149500] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149500] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149870] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149870] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149bb0] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2149bb0] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143e90] 
+[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143e90] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143590] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143590] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2142b10] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2142b10] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143c30] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143c30] +[LOG] >>> free started on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143790] +[LOG] >>> free completed on Executor[gko::ReferenceExecutor,0x21400d0] at Location[0x2143790] @endcode diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 72739146881..a7f5bbaca84 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include @@ -72,23 +73,36 @@ int main(int argc, char *argv[]) // Print version information std::cout << gko::version_info::get() << std::endl; - // Figure out where to run the code - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { + if (argc == 2 && (std::string(argv[1]) == "--help")) { std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } + // Figure out where to run the code + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid + // Read data auto A = share(gko::read(std::ifstream("data/A.mtx"), exec)); auto b = gko::read(std::ifstream("data/b.mtx"), exec); @@ -109,12 +123,12 @@ int main(int argc, char *argv[]) // Add stream_logger to the executor exec->add_logger(stream_logger); - // Add stream_logger only to the ResidualNormReduction criterion Factory + // Add stream_logger only to the ResidualNorm criterion Factory // Note that the logger will get automatically propagated to every criterion // generated from this factory. 
const RealValueType reduction_factor{1e-7}; using ResidualCriterionFactory = - gko::stop::ResidualNormReduction::Factory; + gko::stop::ResidualNorm::Factory; std::shared_ptr residual_criterion = ResidualCriterionFactory::create() .with_reduction_factor(reduction_factor) @@ -169,7 +183,7 @@ int main(int argc, char *argv[]) print_vector("Residual", residual_d); // Print solution - std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, lend(x)); // Calculate residual @@ -179,6 +193,6 @@ int main(int argc, char *argv[]) A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res)); - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); } diff --git a/examples/simple-solver/CMakeLists.txt b/examples/simple-solver/CMakeLists.txt index a4f54d59112..ed067ec02e1 100644 --- a/examples/simple-solver/CMakeLists.txt +++ b/examples/simple-solver/CMakeLists.txt @@ -1,6 +1,15 @@ +cmake_minimum_required(VERSION 3.9) +project(simple-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(simple-solver simple-solver.cpp) -target_link_libraries(simple-solver ginkgo) -target_include_directories(simple-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(simple-solver Ginkgo::ginkgo) + +# Copy the data files to the execution directory configure_file(data/A.mtx data/A.mtx COPYONLY) configure_file(data/b.mtx data/b.mtx COPYONLY) configure_file(data/x0.mtx data/x0.mtx COPYONLY) diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index 092b5ec4a5a..bf8095fc373 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, 
the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,6 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include // Add the C++ iostream header to output information to the console. #include +// Add the STL map header for the executor selection +#include // Add the string manipulation header to handle strings. #include @@ -65,6 +67,11 @@ int main(int argc, char *argv[]) // Print the ginkgo version information. std::cout << gko::version_info::get() << std::endl; + if (argc == 2 && (std::string(argv[1]) == "--help")) { + std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl; + std::exit(-1); + } + // @sect3{Where do you want to run your solver ?} // The gko::Executor class is one of the cornerstones of Ginkgo. Currently, // we have support for @@ -75,21 +82,29 @@ int main(int argc, char *argv[]) // @note With the help of C++, you see that you only ever need to change the // executor and all the other functions/ routines within Ginkgo should // automatically work and run on the executor with any other changes. - std::shared_ptr exec; - if (argc == 1 || std::string(argv[1]) == "reference") { - exec = gko::ReferenceExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "omp") { - exec = gko::OmpExecutor::create(); - } else if (argc == 2 && std::string(argv[1]) == "cuda" && - gko::CudaExecutor::get_num_devices() > 0) { - exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); - } else if (argc == 2 && std::string(argv[1]) == "hip" && - gko::HipExecutor::get_num_devices() > 0) { - exec = gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); - } else { - std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; - std::exit(-1); - } + const auto executor_string = argc >= 2 ? 
argv[1] : "reference"; + std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::OmpExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + // executor where Ginkgo will perform the computation + const auto exec = exec_map.at(executor_string)(); // throws if not valid // @sect3{Reading your data and transfer to the proper device.} // Read the matrix, right hand side and the initial solution using the @ref @@ -117,7 +132,7 @@ int main(int argc, char *argv[]) cg::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .on(exec); @@ -137,7 +152,7 @@ int main(int argc, char *argv[]) solver->apply(lend(b), lend(x)); // Print the solution to the command line. 
- std::cout << "Solution (x): \n"; + std::cout << "Solution (x):\n"; write(std::cout, lend(x)); // To measure if your solution has actually converged, you can measure the @@ -153,6 +168,6 @@ int main(int argc, char *argv[]) A->apply(lend(one), lend(x), lend(neg_one), lend(b)); b->compute_norm2(lend(res)); - std::cout << "Residual norm sqrt(r^T r): \n"; + std::cout << "Residual norm sqrt(r^T r):\n"; write(std::cout, lend(res)); } diff --git a/examples/three-pt-stencil-solver/CMakeLists.txt b/examples/three-pt-stencil-solver/CMakeLists.txt index 15f42fcb1c0..661f064b36a 100644 --- a/examples/three-pt-stencil-solver/CMakeLists.txt +++ b/examples/three-pt-stencil-solver/CMakeLists.txt @@ -1,3 +1,10 @@ +cmake_minimum_required(VERSION 3.9) +project(three-pt-stencil-solver) + +# We only need to find Ginkgo if we build this example stand-alone +if (NOT GINKGO_BUILD_EXAMPLES) + find_package(Ginkgo 1.4.0 REQUIRED) +endif() + add_executable(three-pt-stencil-solver three-pt-stencil-solver.cpp) -target_link_libraries(three-pt-stencil-solver ginkgo) -target_include_directories(three-pt-stencil-solver PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(three-pt-stencil-solver Ginkgo::ginkgo) diff --git a/examples/three-pt-stencil-solver/doc/results.dox b/examples/three-pt-stencil-solver/doc/results.dox index 5e093c1ac2f..49e8cf79a2e 100644 --- a/examples/three-pt-stencil-solver/doc/results.dox +++ b/examples/three-pt-stencil-solver/doc/results.dox @@ -3,29 +3,7 @@ This is the expected output: @code{.cpp} -0 -0.00010798 -0.000863838 -0.00291545 -0.0069107 -0.0134975 -0.0233236 -0.037037 -0.0552856 -0.0787172 -0.10798 -0.143721 -0.186589 -0.237231 -0.296296 -0.364431 -0.442285 -0.530504 -0.629738 -0.740633 -0.863838 -1 -The average relative error is 1.87318e-15 +The average relative error is 2.52236e-11 @endcode diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 7056e768ba5..a050be1a7fe 
100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -173,6 +173,11 @@ void solve_system(const std::string &executor_string, return gko::HipExecutor::create(0, gko::OmpExecutor::create(), true); }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::OmpExecutor::create()); + }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; // executor where Ginkgo will perform the computation @@ -215,7 +220,7 @@ void solve_system(const std::string &executor_string, .with_criteria(gko::stop::Iteration::build() .with_max_iters(gko::size_type(dp)) .on(exec), - gko::stop::ResidualNormReduction::build() + gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor) .on(exec)) .with_preconditioner(bj::build().on(exec)) @@ -232,15 +237,18 @@ int main(int argc, char *argv[]) using ValueType = double; using IndexType = int; - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " DISCRETIZATION_POINTS [executor]" - << std::endl; + // Print version information + std::cout << gko::version_info::get() << std::endl; + + if (argc == 2 && std::string(argv[1]) == "--help") { + std::cerr << "Usage: " << argv[0] + << " [executor] [DISCRETIZATION_POINTS]" << std::endl; std::exit(-1); } + const auto executor_string = argc >= 2 ? argv[1] : "reference"; const IndexType discretization_points = - argc >= 2 ? std::atoi(argv[1]) : 100; - const auto executor_string = argc >= 3 ? argv[2] : "reference"; + argc >= 3 ? 
std::atoi(argv[2]) : 100; // problem: auto correct_u = [](ValueType x) { return x * x * x; }; @@ -267,7 +275,9 @@ int main(int argc, char *argv[]) col_idxs.data(), values.data(), rhs.data(), u.data(), reduction_factor); - print_solution(discretization_points, 0, 1, u.data()); + // Uncomment to print the solution + // print_solution(discretization_points, 0, 1, + // u.data()); std::cout << "The average relative error is " << calculate_error(discretization_points, u.data(), correct_u) / discretization_points diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 57c6503ce36..e173605fde2 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,3 +1,8 @@ +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" + AND GINKGO_BUILD_CUDA AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.2) + message(FATAL_ERROR "Ginkgo HIP backend requires CUDA >= 9.2.") +endif() + if(NOT DEFINED ROCM_PATH) if(DEFINED ENV{ROCM_PATH}) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") @@ -16,29 +21,28 @@ if(NOT DEFINED HIPBLAS_PATH) endif() endif() -if(NOT DEFINED HIPSPARSE_PATH) - if(DEFINED ENV{HIPSPARSE_PATH}) - set(HIPSPARSE_PATH $ENV{HIPSPARSE_PATH} CACHE PATH "Path to which HIPSPARSE has been installed") +if(NOT DEFINED HIPRAND_PATH) + if(DEFINED ENV{HIPRAND_PATH}) + set(HIPRAND_PATH $ENV{HIPRAND_PATH} CACHE PATH "Path to which HIPRAND has been installed") else() - set(HIPSPARSE_PATH "${ROCM_PATH}/hipsparse" CACHE PATH "Path to which HIPSPARSE has been installed") + set(HIPRAND_PATH "${ROCM_PATH}/hiprand" CACHE PATH "Path to which HIPRAND has been installed") + endif() +endif() + +if(NOT DEFINED ROCRAND_PATH) + if(DEFINED ENV{ROCRAND_PATH}) + set(ROCRAND_PATH $ENV{ROCRAND_PATH} CACHE PATH "Path to which ROCRAND has been installed") + else() + set(ROCRAND_PATH "${ROCM_PATH}/rocrand" CACHE PATH "Path to which ROCRAND has been installed") endif() endif() -## Both the definition of `HCC_PATH` and `HIP_HIPCC_CMAKE_LINKER_HELPER` are required -## 
before including `FindHIP`, as these are essential but not defined in the beginning -## of the `FindHIP` file itself. Not defining these currently results in: -## 1. Without `HCC_PATH`: the `hcc` backend not working properly if it is wrongly set, -## if it is not set, popentially all compilation could fail. -## 2. Without `HIP_HIPCC_CMAKE_LINKER_HELPER` two compilations are required, since -## `FindHIP` defines this only in macro calls, which we call much later on after -## including the file itself. -if(NOT DEFINED HCC_PATH) - if(DEFINED ENV{HCC_PATH}) - set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed") +if(NOT DEFINED HIPSPARSE_PATH) + if(DEFINED ENV{HIPSPARSE_PATH}) + set(HIPSPARSE_PATH $ENV{HIPSPARSE_PATH} CACHE PATH "Path to which HIPSPARSE has been installed") else() - set(HCC_PATH "${ROCM_PATH}/hcc" CACHE PATH "Path to which HCC has been installed") + set(HIPSPARSE_PATH "${ROCM_PATH}/hipsparse" CACHE PATH "Path to which HIPSPARSE has been installed") endif() - set(HCC_HOME "${HCC_PATH}") endif() if(NOT DEFINED HIP_CLANG_PATH) @@ -92,38 +96,35 @@ execute_process( ) set(GINKGO_HIP_VERSION ${GINKGO_HIP_VERSION} PARENT_SCOPE) -if (GINKGO_HIP_PLATFORM MATCHES "nvcc") # ensure ENV{CUDA_PATH} is set by the user +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") + # ensure ENV{CUDA_PATH} is set by the user if (NOT DEFINED ENV{CUDA_PATH}) find_path(GINKGO_HIP_DEFAULT_CUDA_PATH "cuda.h" PATH /usr/local/cuda/include NO_DEFAULT_PATH) if (NOT GINKGO_HIP_DEFAULT_CUDA_PATH) - message(FATAL_ERROR "HIP nvcc backend was requested but CUDA could not be located. " - "Set and export the environment variable CUDA_PATH.") + message(FATAL_ERROR "HIP nvidia backend was requested but CUDA could not be " + "located. Set and export the environment variable CUDA_PATH.") endif() endif() endif() -if (GINKGO_HIP_PLATFORM STREQUAL "hcc") - # This is required by hipblas/hipsparse in the case where the platform is hcc. 
- # For nvcc platform, these aren't required and only cause trouble. - list(APPEND CMAKE_PREFIX_PATH - "${HIP_PATH}/lib/cmake" - "${HIP_PATH}/../lib/cmake" # hopefully catches all extra HIP dependencies, e.g. hcc - ) -endif() - - ## Setup all CMAKE variables to find HIP and its dependencies list(APPEND CMAKE_MODULE_PATH "${HIP_PATH}/cmake") +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + list(APPEND CMAKE_PREFIX_PATH "${HIP_PATH}/lib/cmake") +endif() list(APPEND CMAKE_PREFIX_PATH "${HIPBLAS_PATH}/lib/cmake" + "${HIPRAND_PATH}/lib/cmake" "${HIPSPARSE_PATH}/lib/cmake" + "${ROCRAND_PATH}/lib/cmake" ) # Set CMAKE_MODULE_PATH and CMAKE_PREFIX_PATH as PARENT_SCOPE to easily find HIP again set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" PARENT_SCOPE) set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH}" PARENT_SCOPE) -# setting the default flags like CMAKE_{LANG}_FLAGS_{TYPE} -# the setting is copied from the default CMAKE_CXX_FLAGS_{TYPE} +# NOTE: without this, HIP jacobi build takes a *very* long time. The reason for +# that is that these variables are seemingly empty by default, thus there is no +# proper optimization applied to the HIP builds otherwise. set(HIP_HIPCC_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the HIPCC compiler during DEBUG builds") set(HIP_HIPCC_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL}" CACHE STRING "Flags used by the HIPCC compiler during MINSIZEREL builds") set(HIP_HIPCC_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the HIPCC compiler during RELEASE builds") @@ -131,7 +132,10 @@ set(HIP_HIPCC_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STR find_package(HIP REQUIRED) find_package(hipblas REQUIRED) +find_package(hiprand REQUIRED) find_package(hipsparse REQUIRED) +# At the moment, for hiprand to work also rocrand is required. 
+find_package(rocrand REQUIRED) find_path(GINKGO_HIP_THRUST_PATH "thrust/complex.h" PATHS "${HIP_PATH}/../include" ENV HIP_THRUST_PATH) @@ -143,11 +147,13 @@ set(GINKGO_HIP_SOURCES base/exception.hip.cpp base/executor.hip.cpp base/version.hip.cpp + components/absolute_array.hip.cpp components/fill_array.hip.cpp - components/precision_conversion.hip.cpp components/prefix_sum.hip.cpp - factorization/ilu_kernels.hip.cpp factorization/factorization_kernels.hip.cpp + factorization/ic_kernels.hip.cpp + factorization/ilu_kernels.hip.cpp + factorization/par_ic_kernels.hip.cpp factorization/par_ict_kernels.hip.cpp factorization/par_ilu_kernels.hip.cpp factorization/par_ilut_approx_filter_kernel.hip.cpp @@ -161,46 +167,55 @@ set(GINKGO_HIP_SOURCES matrix/dense_kernels.hip.cpp matrix/diagonal_kernels.hip.cpp matrix/ell_kernels.hip.cpp + matrix/fbcsr_kernels.hip.cpp matrix/hybrid_kernels.hip.cpp matrix/sellp_kernels.hip.cpp matrix/sparsity_csr_kernels.hip.cpp + multigrid/amgx_pgm_kernels.hip.cpp preconditioner/isai_kernels.hip.cpp preconditioner/jacobi_advanced_apply_kernel.hip.cpp preconditioner/jacobi_generate_kernel.hip.cpp preconditioner/jacobi_kernels.hip.cpp preconditioner/jacobi_simple_apply_kernel.hip.cpp - solver/bicg_kernels.hip.cpp - solver/bicgstab_kernels.hip.cpp - solver/cg_kernels.hip.cpp - solver/cgs_kernels.hip.cpp - solver/fcg_kernels.hip.cpp + reorder/rcm_kernels.hip.cpp solver/gmres_kernels.hip.cpp - solver/ir_kernels.hip.cpp + solver/cb_gmres_kernels.hip.cpp + solver/idr_kernels.hip.cpp solver/lower_trs_kernels.hip.cpp solver/upper_trs_kernels.hip.cpp stop/criterion_kernels.hip.cpp - stop/residual_norm_kernels.hip.cpp) + stop/residual_norm_kernels.hip.cpp + ../common/unified/components/precision_conversion.cpp + ../common/unified/matrix/coo_kernels.cpp + ../common/unified/matrix/csr_kernels.cpp + ../common/unified/matrix/dense_kernels.cpp + ../common/unified/matrix/diagonal_kernels.cpp + ../common/unified/preconditioner/jacobi_kernels.cpp + 
../common/unified/solver/bicg_kernels.cpp + ../common/unified/solver/bicgstab_kernels.cpp + ../common/unified/solver/cg_kernels.cpp + ../common/unified/solver/cgs_kernels.cpp + ../common/unified/solver/fcg_kernels.cpp + ../common/unified/solver/ir_kernels.cpp + ) set(GINKGO_HIP_NVCC_ARCH "") -if (GINKGO_HIP_PLATFORM MATCHES "nvcc") +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) unset(CMAKE_CUDA_HOST_COMPILER CACHE) endif() if (CMAKE_CUDA_HOST_COMPILER) - set(GINKGO_HIP_CUDA_HOST_COMPILER "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") + list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") endif() # Remove false positive CUDA warnings when calling one() and zero() - # This creates a compilation bug on nvcc 9.0.102 *with* the new array_deleter - # merged at commit ed12b3df5d26 - if(NOT CMAKE_CUDA_COMPILER_VERSION MATCHES "9.0") - set(GINKGO_HIP_NVCC_ADDITIONAL_FLAGS --expt-relaxed-constexpr) - endif() + list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS --expt-relaxed-constexpr --expt-extended-lambda) - if (GINKGO_HIP_PLATFROM MATCHES "nvcc" AND CMAKE_CUDA_COMPILER_VERSION - MATCHES "9.2" AND CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*" ) + if (GINKGO_HIP_PLATFROM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" + AND CMAKE_CUDA_COMPILER_VERSION MATCHES "9.2" + AND CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*" ) ginkgo_extract_clang_version(${CMAKE_CUDA_HOST_COMPILER} GINKGO_CUDA_HOST_CLANG_VERSION) if (GINKGO_CUDA_HOST_CLANG_VERSION MATCHES "5\.0.*") @@ -214,63 +229,25 @@ if (GINKGO_HIP_PLATFORM MATCHES "nvcc") ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} UNSUPPORTED "20" "21") endif() -set(GINKGO_HIPCC_OPTIONS ${GINKGO_HIP_COMPILER_FLAGS}) -set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ARCH} 
${GINKGO_HIP_NVCC_ADDITIONAL_FLAGS}) -set(GINKGO_HIP_HCC_OPTIONS ${GINKGO_HIP_HCC_COMPILER_FLAGS}) -set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS}) -set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) -if (GINKGO_HIP_VERSION VERSION_GREATER_EQUAL "3.5") - hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} "-std=c++14" - HCC_OPTIONS ${GINKGO_HIP_HCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} ${GINKGO_HIP_CUDA_HOST_COMPILER} - ${GINKGO_STATIC_OR_SHARED}) -else() - hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} "-std=c++14" - HCC_OPTIONS ${GINKGO_HIP_HCC_OPTIONS} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} ${GINKGO_HIP_CUDA_HOST_COMPILER} - ${GINKGO_STATIC_OR_SHARED}) -endif() - -if(GINKGO_HIP_AMDGPU AND GINKGO_HIP_PLATFORM MATCHES "hcc") +# `target_compile_options` do not work with hip_add_library +# Thus, we need to pass the flags to `hip_add_library` itself +if(GINKGO_HIP_AMDGPU) foreach(target ${GINKGO_HIP_AMDGPU}) - target_compile_options(ginkgo_hip PRIVATE --amdgpu-target=${target}) - target_link_libraries(ginkgo_hip PRIVATE --amdgpu-target=${target}) + list(APPEND GINKGO_AMD_ARCH_FLAGS --amdgpu-target=${target}) endforeach() endif() -target_compile_options(ginkgo_hip PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) -if(GINKGO_WITH_CLANG_TIDY AND GINKGO_CLANG_TIDY_PATH) - set_property(TARGET ginkgo_hip PROPERTY CXX_CLANG_TIDY "${GINKGO_CLANG_TIDY_PATH};-checks=*") -endif() -if(GINKGO_WITH_IWYU AND GINKGO_IWYU_PATH) - set_property(TARGET ginkgo_hip PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${GINKGO_IWYU_PATH}) -endif() - -if(GINKGO_HIP_PLATFORM MATCHES "hcc") - # Fix the exception thrown bug with `hcc` backend and shared libraries - set_target_properties(ginkgo_hip PROPERTIES LINKER_LANGUAGE HIP) - - # Ban `-hc` flag as INTERFACE_LINK_LIBRARIES since that is propagated when building - # a 
static library, and it's definitely not a known option to any compiler. - ginkgo_hip_ban_link_hcflag(hcc::hccrt) - - if (NOT BUILD_SHARED_LIBS) - # Do not let hip::device flags propagate to executables which don't - # directly use HIP - ginkgo_hip_clang_ban_hip_device_flags() - endif() - target_link_libraries(ginkgo_hip PRIVATE hip::device) -elseif(GINKGO_HIP_PLATFORM MATCHES "nvcc") - find_package(CUDA 9.0 REQUIRED) - target_link_libraries(ginkgo_hip PUBLIC ${CUDA_LIBRARIES}) - set(HIP_CUDA_LIBRARIES ${CUDA_LIBRARIES} PARENT_SCOPE) -endif() +set(GINKGO_HIPCC_OPTIONS ${GINKGO_HIP_COMPILER_FLAGS} "-std=c++14 -DGKO_COMPILING_HIP") +set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ARCH} ${GINKGO_HIP_NVCC_ADDITIONAL_FLAGS}) +set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS} ${GINKGO_AMD_ARCH_FLAGS}) -target_link_libraries(ginkgo_hip PRIVATE roc::hipblas roc::hipsparse) +set_source_files_properties(${GINKGO_HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT TRUE) +hip_add_library(ginkgo_hip $ ${GINKGO_HIP_SOURCES} + HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} + CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} + NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} + ${GINKGO_STATIC_OR_SHARED}) target_include_directories(ginkgo_hip PUBLIC @@ -278,27 +255,54 @@ target_include_directories(ginkgo_hip PRIVATE ${GINKGO_HIP_THRUST_PATH} ${HIPBLAS_INCLUDE_DIRS} + ${hiprand_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS} $) +target_link_libraries(ginkgo_hip PUBLIC ginkgo_device) +target_link_libraries(ginkgo_hip PRIVATE roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) + +target_compile_options(ginkgo_hip PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) + +if(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + find_package(hip REQUIRED) + # To save a bit of pain, we directly link against the `library` instead of + # linking against the target. 
+ if (CMAKE_BUILD_TYPE) + # Check if our configuration is available first + string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPER_BUILD_TYPE) + get_target_property(HIP_LIBAMDHIP64_LIBRARIES hip::amdhip64 IMPORTED_LOCATION_${UPPER_BUILD_TYPE}) + endif() + if (NOT HIP_LIBAMDHIP64_LIBRARIES) + # Fall back to anything + get_target_property(HIP_LIBAMDHIP64_LIBRARIES hip::amdhip64 LOCATION) + endif() + target_link_libraries(ginkgo_hip PUBLIC ${HIP_LIBAMDHIP64_LIBRARIES}) +elseif(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") + find_package(CUDA 9.2 REQUIRED) + target_link_libraries(ginkgo_hip PUBLIC ${CUDA_LIBRARIES}) +endif() + +# Try to find everything in /opt/rocm/lib first. +set(GKO_HIP_RPATH "${ROCM_PATH}/lib" ) +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") + list(GET CUDA_LIBRARIES 0 CUDA_FIRST_LIB) + get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY) + list(APPEND GKO_HIP_RPATH "${GKO_CUDA_LIBDIR}") +else() + list(APPEND GKO_HIP_RPATH "${HIP_PATH}/lib") +endif() +list(APPEND GKO_HIP_RPATH "${HIPBLAS_PATH}/lib" "${HIPRAND_PATH}/lib" + "${HIPSPARSE_PATH}/lib" "${ROCRAND_PATH}/lib") + ginkgo_compile_features(ginkgo_hip) ginkgo_default_includes(ginkgo_hip) -ginkgo_install_library(ginkgo_hip hip) +ginkgo_install_library(ginkgo_hip "${GKO_HIP_RPATH}") if (GINKGO_CHECK_CIRCULAR_DEPS) - ginkgo_check_headers(ginkgo_hip) + ginkgo_check_headers(ginkgo_hip GKO_COMPILING_HIP) endif() if(GINKGO_BUILD_TESTS) - # Here, we go through all of Ginkgo's dependencies to build a `-Wl,-rpath` string since for - # some reason `hipcc` through CMake does not have rpath settings unlike the other compilers. 
- get_target_property(GINKGO_LINK_LIBRARIES ginkgo LINK_LIBRARIES) - set(GINKGO_RPATH_FOR_HIP "-Wl,-rpath,$") - foreach(target ${GINKGO_LINK_LIBRARIES}) - if("${target}" MATCHES "^ginkgo") - set(GINKGO_RPATH_FOR_HIP "${GINKGO_RPATH_FOR_HIP}:$") - endif() - endforeach() - add_subdirectory(test) endif() diff --git a/hip/base/config.hip.hpp b/hip/base/config.hip.hpp index d698a6a8d83..95323dd7e96 100644 --- a/hip/base/config.hip.hpp +++ b/hip/base/config.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/base/device_guard.hip.hpp b/hip/base/device_guard.hip.hpp index b7d63ebc152..a0e02c78658 100644 --- a/hip/base/device_guard.hip.hpp +++ b/hip/base/device_guard.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/base/exception.hip.cpp b/hip/base/exception.hip.cpp index 9e6f2ff7a00..1f39ff9890f 100644 --- a/hip/base/exception.hip.cpp +++ b/hip/base/exception.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include @@ -77,6 +78,31 @@ std::string HipblasError::get_error(int64 error_code) } +std::string HiprandError::get_error(int64 error_code) +{ +#define GKO_REGISTER_HIPRAND_ERROR(error_name) \ + if (error_code == static_cast(error_name)) { \ + return #error_name; \ + } + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_SUCCESS); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_VERSION_MISMATCH); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_NOT_INITIALIZED); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_ALLOCATION_FAILED); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_TYPE_ERROR); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_OUT_OF_RANGE); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_LENGTH_NOT_MULTIPLE); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_LAUNCH_FAILURE); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_PREEXISTING_FAILURE); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_INITIALIZATION_FAILED); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_ARCH_MISMATCH); + GKO_REGISTER_HIPRAND_ERROR(HIPRAND_STATUS_INTERNAL_ERROR); + return "Unknown error"; + +#undef GKO_REGISTER_HIPRAND_ERROR +} + + std::string HipsparseError::get_error(int64 error_code) { #define GKO_REGISTER_HIPSPARSE_ERROR(error_name) \ diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index f41fb69f46c..fa24c1eb929 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -52,18 +53,30 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -#include "common/base/executor.hpp.inc" +#include "common/cuda_hip/base/executor.hpp.inc" + + +#if (GINKGO_HIP_PLATFORM_NVCC == 1) +using hip_device_class = nvidia_device; +#else +using hip_device_class = amd_device; +#endif std::shared_ptr HipExecutor::create( - int device_id, std::shared_ptr master, bool device_reset) + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode) { return std::shared_ptr( - new HipExecutor(device_id, std::move(master), device_reset), + new HipExecutor(device_id, std::move(master), device_reset, alloc_mode), [device_id](HipExecutor *exec) { + auto device_reset = exec->get_device_reset(); + std::lock_guard guard( + hip_device_class::get_mutex(device_id)); delete exec; - if (!HipExecutor::get_num_execs(device_id) && - exec->get_device_reset()) { + auto &num_execs = hip_device_class::get_num_execs(device_id); + num_execs--; + if (!num_execs && device_reset) { hip::device_guard g(device_id); hipDeviceReset(); } @@ -71,6 +84,26 @@ std::shared_ptr HipExecutor::create( } +void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) +{ + if (this->get_device_id() < this->get_num_devices() && + this->get_device_id() >= 0) { + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS( + hipDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()), + 13, this->get_device_id())); + + auto hip_hwloc_obj = + mach_topo->get_pci_device(this->get_exec_info().pci_bus_id); + if (hip_hwloc_obj) { + this->get_exec_info().numa_node = hip_hwloc_obj->closest_numa; + this->get_exec_info().closest_pu_ids = + hip_hwloc_obj->closest_pu_ids; + } + } +} + + void OmpExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, const void *src_ptr, void *dest_ptr) const { @@ -89,9 +122,10 @@ void HipExecutor::raw_free(void *ptr) const noexcept if (error_code != hipSuccess) { #if GKO_VERBOSE_LEVEL >= 1 // Unfortunately, if memory free fails, there's not much we can do - std::cerr << 
"Unrecoverable HIP error on device " << this->device_id_ - << " in " << __func__ << ": " << hipGetErrorName(error_code) - << ": " << hipGetErrorString(error_code) << std::endl + std::cerr << "Unrecoverable HIP error on device " + << this->get_device_id() << " in " << __func__ << ": " + << hipGetErrorName(error_code) << ": " + << hipGetErrorString(error_code) << std::endl << "Exiting program" << std::endl; #endif // GKO_VERBOSE_LEVEL >= 1 std::exit(error_code); @@ -103,11 +137,18 @@ void *HipExecutor::raw_alloc(size_type num_bytes) const { void *dev_ptr = nullptr; hip::device_guard g(this->get_device_id()); -#if defined(NDEBUG) || (GINKGO_HIP_PLATFORM_HCC == 1) - auto error_code = hipMalloc(&dev_ptr, num_bytes); -#else - auto error_code = hipMallocManaged(&dev_ptr, num_bytes); + int error_code = 0; + if (this->alloc_mode_ == allocation_mode::device) { + error_code = hipMalloc(&dev_ptr, num_bytes); +#if !(GKO_HIP_PLATFORM_HCC == 1) + } else if (this->alloc_mode_ == allocation_mode::unified_global) { + error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachGlobal); + } else if (this->alloc_mode_ == allocation_mode::unified_host) { + error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachHost); #endif + } else { + GKO_NOT_SUPPORTED(this->alloc_mode_); + } if (error_code != hipErrorMemoryAllocation) { GKO_ASSERT_NO_HIP_ERRORS(error_code); } @@ -138,11 +179,18 @@ void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, num_bytes)); } #else - GKO_NOT_SUPPORTED(this); + GKO_NOT_SUPPORTED(dest); #endif } +void HipExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + GKO_NOT_SUPPORTED(dest); +} + + void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, const void *src_ptr, void *dest_ptr) const { @@ -186,31 +234,56 @@ int HipExecutor::get_num_devices() void HipExecutor::set_gpu_property() { - if (device_id_ < this->get_num_devices() && device_id_ >= 0) { 
+ if (this->get_device_id() < this->get_num_devices() && + this->get_device_id() >= 0) { hip::device_guard g(this->get_device_id()); GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( - &num_multiprocessor_, hipDeviceAttributeMultiprocessorCount, - device_id_)); + &this->get_exec_info().num_computing_units, + hipDeviceAttributeMultiprocessorCount, this->get_device_id())); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &this->get_exec_info().major, + hipDeviceAttributeComputeCapabilityMajor, this->get_device_id())); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &this->get_exec_info().minor, + hipDeviceAttributeComputeCapabilityMinor, this->get_device_id())); + auto max_threads_per_block = 0; + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &max_threads_per_block, hipDeviceAttributeMaxThreadsPerBlock, + this->get_device_id())); + this->get_exec_info().max_workitem_sizes.push_back( + max_threads_per_block); + std::vector max_threads_per_block_dim(3, 0); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &max_threads_per_block_dim[0], hipDeviceAttributeMaxBlockDimX, + this->get_device_id())); GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( - &major_, hipDeviceAttributeComputeCapabilityMajor, device_id_)); + &max_threads_per_block_dim[1], hipDeviceAttributeMaxBlockDimY, + this->get_device_id())); GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( - &minor_, hipDeviceAttributeComputeCapabilityMinor, device_id_)); + &max_threads_per_block_dim[2], hipDeviceAttributeMaxBlockDimZ, + this->get_device_id())); + this->get_exec_info().max_workgroup_size = max_threads_per_block; + this->get_exec_info().max_workitem_sizes = max_threads_per_block_dim; #if GINKGO_HIP_PLATFORM_NVCC - num_warps_per_sm_ = convert_sm_ver_to_cores(major_, minor_) / - kernels::hip::config::warp_size; + this->get_exec_info().num_pu_per_cu = + convert_sm_ver_to_cores(this->get_exec_info().major, + this->get_exec_info().minor) / + kernels::hip::config::warp_size; #else // In GCN (Graphics Core 
Next), each multiprocessor has 4 SIMD // Reference: https://en.wikipedia.org/wiki/Graphics_Core_Next - num_warps_per_sm_ = 4; + this->get_exec_info().num_pu_per_cu = 4; #endif // GINKGO_HIP_PLATFORM_NVCC - warp_size_ = kernels::hip::config::warp_size; + this->get_exec_info().max_subgroup_size = + kernels::hip::config::warp_size; } } void HipExecutor::init_handles() { - if (device_id_ < this->get_num_devices() && device_id_ >= 0) { + if (this->get_device_id() < this->get_num_devices() && + this->get_device_id() >= 0) { const auto id = this->get_device_id(); hip::device_guard g(id); this->hipblas_handle_ = handle_manager( diff --git a/hip/base/hipblas_bindings.hip.hpp b/hip/base/hipblas_bindings.hip.hpp index 7bef3278f79..3889df69ff1 100644 --- a/hip/base/hipblas_bindings.hip.hpp +++ b/hip/base/hipblas_bindings.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -91,15 +91,11 @@ struct is_supported : std::true_type {}; template <> struct is_supported : std::true_type {}; -// hipblas supports part of complex function version is >= 0.19, but the version -// is not set now. 
-/* not implemented template <> struct is_supported> : std::true_type {}; template <> struct is_supported> : std::true_type {}; -*/ #define GKO_BIND_HIPBLAS_GEMM(ValueType, HipblasName) \ @@ -110,9 +106,9 @@ struct is_supported> : std::true_type {}; ValueType *c, int ldc) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ - handle, transa, transb, m, n, k, as_hiplibs_type(alpha), \ - as_hiplibs_type(a), lda, as_hiplibs_type(b), ldb, \ - as_hiplibs_type(beta), as_hiplibs_type(c), ldc)); \ + handle, transa, transb, m, n, k, as_hipblas_type(alpha), \ + as_hipblas_type(a), lda, as_hipblas_type(b), ldb, \ + as_hipblas_type(beta), as_hipblas_type(c), ldc)); \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -120,10 +116,9 @@ struct is_supported> : std::true_type {}; GKO_BIND_HIPBLAS_GEMM(float, hipblasSgemm); GKO_BIND_HIPBLAS_GEMM(double, hipblasDgemm); -/* not implemented GKO_BIND_HIPBLAS_GEMM(std::complex, hipblasCgemm); GKO_BIND_HIPBLAS_GEMM(std::complex, hipblasZgemm); -*/ + template GKO_BIND_HIPBLAS_GEMM(ValueType, detail::not_implemented); @@ -138,9 +133,9 @@ GKO_BIND_HIPBLAS_GEMM(ValueType, detail::not_implemented); ValueType *c, int ldc) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ - HipblasName(handle, transa, transb, m, n, as_hiplibs_type(alpha), \ - as_hiplibs_type(a), lda, as_hiplibs_type(beta), \ - as_hiplibs_type(b), ldb, as_hiplibs_type(c), ldc)); \ + HipblasName(handle, transa, transb, m, n, as_hipblas_type(alpha), \ + as_hipblas_type(a), lda, as_hipblas_type(beta), \ + as_hipblas_type(b), ldb, as_hipblas_type(c), ldc)); \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -160,7 +155,7 @@ GKO_BIND_HIPBLAS_GEAM(ValueType, detail::not_implemented); ValueType *x, int incx) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ - handle, n, as_hiplibs_type(alpha), as_hiplibs_type(x), incx)); \ + handle, n, as_hipblas_type(alpha), as_hipblas_type(x), incx)); \ } \ static_assert(true, \ "This 
assert is used to counter the false positive extra " \ @@ -168,10 +163,9 @@ GKO_BIND_HIPBLAS_GEAM(ValueType, detail::not_implemented); GKO_BIND_HIPBLAS_SCAL(float, hipblasSscal); GKO_BIND_HIPBLAS_SCAL(double, hipblasDscal); -/* not implemented GKO_BIND_HIPBLAS_SCAL(std::complex, hipblasCscal); GKO_BIND_HIPBLAS_SCAL(std::complex, hipblasZscal); -*/ + template GKO_BIND_HIPBLAS_SCAL(ValueType, detail::not_implemented); @@ -183,8 +177,8 @@ GKO_BIND_HIPBLAS_SCAL(ValueType, detail::not_implemented); const ValueType *x, int incx, ValueType *y, int incy) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ - HipblasName(handle, n, as_hiplibs_type(alpha), as_hiplibs_type(x), \ - incx, as_hiplibs_type(y), incy)); \ + HipblasName(handle, n, as_hipblas_type(alpha), as_hipblas_type(x), \ + incx, as_hipblas_type(y), incy)); \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -192,10 +186,9 @@ GKO_BIND_HIPBLAS_SCAL(ValueType, detail::not_implemented); GKO_BIND_HIPBLAS_AXPY(float, hipblasSaxpy); GKO_BIND_HIPBLAS_AXPY(double, hipblasDaxpy); -/* not implemented GKO_BIND_HIPBLAS_AXPY(std::complex, hipblasCaxpy); GKO_BIND_HIPBLAS_AXPY(std::complex, hipblasZaxpy); -*/ + template GKO_BIND_HIPBLAS_AXPY(ValueType, detail::not_implemented); @@ -207,8 +200,8 @@ GKO_BIND_HIPBLAS_AXPY(ValueType, detail::not_implemented); int incx, const ValueType *y, int incy, ValueType *result) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ - HipblasName(handle, n, as_hiplibs_type(x), incx, \ - as_hiplibs_type(y), incy, as_hiplibs_type(result))); \ + HipblasName(handle, n, as_hipblas_type(x), incx, \ + as_hipblas_type(y), incy, as_hipblas_type(result))); \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -216,22 +209,45 @@ GKO_BIND_HIPBLAS_AXPY(ValueType, detail::not_implemented); GKO_BIND_HIPBLAS_DOT(float, hipblasSdot); GKO_BIND_HIPBLAS_DOT(double, hipblasDdot); -/* not implemented -GKO_BIND_HIPBLAS_DOT(std::complex, hipblasCdotc); 
-GKO_BIND_HIPBLAS_DOT(std::complex, hipblasZdotc); -*/ +GKO_BIND_HIPBLAS_DOT(std::complex, hipblasCdotu); +GKO_BIND_HIPBLAS_DOT(std::complex, hipblasZdotu); + template GKO_BIND_HIPBLAS_DOT(ValueType, detail::not_implemented); #undef GKO_BIND_HIPBLAS_DOT +#define GKO_BIND_HIPBLAS_CONJ_DOT(ValueType, HipblasName) \ + inline void conj_dot(hipblasHandle_t handle, int n, const ValueType *x, \ + int incx, const ValueType *y, int incy, \ + ValueType *result) \ + { \ + GKO_ASSERT_NO_HIPBLAS_ERRORS( \ + HipblasName(handle, n, as_hipblas_type(x), incx, \ + as_hipblas_type(y), incy, as_hipblas_type(result))); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_HIPBLAS_CONJ_DOT(float, hipblasSdot); +GKO_BIND_HIPBLAS_CONJ_DOT(double, hipblasDdot); +GKO_BIND_HIPBLAS_CONJ_DOT(std::complex, hipblasCdotc); +GKO_BIND_HIPBLAS_CONJ_DOT(std::complex, hipblasZdotc); + +template +GKO_BIND_HIPBLAS_CONJ_DOT(ValueType, detail::not_implemented); + +#undef GKO_BIND_HIPBLAS_CONJ_DOT + + #define GKO_BIND_HIPBLAS_NORM2(ValueType, HipblasName) \ inline void norm2(hipblasHandle_t handle, int n, const ValueType *x, \ int incx, remove_complex *result) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ - handle, n, as_hiplibs_type(x), incx, as_hiplibs_type(result))); \ + handle, n, as_hipblas_type(x), incx, as_hipblas_type(result))); \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -239,10 +255,9 @@ GKO_BIND_HIPBLAS_DOT(ValueType, detail::not_implemented); GKO_BIND_HIPBLAS_NORM2(float, hipblasSnrm2); GKO_BIND_HIPBLAS_NORM2(double, hipblasDnrm2); -/* not implemented GKO_BIND_HIPBLAS_NORM2(std::complex, hipblasScnrm2); GKO_BIND_HIPBLAS_NORM2(std::complex, hipblasDznrm2); -*/ + template GKO_BIND_HIPBLAS_NORM2(ValueType, detail::not_implemented); diff --git a/hip/base/hiprand_bindings.hip.hpp b/hip/base/hiprand_bindings.hip.hpp new file mode 100644 index 00000000000..f2f54313bad --- 
/dev/null +++ b/hip/base/hiprand_bindings.hip.hpp @@ -0,0 +1,114 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_HIP_BASE_HIPRAND_BINDINGS_HIP_HPP_ +#define GKO_HIP_BASE_HIPRAND_BINDINGS_HIP_HPP_ + + +#include + + +#include + + +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The HIPRAND namespace. + * + * @ingroup hiprand + */ +namespace hiprand { + + +template +struct is_supported : std::false_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + +template <> +struct is_supported> : std::true_type {}; + + +inline hiprandGenerator_t rand_generator(int64 seed, + hiprandRngType generator_type) +{ + hiprandGenerator_t gen; + hiprandCreateGenerator(&gen, generator_type); + hiprandSetPseudoRandomGeneratorSeed(gen, seed); + return gen; +} + + +#define GKO_BIND_HIPRAND_RANDOM_VECTOR(ValueType, HiprandName) \ + inline void rand_vector( \ + hiprandGenerator_t &gen, int n, remove_complex mean, \ + remove_complex stddev, ValueType *values) \ + { \ + n = is_complex() ? 
2 * n : n; \ + GKO_ASSERT_NO_HIPRAND_ERRORS(HiprandName( \ + gen, reinterpret_cast *>(values), n, \ + mean, stddev)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_HIPRAND_RANDOM_VECTOR(float, hiprandGenerateNormal); +GKO_BIND_HIPRAND_RANDOM_VECTOR(double, hiprandGenerateNormalDouble); +GKO_BIND_HIPRAND_RANDOM_VECTOR(std::complex, hiprandGenerateNormal); +GKO_BIND_HIPRAND_RANDOM_VECTOR(std::complex, + hiprandGenerateNormalDouble); + + +#undef GKO_BIND_HIPRAND_RANDOM_VECTOR + + +} // namespace hiprand +} // namespace hip +} // namespace kernels +} // namespace gko + + +#endif // GKO_HIP_BASE_HIPRAND_BINDINGS_HIP_HPP_ diff --git a/hip/base/hipsparse_bindings.hip.hpp b/hip/base/hipsparse_bindings.hip.hpp index 95be1bfe129..8a275631fc6 100644 --- a/hip/base/hipsparse_bindings.hip.hpp +++ b/hip/base/hipsparse_bindings.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -617,6 +617,20 @@ inline void destroy_ilu0_info(csrilu02Info_t info) } +inline csric02Info_t create_ic0_info() +{ + csric02Info_t info{}; + GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseCreateCsric02Info(&info)); + return info; +} + + +inline void destroy_ic0_info(csric02Info_t info) +{ + GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseDestroyCsric02Info(info)); +} + + template void create_identity_permutation(hipsparseHandle_t handle, IndexType size, IndexType *permutation) GKO_NOT_IMPLEMENTED; @@ -807,6 +821,118 @@ GKO_BIND_HIPSPARSE_ILU0(std::complex, hipsparseZcsrilu02); #undef GKO_BIND_HIPSPARSE_ILU0 +template +void ic0_buffer_size(hipsparseHandle_t handle, IndexType m, IndexType nnz, + const hipsparseMatDescr_t descr, const ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, + size_type &buffer_size) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(ValueType, HipsparseName) \ + template <> \ + inline void ic0_buffer_size( \ + hipsparseHandle_t handle, int32 m, int32 nnz, \ + const hipsparseMatDescr_t descr, const ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + size_type &buffer_size) \ + { \ + int tmp_buffer_size{}; \ + GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ + HipsparseName(handle, m, nnz, descr, \ + as_hiplibs_type(const_cast(vals)), \ + row_ptrs, col_idxs, info, &tmp_buffer_size)); \ + buffer_size = tmp_buffer_size; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(float, hipsparseScsric02_bufferSize); +GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(double, hipsparseDcsric02_bufferSize); +#if defined(hipsparseVersionMajor) && defined(hipsparseVersionMinor) && \ + ((hipsparseVersionMajor > 1) || \ + (hipsparseVersionMajor == 1 && hipsparseVersionMinor >= 4)) +GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(std::complex, + 
hipsparseCcsric02_bufferSize); +GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(std::complex, + hipsparseZcsric02_bufferSize); +#endif // hipsparse version >= 1.4 + +#undef GKO_BIND_HIPSPARSE_ILU0_BUFFER_SIZE + + +template +void ic0_analysis(hipsparseHandle_t handle, IndexType m, IndexType nnz, + const hipsparseMatDescr_t descr, const ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, hipsparseSolvePolicy_t policy, + void *buffer) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_HIPSPARSE_IC0_ANALYSIS(ValueType, HipsparseName) \ + template <> \ + inline void ic0_analysis( \ + hipsparseHandle_t handle, int32 m, int32 nnz, \ + const hipsparseMatDescr_t descr, const ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + hipsparseSolvePolicy_t policy, void *buffer) \ + { \ + GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ + HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ + row_ptrs, col_idxs, info, policy, buffer)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_HIPSPARSE_IC0_ANALYSIS(float, hipsparseScsric02_analysis); +GKO_BIND_HIPSPARSE_IC0_ANALYSIS(double, hipsparseDcsric02_analysis); +#if defined(hipsparseVersionMajor) && defined(hipsparseVersionMinor) && \ + ((hipsparseVersionMajor > 1) || \ + (hipsparseVersionMajor == 1 && hipsparseVersionMinor >= 4)) +GKO_BIND_HIPSPARSE_IC0_ANALYSIS(std::complex, + hipsparseCcsric02_analysis); +GKO_BIND_HIPSPARSE_IC0_ANALYSIS(std::complex, + hipsparseZcsric02_analysis); +#endif // hipsparse version >= 1.4 + +#undef GKO_BIND_HIPSPARSE_IC0_ANALYSIS + + +template +void ic0(hipsparseHandle_t handle, IndexType m, IndexType nnz, + const hipsparseMatDescr_t descr, ValueType *vals, + const IndexType *row_ptrs, const IndexType *col_idxs, + csric02Info_t info, hipsparseSolvePolicy_t policy, + void *buffer) GKO_NOT_IMPLEMENTED; + +#define GKO_BIND_HIPSPARSE_IC0(ValueType, HipsparseName) \ + 
template <> \ + inline void ic0( \ + hipsparseHandle_t handle, int32 m, int32 nnz, \ + const hipsparseMatDescr_t descr, ValueType *vals, \ + const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ + hipsparseSolvePolicy_t policy, void *buffer) \ + { \ + GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ + HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ + row_ptrs, col_idxs, info, policy, buffer)); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +GKO_BIND_HIPSPARSE_IC0(float, hipsparseScsric02); +GKO_BIND_HIPSPARSE_IC0(double, hipsparseDcsric02); +#if defined(hipsparseVersionMajor) && defined(hipsparseVersionMinor) && \ + ((hipsparseVersionMajor > 1) || \ + (hipsparseVersionMajor == 1 && hipsparseVersionMinor >= 4)) +GKO_BIND_HIPSPARSE_IC0(std::complex, hipsparseCcsric02); +GKO_BIND_HIPSPARSE_IC0(std::complex, hipsparseZcsric02); +#endif // hipsparse version >= 1.4 + +#undef GKO_BIND_HIPSPARSE_IC0 + + } // namespace hipsparse } // namespace hip } // namespace kernels diff --git a/hip/base/kernel_launch.hip.hpp b/hip/base/kernel_launch.hip.hpp new file mode 100644 index 00000000000..5e18cb9c645 --- /dev/null +++ b/hip/base/kernel_launch.hip.hpp @@ -0,0 +1,106 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_ +#error \ + "This file can only be used from inside common/unified/base/kernel_launch.hpp" +#endif + + +#include + + +#include "hip/base/device_guard.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +constexpr int default_block_size = 512; + + +template +__global__ __launch_bounds__(default_block_size) void generic_kernel_1d( + size_type size, KernelFunction fn, KernelArgs... args) +{ + auto tidx = thread::get_thread_id_flat(); + if (tidx >= size) { + return; + } + fn(tidx, args...); +} + + +template +__global__ __launch_bounds__(default_block_size) void generic_kernel_2d( + size_type rows, size_type cols, KernelFunction fn, KernelArgs... 
args) +{ + auto tidx = thread::get_thread_id_flat(); + auto col = tidx % cols; + auto row = tidx / cols; + if (row >= rows) { + return; + } + fn(row, col, args...); +} + + +template +void run_kernel(std::shared_ptr exec, KernelFunction fn, + size_type size, KernelArgs &&... args) +{ + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size, block_size); + hipLaunchKernelGGL(generic_kernel_1d, num_blocks, block_size, 0, 0, size, + fn, map_to_device(args)...); +} + +template +void run_kernel(std::shared_ptr exec, KernelFunction fn, + dim<2> size, KernelArgs &&... args) +{ + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + hipLaunchKernelGGL(generic_kernel_2d, num_blocks, block_size, 0, 0, size[0], + size[1], fn, map_to_device(args)...); +} + + +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/base/kernel_launch_solver.hip.hpp b/hip/base/kernel_launch_solver.hip.hpp new file mode 100644 index 00000000000..ed8c610d5b2 --- /dev/null +++ b/hip/base/kernel_launch_solver.hip.hpp @@ -0,0 +1,79 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_ +#error \ + "This file can only be used from inside common/unified/base/kernel_launch_solver.hpp" +#endif + + +#include + + +namespace gko { +namespace kernels { +namespace hip { + + +template +__global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver( + size_type rows, size_type cols, size_type default_stride, KernelFunction fn, + KernelArgs... args) +{ + auto tidx = thread::get_thread_id_flat(); + auto col = tidx % cols; + auto row = tidx / cols; + if (row >= rows) { + return; + } + fn(row, col, + device_unpack_solver_impl::unpack(args, default_stride)...); +} + + +template +void run_kernel_solver(std::shared_ptr exec, + KernelFunction fn, dim<2> size, size_type default_stride, + KernelArgs &&... 
args) +{ + gko::hip::device_guard guard{exec->get_device_id()}; + constexpr auto block_size = kernels::hip::default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + hipLaunchKernelGGL(kernels::hip::generic_kernel_2d_solver, num_blocks, + block_size, 0, 0, size[0], size[1], default_stride, fn, + kernels::hip::map_to_device(args)...); +} + + +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/base/math.hip.hpp b/hip/base/math.hip.hpp index a80cc24f989..2cd504dc43e 100644 --- a/hip/base/math.hip.hpp +++ b/hip/base/math.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -#include "common/base/math.hpp.inc" +#include "common/cuda_hip/base/math.hpp.inc" } // namespace gko diff --git a/hip/base/pointer_mode_guard.hip.hpp b/hip/base/pointer_mode_guard.hip.hpp index f5601c5003a..f231909e6a5 100644 --- a/hip/base/pointer_mode_guard.hip.hpp +++ b/hip/base/pointer_mode_guard.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp index add1b10b40b..c0c7e87e333 100644 --- a/hip/base/types.hip.hpp +++ b/hip/base/types.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -34,10 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_BASE_TYPES_HIP_HPP_ -#include +#include -#include +#include #include @@ -95,6 +95,48 @@ struct hiplibs_type_impl> { using type = typename hiplibs_type_impl>::type; }; + +template +struct hipblas_type_impl { + using type = T; +}; + +template +struct hipblas_type_impl { + using type = typename hipblas_type_impl::type *; +}; + +template +struct hipblas_type_impl { + using type = typename hipblas_type_impl::type &; +}; + +template +struct hipblas_type_impl { + using type = const typename hipblas_type_impl::type; +}; + +template +struct hipblas_type_impl { + using type = volatile typename hipblas_type_impl::type; +}; + +template <> +struct hipblas_type_impl> { + using type = hipblasComplex; +}; + +template <> +struct hipblas_type_impl> { + using type = hipblasDoubleComplex; +}; + +template +struct hipblas_type_impl> { + using type = typename hipblas_type_impl>::type; +}; + + template struct hip_type_impl { using type = T; @@ -252,6 +294,30 @@ inline hiplibs_type as_hiplibs_type(T val) } +/** + * This is an alias for equivalent of type T used in the HIPBLAS library. + * + * @tparam T a type + */ +template +using hipblas_type = typename detail::hipblas_type_impl::type; + + +/** + * Reinterprets the passed in value as an equivalent type used by the HIPBLAS + * library. 
+ * + * @param val the value to reinterpret + * + * @return `val` reinterpreted to type used by HIP libraries + */ +template +inline hipblas_type as_hipblas_type(T val) +{ + return reinterpret_cast>(val); +} + + } // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/base/version.hip.cpp b/hip/base/version.hip.cpp index 5c5473cbd55..e947640306a 100644 --- a/hip/base/version.hip.cpp +++ b/hip/base/version.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/components/absolute_array.hip.cpp b/hip/components/absolute_array.hip.cpp new file mode 100644 index 00000000000..f5e4e9637a0 --- /dev/null +++ b/hip/components/absolute_array.hip.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/absolute_array.hpp" + + +#include + + +#include "hip/base/types.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +namespace components { + + +constexpr int default_block_size = 512; + + +#include "common/cuda_hip/components/absolute_array.hpp.inc" + + +template +void inplace_absolute_array(std::shared_ptr exec, + ValueType *data, size_type n) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + hipLaunchKernelGGL(kernel::inplace_absolute_array_kernel, dim3(grid_size), + dim3(block_size), 0, 0, n, as_hip_type(data)); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); + + +template +void outplace_absolute_array(std::shared_ptr exec, + const ValueType *in, size_type n, + remove_complex *out) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + hipLaunchKernelGGL(kernel::outplace_absolute_array_kernel, dim3(grid_size), + dim3(block_size), 0, 0, n, as_hip_type(in), + as_hip_type(out)); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL); + + +} // namespace components +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/components/atomic.hip.hpp b/hip/components/atomic.hip.hpp index 
14c664dd876..a6508e87f8c 100644 --- a/hip/components/atomic.hip.hpp +++ b/hip/components/atomic.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ namespace kernels { namespace hip { -#include "common/components/atomic.hpp.inc" +#include "common/cuda_hip/components/atomic.hpp.inc" /** diff --git a/hip/components/cooperative_groups.hip.hpp b/hip/components/cooperative_groups.hip.hpp index 5f818dca9be..10c1217da11 100644 --- a/hip/components/cooperative_groups.hip.hpp +++ b/hip/components/cooperative_groups.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/components/diagonal_block_manipulation.hip.hpp b/hip/components/diagonal_block_manipulation.hip.hpp index db26e35db1e..8237edcba7d 100644 --- a/hip/components/diagonal_block_manipulation.hip.hpp +++ b/hip/components/diagonal_block_manipulation.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace hip { namespace csr { -#include "common/components/diagonal_block_manipulation.hpp.inc" +#include "common/cuda_hip/components/diagonal_block_manipulation.hpp.inc" } // namespace csr diff --git a/hip/components/fill_array.hip.cpp b/hip/components/fill_array.hip.cpp index e738a68811e..590a0953b1d 100644 --- a/hip/components/fill_array.hip.cpp +++ b/hip/components/fill_array.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -51,7 +51,7 @@ namespace components { constexpr int default_block_size = 512; -#include "common/components/fill_array.hpp.inc" +#include "common/cuda_hip/components/fill_array.hpp.inc" template @@ -64,10 +64,20 @@ void fill_array(std::shared_ptr exec, ValueType *array, 0, n, as_hip_type(array), as_hip_type(val)); } +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); -template GKO_DECLARE_FILL_ARRAY_KERNEL(size_type); + +template +void fill_seq_array(std::shared_ptr exec, + ValueType *array, size_type n) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + hipLaunchKernelGGL(kernel::fill_seq_array, dim3(grid_size), + dim3(block_size), 0, 0, n, as_hip_type(array)); +} + +GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL); } // namespace components diff --git a/hip/components/format_conversion.hip.hpp b/hip/components/format_conversion.hip.hpp index f97e70ef438..c0c77869e3f 100644 --- a/hip/components/format_conversion.hip.hpp +++ b/hip/components/format_conversion.hip.hpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -44,6 +45,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/components/thread_ids.hip.hpp" +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + namespace gko { namespace kernels { namespace hip { @@ -121,6 +127,11 @@ __host__ size_type calculate_nwarps(std::shared_ptr exec, multiple = 8; } #endif // GINKGO_HIP_PLATFORM_NVCC +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING + if (_tuning_flag) { + multiple = _tuned_value; + } +#endif // GINKGO_BENCHMARK_ENABLE_TUNING return std::min(multiple * nwarps_in_hip, size_type(ceildiv(nnz, config::warp_size))); } diff --git a/hip/components/intrinsics.hip.hpp b/hip/components/intrinsics.hip.hpp index 8d9d0579013..d1d2d13f6bb 100644 --- a/hip/components/intrinsics.hip.hpp +++ b/hip/components/intrinsics.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace hip { -#include "common/components/intrinsics.hpp.inc" +#include "common/cuda_hip/components/intrinsics.hpp.inc" } // namespace hip diff --git a/hip/components/merging.hip.hpp b/hip/components/merging.hip.hpp index 30289d41ed2..e3ad7fb1685 100644 --- a/hip/components/merging.hip.hpp +++ b/hip/components/merging.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ namespace kernels { namespace hip { -#include "common/components/merging.hpp.inc" +#include "common/cuda_hip/components/merging.hpp.inc" } // namespace hip diff --git a/hip/components/prefix_sum.hip.cpp b/hip/components/prefix_sum.hip.cpp index 9947622c565..9302fc07b9a 100644 --- a/hip/components/prefix_sum.hip.cpp +++ b/hip/components/prefix_sum.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ template void prefix_sum(std::shared_ptr exec, IndexType *counts, size_type num_entries) { - // prefix_sum should be on the valid array + // prefix_sum should only be performed on a valid array if (num_entries > 0) { auto num_blocks = ceildiv(num_entries, prefix_sum_block_size); Array block_sum_array(exec, num_blocks - 1); @@ -58,8 +58,8 @@ void prefix_sum(std::shared_ptr exec, IndexType *counts, HIP_KERNEL_NAME(start_prefix_sum), dim3(num_blocks), dim3(prefix_sum_block_size), 0, 0, num_entries, counts, block_sums); - // add the total sum of the previous block only when the number of block - // is larger than 1. 
+ // add the total sum of the previous block only when the number of + // blocks is larger than 1. if (num_blocks > 1) { hipLaunchKernelGGL( HIP_KERNEL_NAME(finalize_prefix_sum), diff --git a/hip/components/prefix_sum.hip.hpp b/hip/components/prefix_sum.hip.hpp index de5aeb168e5..fbb87e2fa69 100644 --- a/hip/components/prefix_sum.hip.hpp +++ b/hip/components/prefix_sum.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace kernels { namespace hip { -#include "common/components/prefix_sum.hpp.inc" +#include "common/cuda_hip/components/prefix_sum.hpp.inc" } // namespace hip diff --git a/hip/components/reduction.hip.hpp b/hip/components/reduction.hip.hpp index 425dc5b5023..f1c9c8d965f 100644 --- a/hip/components/reduction.hip.hpp +++ b/hip/components/reduction.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -58,7 +58,7 @@ namespace hip { constexpr int default_block_size = 512; -#include "common/components/reduction.hpp.inc" +#include "common/cuda_hip/components/reduction.hpp.inc" /** diff --git a/hip/components/searching.hip.hpp b/hip/components/searching.hip.hpp index 7611b23fdee..11c348f7a6f 100644 --- a/hip/components/searching.hip.hpp +++ b/hip/components/searching.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ namespace kernels { namespace hip { -#include "common/components/searching.hpp.inc" +#include "common/cuda_hip/components/searching.hpp.inc" } // namespace hip diff --git a/hip/components/segment_scan.hip.hpp b/hip/components/segment_scan.hip.hpp index eae5953813a..743ea26d692 100644 --- a/hip/components/segment_scan.hip.hpp +++ b/hip/components/segment_scan.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace hip { -#include "common/components/segment_scan.hpp.inc" +#include "common/cuda_hip/components/segment_scan.hpp.inc" } // namespace hip diff --git a/hip/components/sorting.hip.hpp b/hip/components/sorting.hip.hpp index 704c8f9dd07..7c7fc4404da 100644 --- a/hip/components/sorting.hip.hpp +++ b/hip/components/sorting.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ namespace kernels { namespace hip { -#include "common/components/sorting.hpp.inc" +#include "common/cuda_hip/components/sorting.hpp.inc" } // namespace hip diff --git a/hip/components/thread_ids.hip.hpp b/hip/components/thread_ids.hip.hpp index 6016c26cf68..79c774e3ec9 100644 --- a/hip/components/thread_ids.hip.hpp +++ b/hip/components/thread_ids.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -48,7 +48,7 @@ namespace hip { namespace thread { -#include "common/components/thread_ids.hpp.inc" +#include "common/cuda_hip/components/thread_ids.hpp.inc" } // namespace thread diff --git a/hip/components/uninitialized_array.hip.hpp b/hip/components/uninitialized_array.hip.hpp index 7780ebb10f5..80b5cc74676 100644 --- a/hip/components/uninitialized_array.hip.hpp +++ b/hip/components/uninitialized_array.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -42,7 +42,7 @@ namespace kernels { namespace hip { -#include "common/components/uninitialized_array.hpp.inc" +#include "common/cuda_hip/components/uninitialized_array.hpp.inc" } // namespace hip diff --git a/hip/components/warp_blas.hip.hpp b/hip/components/warp_blas.hip.hpp index 52aa2acaf07..4611e505376 100644 --- a/hip/components/warp_blas.hip.hpp +++ b/hip/components/warp_blas.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -50,7 +50,7 @@ namespace kernels { namespace hip { -#include "common/components/warp_blas.hpp.inc" +#include "common/cuda_hip/components/warp_blas.hpp.inc" } // namespace hip diff --git a/hip/factorization/factorization_kernels.hip.cpp b/hip/factorization/factorization_kernels.hip.cpp index bfe00c9b0b8..ec4ab4806ee 100644 --- a/hip/factorization/factorization_kernels.hip.cpp +++ b/hip/factorization/factorization_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -62,7 +62,7 @@ namespace factorization { constexpr int default_block_size{512}; -#include "common/factorization/factorization_kernels.hpp.inc" +#include "common/cuda_hip/factorization/factorization_kernels.hpp.inc" template diff --git a/hip/factorization/ic_kernels.hip.cpp b/hip/factorization/ic_kernels.hip.cpp new file mode 100644 index 00000000000..4510a18781c --- /dev/null +++ b/hip/factorization/ic_kernels.hip.cpp @@ -0,0 +1,97 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/factorization/ic_kernels.hpp" + + +#include + + +#include + + +#include "hip/base/device_guard.hip.hpp" +#include "hip/base/hipsparse_bindings.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The ic factorization namespace. 
+ * + * @ingroup factor + */ +namespace ic_factorization { + + +template +void compute(std::shared_ptr exec, + matrix::Csr *m) +{ + const auto id = exec->get_device_id(); + auto handle = exec->get_hipsparse_handle(); + gko::hip::device_guard g{id}; + auto desc = hipsparse::create_mat_descr(); + auto info = hipsparse::create_ic0_info(); + + // get buffer size for IC + IndexType num_rows = m->get_size()[0]; + IndexType nnz = m->get_num_stored_elements(); + size_type buffer_size{}; + hipsparse::ic0_buffer_size(handle, num_rows, nnz, desc, + m->get_const_values(), m->get_const_row_ptrs(), + m->get_const_col_idxs(), info, buffer_size); + + Array buffer{exec, buffer_size}; + + // set up IC(0) + hipsparse::ic0_analysis(handle, num_rows, nnz, desc, m->get_const_values(), + m->get_const_row_ptrs(), m->get_const_col_idxs(), + info, HIPSPARSE_SOLVE_POLICY_USE_LEVEL, + buffer.get_data()); + + hipsparse::ic0(handle, num_rows, nnz, desc, m->get_values(), + m->get_const_row_ptrs(), m->get_const_col_idxs(), info, + HIPSPARSE_SOLVE_POLICY_USE_LEVEL, buffer.get_data()); + + hipsparse::destroy_ic0_info(info); + hipsparse::destroy(desc); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); + + +} // namespace ic_factorization +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/factorization/ilu_kernels.hip.cpp b/hip/factorization/ilu_kernels.hip.cpp index 8888856e898..e22a8140ea9 100644 --- a/hip/factorization/ilu_kernels.hip.cpp +++ b/hip/factorization/ilu_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/factorization/par_ic_kernels.hip.cpp b/hip/factorization/par_ic_kernels.hip.cpp new file mode 100644 index 00000000000..17e556046e2 --- /dev/null +++ b/hip/factorization/par_ic_kernels.hip.cpp @@ -0,0 +1,110 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/factorization/par_ic_kernels.hpp" + + +#include +#include +#include + + +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The parallel IC factorization namespace. + * + * @ingroup factor + */ +namespace par_ic_factorization { + + +constexpr int default_block_size = 512; + + +// subwarp sizes for all warp-parallel kernels (sweep) +using compiled_kernels = + syn::value_list; + + +#include "common/cuda_hip/factorization/par_ic_kernels.hpp.inc" + + +template +void init_factor(std::shared_ptr exec, + matrix::Csr *l) +{ + auto num_rows = l->get_size()[0]; + auto num_blocks = ceildiv(num_rows, default_block_size); + auto l_row_ptrs = l->get_const_row_ptrs(); + auto l_vals = l->get_values(); + hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::ic_init), num_blocks, + default_block_size, 0, 0, l_row_ptrs, + as_hip_type(l_vals), num_rows); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); + + +template +void compute_factor(std::shared_ptr exec, + size_type iterations, + const matrix::Coo *a_lower, + matrix::Csr *l) +{ + auto nnz = l->get_num_stored_elements(); + auto num_blocks = ceildiv(nnz, default_block_size); + for (size_type i = 0; i < iterations; ++i) { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel::ic_sweep), num_blocks, default_block_size, + 0, 0, a_lower->get_const_row_idxs(), a_lower->get_const_col_idxs(), + as_hip_type(a_lower->get_const_values()), l->get_const_row_ptrs(), + l->get_const_col_idxs(), as_hip_type(l->get_values()), + static_cast(l->get_num_stored_elements())); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); + + +} // namespace par_ic_factorization +} // namespace hip +} // namespace kernels +} // namespace gko diff --git 
a/hip/factorization/par_ict_kernels.hip.cpp b/hip/factorization/par_ict_kernels.hip.cpp index d987ff36856..a67c29d10a7 100644 --- a/hip/factorization/par_ict_kernels.hip.cpp +++ b/hip/factorization/par_ict_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -68,7 +68,7 @@ namespace hip { namespace par_ict_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for all warp-parallel kernels (filter, add_candidates) @@ -76,8 +76,8 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ict_spgeam_kernels.hpp.inc" -#include "common/factorization/par_ict_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc" namespace { @@ -86,18 +86,18 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *llt, + const matrix::Csr *llh, const matrix::Csr *a, const matrix::Csr *l, matrix::Csr *l_new) { - auto num_rows = static_cast(llt->get_size()[0]); + auto num_rows = static_cast(llh->get_size()[0]); auto subwarps_per_block = default_block_size / subwarp_size; auto num_blocks = ceildiv(num_rows, subwarps_per_block); matrix::CsrBuilder l_new_builder(l_new); - auto llt_row_ptrs = llt->get_const_row_ptrs(); - auto llt_col_idxs = llt->get_const_col_idxs(); - auto llt_vals = llt->get_const_values(); + auto llh_row_ptrs = llh->get_const_row_ptrs(); + auto llh_col_idxs = llh->get_const_col_idxs(); + auto llh_vals = llh->get_const_values(); auto a_row_ptrs = a->get_const_row_ptrs(); auto a_col_idxs = a->get_const_col_idxs(); auto a_vals = a->get_const_values(); @@ -108,8 +108,8 @@ void add_candidates(syn::value_list, // count 
non-zeros per row hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::ict_tri_spgeam_nnz), - dim3(num_blocks), dim3(default_block_size), 0, 0, llt_row_ptrs, - llt_col_idxs, a_row_ptrs, a_col_idxs, l_new_row_ptrs, num_rows); + dim3(num_blocks), dim3(default_block_size), 0, 0, llh_row_ptrs, + llh_col_idxs, a_row_ptrs, a_col_idxs, l_new_row_ptrs, num_rows); // build row ptrs components::prefix_sum(exec, l_new_row_ptrs, num_rows + 1); @@ -125,8 +125,8 @@ void add_candidates(syn::value_list, // fill columns and values hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::ict_tri_spgeam_init), - dim3(num_blocks), dim3(default_block_size), 0, 0, llt_row_ptrs, - llt_col_idxs, as_hip_type(llt_vals), a_row_ptrs, a_col_idxs, + dim3(num_blocks), dim3(default_block_size), 0, 0, llh_row_ptrs, + llh_col_idxs, as_hip_type(llh_vals), a_row_ptrs, a_col_idxs, as_hip_type(a_vals), l_row_ptrs, l_col_idxs, as_hip_type(l_vals), l_new_row_ptrs, l_new_col_idxs, as_hip_type(l_new_vals), num_rows); } @@ -163,14 +163,14 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_factor, compute_factor); template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llt, + const matrix::Csr *llh, const matrix::Csr *a, const matrix::Csr *l, matrix::Csr *l_new) { auto num_rows = a->get_size()[0]; auto total_nnz = - llt->get_num_stored_elements() + a->get_num_stored_elements(); + llh->get_num_stored_elements() + a->get_num_stored_elements(); auto total_nnz_per_row = total_nnz / num_rows; select_add_candidates( compiled_kernels(), @@ -178,7 +178,7 @@ void add_candidates(std::shared_ptr exec, return total_nnz_per_row <= compiled_subwarp_size || compiled_subwarp_size == config::warp_size; }, - syn::value_list(), syn::type_list<>(), exec, llt, a, l, l_new); + syn::value_list(), syn::type_list<>(), exec, llh, a, l, l_new); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/factorization/par_ilu_kernels.hip.cpp b/hip/factorization/par_ilu_kernels.hip.cpp index d8caeb90d16..b414874930f 100644 --- 
a/hip/factorization/par_ilu_kernels.hip.cpp +++ b/hip/factorization/par_ilu_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ namespace par_ilu_factorization { constexpr int default_block_size{512}; -#include "common/factorization/par_ilu_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilu_kernels.hpp.inc" template @@ -77,17 +77,15 @@ void compute_l_u_factors(std::shared_ptr exec, ceildiv(num_elements, static_cast(block_size.x))), 1, 1}; for (size_type i = 0; i < iterations; ++i) { - hipLaunchKernelGGL(kernel::compute_l_u_factors, dim3(grid_dim), - dim3(block_size), 0, 0, num_elements, - as_hip_type(system_matrix->get_const_row_idxs()), - as_hip_type(system_matrix->get_const_col_idxs()), - as_hip_type(system_matrix->get_const_values()), - as_hip_type(l_factor->get_const_row_ptrs()), - as_hip_type(l_factor->get_const_col_idxs()), - as_hip_type(l_factor->get_values()), - as_hip_type(u_factor->get_const_row_ptrs()), - as_hip_type(u_factor->get_const_col_idxs()), - as_hip_type(u_factor->get_values())); + hipLaunchKernelGGL( + kernel::compute_l_u_factors, dim3(grid_dim), dim3(block_size), 0, 0, + num_elements, system_matrix->get_const_row_idxs(), + system_matrix->get_const_col_idxs(), + as_hip_type(system_matrix->get_const_values()), + l_factor->get_const_row_ptrs(), l_factor->get_const_col_idxs(), + as_hip_type(l_factor->get_values()), u_factor->get_const_row_ptrs(), + u_factor->get_const_col_idxs(), + as_hip_type(u_factor->get_values())); } } diff --git a/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp b/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp index 1d3e1eba6f7..319fdf4210c 100644 --- a/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp +++ 
b/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,10 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ilut_kernels.hpp" -#include +#include -#include +#include #include @@ -47,7 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum.hpp" -#include "core/factorization/par_ilut_kernels.hpp" #include "core/matrix/coo_builder.hpp" #include "core/matrix/csr_builder.hpp" #include "core/matrix/csr_kernels.hpp" @@ -80,8 +79,8 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_filter_kernels.hpp.inc" -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/hip/factorization/par_ilut_filter_kernel.hip.cpp b/hip/factorization/par_ilut_filter_kernel.hip.cpp index f1b57bd9f32..e8997ebeb40 100644 --- a/hip/factorization/par_ilut_filter_kernel.hip.cpp +++ b/hip/factorization/par_ilut_filter_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ namespace hip { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for filter kernels @@ -75,7 +75,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_filter_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc" namespace { @@ -163,4 +163,4 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( } // namespace par_ilut_factorization } // namespace hip } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/hip/factorization/par_ilut_select_common.hip.cpp b/hip/factorization/par_ilut_select_common.hip.cpp index 92431c6b0a3..4e453270d78 100644 --- a/hip/factorization/par_ilut_select_common.hip.cpp +++ b/hip/factorization/par_ilut_select_common.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -61,7 +61,7 @@ namespace hip { namespace par_ilut_factorization { -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/hip/factorization/par_ilut_select_common.hip.hpp b/hip/factorization/par_ilut_select_common.hip.hpp index ce77d3a6921..38f653abf6e 100644 --- a/hip/factorization/par_ilut_select_common.hip.hpp +++ b/hip/factorization/par_ilut_select_common.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -45,8 +45,8 @@ namespace hip { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; -constexpr auto items_per_thread = 16; +constexpr int default_block_size = 512; +constexpr int items_per_thread = 16; template diff --git a/hip/factorization/par_ilut_select_kernel.hip.cpp b/hip/factorization/par_ilut_select_kernel.hip.cpp index 9d8c73562e9..00adc9fcba1 100644 --- a/hip/factorization/par_ilut_select_kernel.hip.cpp +++ b/hip/factorization/par_ilut_select_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,10 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ilut_kernels.hpp" -#include +#include -#include +#include #include @@ -66,7 +66,7 @@ namespace hip { namespace par_ilut_factorization { -#include "common/factorization/par_ilut_select_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc" template diff --git a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp b/hip/factorization/par_ilut_spgeam_kernel.hip.cpp index 3d00ce153ba..d7815de1d2a 100644 --- a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp +++ b/hip/factorization/par_ilut_spgeam_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -68,7 +68,7 @@ namespace hip { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for add_candidates kernels @@ -76,7 +76,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_spgeam_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc" namespace { diff --git a/hip/factorization/par_ilut_sweep_kernel.hip.cpp b/hip/factorization/par_ilut_sweep_kernel.hip.cpp index 15fb33ec34e..b00a225a6b4 100644 --- a/hip/factorization/par_ilut_sweep_kernel.hip.cpp +++ b/hip/factorization/par_ilut_sweep_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -68,7 +68,7 @@ namespace hip { namespace par_ilut_factorization { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; // subwarp sizes for all warp-parallel kernels (filter, add_candidates) @@ -76,7 +76,7 @@ using compiled_kernels = syn::value_list; -#include "common/factorization/par_ilut_sweep_kernels.hpp.inc" +#include "common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc" namespace { diff --git a/hip/get_info.cmake b/hip/get_info.cmake index f98083dc468..1610ac0eee4 100644 --- a/hip/get_info.cmake +++ b/hip/get_info.cmake @@ -1,21 +1,15 @@ -set(log_types "detailed_log") -foreach(log_type ${log_types}) - ginkgo_print_module_header(${${log_type}} "HIP") - set(print_var "GINKGO_HIPCONFIG_PATH;GINKGO_HIP_AMDGPU;GINKGO_HIP_HCC_COMPILER_FLAGS;GINKGO_HIP_NVCC_COMPILER_FLAGS;GINKGO_HIP_THRUST_PATH" - ) - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() -endforeach() -foreach(log_type ${log_types}) - 
ginkgo_print_module_footer(${${log_type}} "HIP variables:") - set(print_var "HIP_VERSION;HIP_COMPILER;HIP_PATH;ROCM_PATH;HIP_PLATFORM;HIP_ROOT_DIR;HCC_PATH;HIP_RUNTIME;HIPBLAS_PATH;HIPSPARSE_PATH;HIP_CLANG_INCLUDE_PATH;HIP_CLANG_PATH;HIP_HIPCC_EXECUTABLE;HIP_HIPCONFIG_EXECUTABLE;HIP_HOST_COMPILATION_CPP" - ) - foreach(var ${print_var}) - ginkgo_print_variable(${${log_type}} ${var} ) - endforeach() - ginkgo_print_flags(${detailed_log} "HIP_HCC_FLAGS") - ginkgo_print_flags(${detailed_log} "HIP_HIPCC_FLAGS") - ginkgo_print_flags(${detailed_log} "HIP_NVCC_FLAGS") - ginkgo_print_module_footer(${detailed_log} "") -endforeach() +ginkgo_print_module_header(${detailed_log} "HIP") +ginkgo_print_foreach_variable(${detailed_log} + "GINKGO_HIPCONFIG_PATH;GINKGO_HIP_AMDGPU" + "GINKGO_HIP_CLANG_COMPILER_FLAGS;GINKGO_HIP_NVCC_COMPILER_FLAGS" + "GINKGO_HIP_THRUST_PATH;GINKGO_AMD_ARCH_FLAGS") +ginkgo_print_module_footer(${detailed_log} "HIP variables:") +ginkgo_print_foreach_variable(${detailed_log} + "HIP_VERSION;HIP_COMPILER;HIP_PATH;ROCM_PATH" + "HIP_PLATFORM;HIP_ROOT_DIR;HIP_RUNTIME;HIPBLAS_PATH;HIPSPARSE_PATH" + "HIPRAND_PATH;ROCRAND_PATH;HIP_CLANG_INCLUDE_PATH;HIP_CLANG_PATH" + "HIP_HIPCC_EXECUTABLE;HIP_HIPCONFIG_EXECUTABLE;HIP_HOST_COMPILATION_CPP") +ginkgo_print_flags(${detailed_log} "HIP_HIPCC_FLAGS") +ginkgo_print_flags(${detailed_log} "HIP_NVCC_FLAGS") +ginkgo_print_flags(${detailed_log} "HIP_CLANG_FLAGS") +ginkgo_print_module_footer(${detailed_log} "") diff --git a/hip/matrix/coo_kernels.hip.cpp b/hip/matrix/coo_kernels.hip.cpp index 3e9e4f6b190..b1318230c7a 100644 --- a/hip/matrix/coo_kernels.hip.cpp +++ b/hip/matrix/coo_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -43,7 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/components/fill_array.hpp" #include "core/matrix/dense_kernels.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" @@ -77,7 +76,7 @@ constexpr int warps_in_block = 4; constexpr int spmv_block_size = warps_in_block * config::warp_size; -#include "common/matrix/coo_kernels.hpp.inc" +#include "common/cuda_hip/matrix/coo_kernels.hpp.inc" template @@ -85,9 +84,7 @@ void spmv(std::shared_ptr exec, const matrix::Coo *a, const matrix::Dense *b, matrix::Dense *c) { - components::fill_array(exec, c->get_values(), c->get_num_stored_elements(), - zero()); - + dense::fill(exec, c, zero()); spmv2(exec, a, b, c); } @@ -258,30 +255,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_COO_CONVERT_TO_DENSE_KERNEL); -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Coo *orig, - matrix::Diagonal *diag) -{ - const auto nnz = orig->get_num_stored_elements(); - const auto diag_size = diag->get_size()[0]; - const auto num_blocks = ceildiv(nnz, default_block_size); - - const auto orig_values = orig->get_const_values(); - const auto orig_row_idxs = orig->get_const_row_idxs(); - const auto orig_col_idxs = orig->get_const_col_idxs(); - auto diag_values = diag->get_values(); - - hipLaunchKernelGGL(kernel::extract_diagonal, dim3(num_blocks), - dim3(default_block_size), 0, 0, nnz, - as_hip_type(orig_values), as_hip_type(orig_row_idxs), - as_hip_type(orig_col_idxs), as_hip_type(diag_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL); - - } // namespace coo } // namespace hip } // namespace kernels diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 145a84271d5..b2a712e1924 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -1,5 +1,5 @@ 
/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -83,7 +83,6 @@ namespace csr { constexpr int default_block_size = 512; constexpr int warps_in_block = 4; constexpr int spmv_block_size = warps_in_block * config::warp_size; -constexpr int wsize = config::warp_size; constexpr int classical_overweight = 32; @@ -100,7 +99,7 @@ using spgeam_kernels = syn::value_list; -#include "common/matrix/csr_kernels.hpp.inc" +#include "common/cuda_hip/matrix/csr_kernels.hpp.inc" namespace host_kernel { @@ -953,7 +952,7 @@ void conj_transpose(std::shared_ptr exec, orig->get_size()[1], orig->get_num_stored_elements(), orig->get_const_values(), orig->get_const_row_ptrs(), orig->get_const_col_idxs(), trans->get_values(), - trans->get_col_idxs(), trans->get_row_ptrs(), copyValues, idxBase); + trans->get_row_ptrs(), trans->get_col_idxs(), copyValues, idxBase); hipLaunchKernelGGL(conjugate_kernel, dim3(grid_size), dim3(block_size), 0, 0, trans->get_num_stored_elements(), @@ -968,49 +967,85 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(inv_row_ptr_permute_kernel), + count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + components::prefix_sum(exec, permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + 
HIP_KERNEL_NAME(inv_symm_permute_kernel), + copy_num_blocks, default_block_size, 0, 0, num_rows, perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_hip_type(orig->get_const_values()), permuted->get_row_ptrs(), + permuted->get_col_idxs(), as_hip_type(permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; +void row_permute(std::shared_ptr exec, const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(row_ptr_permute_kernel), + count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(row_permute_kernel), copy_num_blocks, + default_block_size, 0, 0, num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_hip_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_hip_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, + const IndexType *perm, const matrix::Csr *orig, matrix::Csr *row_permuted) - GKO_NOT_IMPLEMENTED; +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(inv_row_ptr_permute_kernel), + 
count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + components::prefix_sum(exec, row_permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inv_row_permute_kernel), + copy_num_blocks, default_block_size, 0, 0, num_rows, perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_hip_type(orig->get_const_values()), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), as_hip_type(row_permuted->get_values())); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Csr *orig, - matrix::Csr *column_permuted) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); - - template void calculate_max_nnz_per_row(std::shared_ptr exec, const matrix::Csr *source, diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 824d2712b7f..02ff05b5b3f 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -67,10 +67,10 @@ namespace hip { namespace dense { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/dense_kernels.hpp.inc" +#include "common/cuda_hip/matrix/dense_kernels.hpp.inc" template @@ -121,97 +121,72 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template -void scale(std::shared_ptr exec, - const matrix::Dense *alpha, matrix::Dense *x) +void compute_dot(std::shared_ptr exec, + const matrix::Dense *x, + const matrix::Dense *y, + matrix::Dense *result) { - if (hipblas::is_supported::value && x->get_size()[1] == 1) { - hipblas::scal(exec->get_hipblas_handle(), x->get_size()[0], - alpha->get_const_values(), x->get_values(), - x->get_stride()); + if (hipblas::is_supported::value) { + // TODO: write a custom kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + hipblas::dot(exec->get_hipblas_handle(), x->get_size()[0], + x->get_const_values() + col, x->get_stride(), + y->get_const_values() + col, y->get_stride(), + result->get_values() + col); + } } else { - // TODO: tune this parameter - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(x->get_size()[0] * x->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, - block_size / config::warp_size}; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::scale), dim3(grid_dim), - dim3(block_dim), 0, 0, x->get_size()[0], x->get_size()[1], - alpha->get_size()[1], as_hip_type(alpha->get_const_values()), - as_hip_type(x->get_values()), x->get_stride()); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); - + // TODO: these are tuning parameters obtained experimentally, once + // we decide how to handle this uniformly, they should be modified + // appropriately + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; -template -void 
add_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) -{ - if (hipblas::is_supported::value && x->get_size()[1] == 1) { - hipblas::axpy(exec->get_hipblas_handle(), x->get_size()[0], - alpha->get_const_values(), x->get_const_values(), - x->get_stride(), y->get_values(), y->get_stride()); - } else { - // TODO: tune this parameter - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(x->get_size()[0] * x->get_size()[1], block_size); + constexpr auto work_per_block = work_per_thread * block_size; + const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::add_scaled), dim3(grid_dim), - dim3(block_dim), 0, 0, x->get_size()[0], x->get_size()[1], - alpha->get_size()[1], as_hip_type(alpha->get_const_values()), - as_hip_type(x->get_const_values()), x->get_stride(), - as_hip_type(y->get_values()), y->get_stride()); + Array work(exec, grid_dim.x); + // TODO: write a kernel which does this more efficiently + for (size_type col = 0; col < x->get_size()[1]; ++col) { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel::compute_partial_dot), + dim3(grid_dim), dim3(block_dim), 0, 0, x->get_size()[0], + as_hip_type(x->get_const_values() + col), x->get_stride(), + as_hip_type(y->get_const_values() + col), y->get_stride(), + as_hip_type(work.get_data())); + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + kernel::finalize_sum_reduce_computation), + dim3(1), dim3(block_dim), 0, 0, grid_dim.x, + as_hip_type(work.get_const_data()), + as_hip_type(result->get_values() + col)); + } } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) -{ - const auto size = y->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - 
- hipLaunchKernelGGL(kernel::add_scaled_diag, grid_dim, default_block_size, 0, - 0, size, as_hip_type(alpha->get_const_values()), - as_hip_type(x->get_const_values()), - as_hip_type(y->get_values()), y->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template -void compute_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) +void compute_conj_dot(std::shared_ptr exec, + const matrix::Dense *x, + const matrix::Dense *y, + matrix::Dense *result) { if (hipblas::is_supported::value) { // TODO: write a custom kernel which does this more efficiently for (size_type col = 0; col < x->get_size()[1]; ++col) { - hipblas::dot(exec->get_hipblas_handle(), x->get_size()[0], - x->get_const_values() + col, x->get_stride(), - y->get_const_values() + col, y->get_stride(), - result->get_values() + col); + hipblas::conj_dot(exec->get_hipblas_handle(), x->get_size()[0], + x->get_const_values() + col, x->get_stride(), + y->get_const_values() + col, y->get_stride(), + result->get_values() + col); } } else { // TODO: these are tuning parameters obtained experimentally, once // we decide how to handle this uniformly, they should be modified // appropriately - constexpr auto work_per_thread = 32; - constexpr auto block_size = 1024; + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; constexpr auto work_per_block = work_per_thread * block_size; const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); @@ -221,13 +196,14 @@ void compute_dot(std::shared_ptr exec, // TODO: write a kernel which does this more efficiently for (size_type col = 0; col < x->get_size()[1]; ++col) { hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::compute_partial_dot), + HIP_KERNEL_NAME(kernel::compute_partial_conj_dot), dim3(grid_dim), dim3(block_dim), 0, 0, x->get_size()[0], as_hip_type(x->get_const_values() + col), 
x->get_stride(), as_hip_type(y->get_const_values() + col), y->get_stride(), as_hip_type(work.get_data())); hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::finalize_dot_computation), + HIP_KERNEL_NAME( + kernel::finalize_sum_reduce_computation), dim3(1), dim3(block_dim), 0, 0, grid_dim.x, as_hip_type(work.get_const_data()), as_hip_type(result->get_values() + col)); @@ -235,7 +211,7 @@ void compute_dot(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template @@ -254,8 +230,8 @@ void compute_norm2(std::shared_ptr exec, // TODO: these are tuning parameters obtained experimentally, once // we decide how to handle this uniformly, they should be modified // appropriately - constexpr auto work_per_thread = 32; - constexpr auto block_size = 1024; + constexpr int work_per_thread = 32; + constexpr int block_size = 1024; constexpr auto work_per_block = work_per_thread * block_size; const dim3 grid_dim = ceildiv(x->get_size()[0], work_per_block); @@ -270,7 +246,8 @@ void compute_norm2(std::shared_ptr exec, as_hip_type(x->get_const_values() + col), x->get_stride(), as_hip_type(work.get_data())); hipLaunchKernelGGL( - HIP_KERNEL_NAME(kernel::finalize_norm2_computation), + HIP_KERNEL_NAME( + kernel::finalize_sqrt_reduce_computation), dim3(1), dim3(block_dim), 0, 0, grid_dim.x, as_hip_type(work.get_const_data()), as_hip_type(result->get_values() + col)); @@ -597,7 +574,7 @@ void transpose(std::shared_ptr exec, } }; -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template @@ -622,111 +599,7 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CONJ_TRANSPOSE_KERNEL); - - -template -void row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) -{ 
- constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::row_permute, dim3(grid_dim), dim3(block_dim), 0, 0, - orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(row_permuted->get_values()), row_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ROW_PERMUTE_KERNEL); - - -template -void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::column_permute, dim3(grid_dim), dim3(block_dim), 0, - 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(column_permuted->get_values()), - column_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_COLUMN_PERMUTE_KERNEL); - - -template -void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::inverse_row_permute, dim3(grid_dim), - dim3(block_dim), 0, 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - 
as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(row_permuted->get_values()), row_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_ROW_PERMUTE_KERNEL); - - -template -void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) -{ - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::inverse_column_permute, dim3(grid_dim), - dim3(block_dim), 0, 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(column_permuted->get_values()), - column_permuted->get_stride()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_INVERSE_COLUMN_PERMUTE_KERNEL); - - -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Diagonal *diag) -{ - const dim3 grid_dim = ceildiv(diag->get_size()[0], default_block_size); - hipLaunchKernelGGL(kernel::extract_diagonal, dim3(grid_dim), - dim3(default_block_size), 0, 0, orig->get_size()[0], - as_hip_type(orig->get_const_values()), - orig->get_stride(), as_hip_type(diag->get_values())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_EXTRACT_DIAGONAL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); } // namespace dense diff --git a/hip/matrix/diagonal_kernels.hip.cpp b/hip/matrix/diagonal_kernels.hip.cpp index 3dc510c0195..a75d4aa6669 100644 --- a/hip/matrix/diagonal_kernels.hip.cpp +++ b/hip/matrix/diagonal_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the 
Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,63 +57,10 @@ namespace hip { namespace diagonal { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/diagonal_kernels.hpp.inc" - - -template -void apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto c_stride = c->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto diag_values = a->get_const_values(); - const auto b_values = b->get_const_values(); - auto c_values = c->get_values(); - - hipLaunchKernelGGL(kernel::apply_to_dense, dim3(grid_dim), - dim3(default_block_size), 0, 0, num_rows, num_cols, - as_hip_type(diag_values), b_stride, - as_hip_type(b_values), c_stride, as_hip_type(c_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); - - -template -void right_apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) -{ - const auto b_size = b->get_size(); - const auto num_rows = b_size[0]; - const auto num_cols = b_size[1]; - const auto b_stride = b->get_stride(); - const auto c_stride = c->get_stride(); - const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size); - - const auto diag_values = a->get_const_values(); - const auto b_values = b->get_const_values(); - auto c_values = c->get_values(); - - hipLaunchKernelGGL(kernel::right_apply_to_dense, dim3(grid_dim), - dim3(default_block_size), 0, 0, num_rows, num_cols, - as_hip_type(diag_values), b_stride, - as_hip_type(b_values), c_stride, as_hip_type(c_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL); +#include 
"common/cuda_hip/matrix/diagonal_kernels.hpp.inc" template @@ -139,68 +86,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL); -template -void right_apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) -{ - const auto num_nnz = b->get_num_stored_elements(); - const auto diag_values = a->get_const_values(); - c->copy_from(b); - auto csr_values = c->get_values(); - const auto csr_col_idxs = c->get_const_col_idxs(); - - const auto grid_dim = ceildiv(num_nnz, default_block_size); - hipLaunchKernelGGL(kernel::right_apply_to_csr, grid_dim, default_block_size, - 0, 0, num_nnz, as_hip_type(diag_values), - as_hip_type(csr_col_idxs), as_hip_type(csr_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL); - - -template -void convert_to_csr(std::shared_ptr exec, - const matrix::Diagonal *source, - matrix::Csr *result) -{ - const auto size = source->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - - const auto diag_values = source->get_const_values(); - auto row_ptrs = result->get_row_ptrs(); - auto col_idxs = result->get_col_idxs(); - auto csr_values = result->get_values(); - - hipLaunchKernelGGL(kernel::convert_to_csr, grid_dim, default_block_size, 0, - 0, size, as_hip_type(diag_values), as_hip_type(row_ptrs), - as_hip_type(col_idxs), as_hip_type(csr_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL); - - -template -void conj_transpose(std::shared_ptr exec, - const matrix::Diagonal *orig, - matrix::Diagonal *trans) -{ - const auto size = orig->get_size()[0]; - const auto grid_dim = ceildiv(size, default_block_size); - const auto orig_values = orig->get_const_values(); - auto trans_values = trans->get_values(); - - hipLaunchKernelGGL(kernel::conj_transpose, grid_dim, default_block_size, 0, - 0, size, as_hip_type(orig_values), - 
as_hip_type(trans_values)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL); - - } // namespace diagonal } // namespace hip } // namespace kernels diff --git a/hip/matrix/ell_kernels.hip.cpp b/hip/matrix/ell_kernels.hip.cpp index 91c9a3678af..27ced862d93 100644 --- a/hip/matrix/ell_kernels.hip.cpp +++ b/hip/matrix/ell_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,6 +46,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "accessor/reduced_row_major.hpp" +#include "core/base/mixed_precision_types.hpp" #include "core/components/fill_array.hpp" #include "core/components/prefix_sum.hpp" #include "core/matrix/dense_kernels.hpp" @@ -105,21 +107,43 @@ constexpr int max_thread_per_worker = 32; using compiled_kernels = syn::value_list; -#include "common/matrix/ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/ell_kernels.hpp.inc" namespace { -template +template +GKO_INLINE auto as_hip_accessor( + const acc::range> &acc) +{ + return acc::range< + acc::reduced_row_major, hip_type>>( + acc.get_accessor().get_size(), + as_hip_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_stride()); +} + + +template void abstract_spmv(syn::value_list, int num_worker_per_row, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Ell *a, + const matrix::Dense *b, + matrix::Dense *c, + const matrix::Dense *alpha = nullptr, + const matrix::Dense *beta = nullptr) { + using a_accessor = + gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; + using b_accessor = + gko::acc::reduced_row_major<2, OutputValueType, const InputValueType>; + const 
auto nrows = a->get_size()[0]; + const auto stride = a->get_stride(); + const auto num_stored_elements_per_row = + a->get_num_stored_elements_per_row(); + constexpr int num_thread_per_worker = (info == 0) ? max_thread_per_worker : info; constexpr bool atomic = (info == 0); @@ -127,24 +151,31 @@ void abstract_spmv(syn::value_list, int num_worker_per_row, num_thread_per_worker, 1); const dim3 grid_size(ceildiv(nrows * num_worker_per_row, block_size.x), b->get_size()[1], 1); + + const auto a_vals = gko::acc::range( + std::array{{num_stored_elements_per_row * stride}}, + a->get_const_values()); + const auto b_vals = gko::acc::range( + std::array{{b->get_size()[0], b->get_size()[1]}}, + b->get_const_values(), std::array{{b->get_stride()}}); + if (alpha == nullptr && beta == nullptr) { hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::spmv), dim3(grid_size), dim3(block_size), 0, 0, nrows, num_worker_per_row, - as_hip_type(a->get_const_values()), a->get_const_col_idxs(), - a->get_stride(), a->get_num_stored_elements_per_row(), - as_hip_type(b->get_const_values()), b->get_stride(), + as_hip_accessor(a_vals), a->get_const_col_idxs(), stride, + num_stored_elements_per_row, as_hip_accessor(b_vals), as_hip_type(c->get_values()), c->get_stride()); } else if (alpha != nullptr && beta != nullptr) { + const auto alpha_val = gko::acc::range( + std::array{1}, alpha->get_const_values()); hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::spmv), dim3(grid_size), dim3(block_size), 0, 0, nrows, num_worker_per_row, - as_hip_type(alpha->get_const_values()), - as_hip_type(a->get_const_values()), a->get_const_col_idxs(), - a->get_stride(), a->get_num_stored_elements_per_row(), - as_hip_type(b->get_const_values()), b->get_stride(), - as_hip_type(beta->get_const_values()), as_hip_type(c->get_values()), - c->get_stride()); + as_hip_accessor(alpha_val), as_hip_accessor(a_vals), + a->get_const_col_idxs(), stride, num_stored_elements_per_row, + as_hip_accessor(b_vals), as_hip_type(beta->get_const_values()), + 
as_hip_type(c->get_values()), c->get_stride()); } else { GKO_KERNEL_NOT_FOUND; } @@ -197,10 +228,12 @@ std::array compute_thread_worker_and_atomicity( } // namespace -template +template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Ell *a, + const matrix::Dense *b, + matrix::Dense *c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -215,7 +248,8 @@ void spmv(std::shared_ptr exec, const int info = (!atomic) * num_thread_per_worker; if (atomic) { components::fill_array(exec, c->get_values(), - c->get_num_stored_elements(), zero()); + c->get_num_stored_elements(), + zero()); } select_abstract_spmv( compiled_kernels(), @@ -224,16 +258,18 @@ void spmv(std::shared_ptr exec, c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_ELL_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ELL_SPMV_KERNEL); -template +template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense *alpha, + const matrix::Ell *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -256,7 +292,7 @@ void advanced_spmv(std::shared_ptr exec, alpha, beta); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL); diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp new file mode 100644 index 00000000000..13ca94ad5f7 --- /dev/null +++ b/hip/matrix/fbcsr_kernels.hip.cpp @@ -0,0 +1,173 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/fbcsr_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "hip/base/config.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +template +void spmv(std::shared_ptr exec, + const matrix::Fbcsr *a, + const matrix::Dense *b, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); + + +template +void advanced_spmv(std::shared_ptr exec, + const matrix::Dense *alpha, + const matrix::Fbcsr *a, + const matrix::Dense *b, + const matrix::Dense *beta, + matrix::Dense *c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); + + +template +void convert_to_dense(std::shared_ptr exec, + const matrix::Fbcsr *source, + matrix::Dense *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); + + +template +void convert_to_csr(const std::shared_ptr exec, + const matrix::Fbcsr *const source, + matrix::Csr *const result) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Fbcsr *trans) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + size_type *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr exec, + const matrix::Fbcsr *source, + Array *result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template 
+void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Fbcsr *to_check, + bool *is_sorted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); + + +template +void sort_by_column_index(const std::shared_ptr exec, + matrix::Fbcsr *const to_sort) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Fbcsr *orig, + matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); + + +} // namespace fbcsr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/hybrid_kernels.hip.cpp b/hip/matrix/hybrid_kernels.hip.cpp index e9efb0eb8ee..1d7f46ea743 100644 --- a/hip/matrix/hybrid_kernels.hip.cpp +++ b/hip/matrix/hybrid_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -69,7 +69,7 @@ constexpr int default_block_size = 512; constexpr int warps_in_block = 4; -#include "common/matrix/hybrid_kernels.hpp.inc" +#include "common/cuda_hip/matrix/hybrid_kernels.hpp.inc" template diff --git a/hip/matrix/sellp_kernels.hip.cpp b/hip/matrix/sellp_kernels.hip.cpp index 7d0a6ad353b..52f4f38e798 100644 --- a/hip/matrix/sellp_kernels.hip.cpp +++ b/hip/matrix/sellp_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -62,10 +62,10 @@ namespace hip { namespace sellp { -constexpr auto default_block_size = 512; +constexpr int default_block_size = 512; -#include "common/matrix/sellp_kernels.hpp.inc" +#include "common/cuda_hip/matrix/sellp_kernels.hpp.inc" template diff --git a/hip/matrix/sparsity_csr_kernels.hip.cpp b/hip/matrix/sparsity_csr_kernels.hip.cpp index 8ab3066f1ff..6747806eca4 100644 --- a/hip/matrix/sparsity_csr_kernels.hip.cpp +++ b/hip/matrix/sparsity_csr_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp new file mode 100644 index 00000000000..ddc9dd90333 --- /dev/null +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -0,0 +1,189 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/multigrid/amgx_pgm_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include +#include +#include + + +#include "core/components/fill_array.hpp" +#include "core/components/prefix_sum.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" +#include "hip/base/hipsparse_bindings.hip.hpp" +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/atomic.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The AMGX_PGM solver namespace. 
+ * + * @ingroup amgx_pgm + */ +namespace amgx_pgm { + + +constexpr int default_block_size = 512; + + +#include "common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc" + + +template +void match_edge(std::shared_ptr exec, + const Array &strongest_neighbor, + Array &agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::match_edge_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + strongest_neighbor.get_const_data(), agg.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); + + +template +void count_unagg(std::shared_ptr exec, + const Array &agg, IndexType *num_unagg) +{ + Array active_agg(exec, agg.get_num_elems()); + const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); + hipLaunchKernelGGL(kernel::activate_kernel, dim3(grid), + dim3(default_block_size), 0, 0, + active_agg.get_num_elems(), agg.get_const_data(), + active_agg.get_data()); + *num_unagg = reduce_add_array(exec, active_agg.get_num_elems(), + active_agg.get_const_data()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); + + +template +void renumber(std::shared_ptr exec, Array &agg, + IndexType *num_agg) +{ + const auto num = agg.get_num_elems(); + Array agg_map(exec, num + 1); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::fill_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + agg.get_const_data(), agg_map.get_data()); + components::prefix_sum(exec, agg_map.get_data(), agg_map.get_num_elems()); + hipLaunchKernelGGL(kernel::renumber_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + agg_map.get_const_data(), agg.get_data()); + *num_agg = exec->copy_val_to_host(agg_map.get_const_data() + num); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); + + +template +void find_strongest_neighbor( + std::shared_ptr exec, + const matrix::Csr 
*weight_mtx, + const matrix::Diagonal *diag, Array &agg, + Array &strongest_neighbor) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + hipLaunchKernelGGL(kernel::find_strongest_neighbor_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + agg.get_data(), strongest_neighbor.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); + + +template +void assign_to_exist_agg(std::shared_ptr exec, + const matrix::Csr *weight_mtx, + const matrix::Diagonal *diag, + Array &agg, + Array &intermediate_agg) +{ + const auto num = agg.get_num_elems(); + const dim3 grid(ceildiv(num, default_block_size)); + + if (intermediate_agg.get_num_elems() > 0) { + // determinstic kernel + hipLaunchKernelGGL( + kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), diag->get_const_values(), + agg.get_const_data(), intermediate_agg.get_data()); + // Copy the intermediate_agg to agg + agg = intermediate_agg; + } else { + // undeterminstic kernel + hipLaunchKernelGGL(kernel::assign_to_exist_agg_kernel, dim3(grid), + dim3(default_block_size), 0, 0, num, + weight_mtx->get_const_row_ptrs(), + weight_mtx->get_const_col_idxs(), + weight_mtx->get_const_values(), + diag->get_const_values(), agg.get_data()); + } +} + +GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); + + +} // namespace amgx_pgm +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/preconditioner/isai_kernels.hip.cpp b/hip/preconditioner/isai_kernels.hip.cpp index f2289eba530..3a96cacfecb 100644 --- a/hip/preconditioner/isai_kernels.hip.cpp +++ 
b/hip/preconditioner/isai_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -69,7 +69,9 @@ constexpr int subwarps_per_block{2}; constexpr int default_block_size{subwarps_per_block * subwarp_size}; -#include "common/preconditioner/isai_kernels.hpp.inc" +#include "common/cuda_hip/components/atomic.hpp.inc" +#include "common/cuda_hip/components/warp_blas.hpp.inc" +#include "common/cuda_hip/preconditioner/isai_kernels.hpp.inc" template @@ -110,6 +112,33 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL); +template +void generate_general_inverse(std::shared_ptr exec, + const matrix::Csr *input, + matrix::Csr *inverse, + IndexType *excess_rhs_ptrs, + IndexType *excess_nz_ptrs, bool spd) +{ + const auto num_rows = input->get_size()[0]; + + const dim3 block(default_block_size, 1, 1); + const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + kernel::generate_general_inverse), + grid, block, 0, 0, static_cast(num_rows), + input->get_const_row_ptrs(), input->get_const_col_idxs(), + as_hip_type(input->get_const_values()), inverse->get_row_ptrs(), + inverse->get_col_idxs(), as_hip_type(inverse->get_values()), + excess_rhs_ptrs, excess_nz_ptrs, spd); + components::prefix_sum(exec, excess_rhs_ptrs, num_rows + 1); + components::prefix_sum(exec, excess_nz_ptrs, num_rows + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL); + + template void generate_excess_system(std::shared_ptr exec, const matrix::Csr *input, @@ -117,12 +146,13 @@ void generate_excess_system(std::shared_ptr exec, const IndexType *excess_rhs_ptrs, const IndexType *excess_nz_ptrs, matrix::Csr *excess_system, - matrix::Dense *excess_rhs) + 
matrix::Dense *excess_rhs, + size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; const dim3 block(default_block_size, 1, 1); - const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / subwarp_size), 1, 1); hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::generate_excess_system), grid, block, 0, 0, static_cast(num_rows), @@ -131,29 +161,48 @@ void generate_excess_system(std::shared_ptr exec, inverse->get_const_col_idxs(), excess_rhs_ptrs, excess_nz_ptrs, excess_system->get_row_ptrs(), excess_system->get_col_idxs(), as_hip_type(excess_system->get_values()), - as_hip_type(excess_rhs->get_values())); + as_hip_type(excess_rhs->get_values()), e_start, e_end); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL); +template +void scale_excess_solution(std::shared_ptr, + const IndexType *excess_block_ptrs, + matrix::Dense *excess_solution, + size_type e_start, size_type e_end) +{ + const dim3 block(default_block_size, 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / subwarp_size), 1, 1); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(kernel::scale_excess_solution), grid, + block, 0, 0, excess_block_ptrs, + as_hip_type(excess_solution->get_values()), e_start, e_end); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL); + + template void scatter_excess_solution(std::shared_ptr exec, const IndexType *excess_rhs_ptrs, const matrix::Dense *excess_solution, - matrix::Csr *inverse) + matrix::Csr *inverse, + size_type e_start, size_type e_end) { const auto num_rows = inverse->get_size()[0]; const dim3 block(default_block_size, 1, 1); - const dim3 grid(ceildiv(num_rows, block.x / subwarp_size), 1, 1); + const dim3 grid(ceildiv(e_end - e_start, block.x / subwarp_size), 1, 1); hipLaunchKernelGGL( HIP_KERNEL_NAME(kernel::copy_excess_solution), grid, block, 0, 0, static_cast(num_rows), 
inverse->get_const_row_ptrs(), excess_rhs_ptrs, as_hip_type(excess_solution->get_const_values()), - as_hip_type(inverse->get_values())); + as_hip_type(inverse->get_values()), e_start, e_end); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp b/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp index d7d3e87970c..6ac6717447a 100644 --- a/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ namespace hip { namespace jacobi { -#include "common/preconditioner/jacobi_advanced_apply_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc" namespace { diff --git a/hip/preconditioner/jacobi_common.hip.hpp b/hip/preconditioner/jacobi_common.hip.hpp index d81dd3f9e97..2c94e7ce3b4 100644 --- a/hip/preconditioner/jacobi_common.hip.hpp +++ b/hip/preconditioner/jacobi_common.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/preconditioner/jacobi_generate_kernel.hip.cpp b/hip/preconditioner/jacobi_generate_kernel.hip.cpp index 6f8def4af6e..6f0e4789f3e 100644 --- a/hip/preconditioner/jacobi_generate_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_generate_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -66,7 +66,7 @@ namespace hip { namespace jacobi { -#include "common/preconditioner/jacobi_generate_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc" namespace { diff --git a/hip/preconditioner/jacobi_kernels.hip.cpp b/hip/preconditioner/jacobi_kernels.hip.cpp index b2d249f12b7..40e3aff69c5 100644 --- a/hip/preconditioner/jacobi_kernels.hip.cpp +++ b/hip/preconditioner/jacobi_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -73,7 +73,7 @@ constexpr int default_num_warps = 32; constexpr int default_grid_size = 32 * 32 * 128; -#include "common/preconditioner/jacobi_kernels.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc" template diff --git a/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp b/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp index 0c2fefb1afc..0adbedc7473 100644 --- a/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ namespace hip { namespace jacobi { -#include "common/preconditioner/jacobi_simple_apply_kernel.hpp.inc" +#include "common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc" namespace { diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp new file mode 100644 index 00000000000..2f5f8c32ef3 --- /dev/null +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/reorder/rcm_kernels.hpp" + + +#include +#include +#include +#include +#include +#include + + +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/prefix_sum.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The reordering namespace. + * + * @ingroup reorder + */ +namespace rcm { + + +template +void get_degree_of_nodes(std::shared_ptr exec, + const IndexType num_vertices, + const IndexType *const row_ptrs, + IndexType *const degrees) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); + + +template +void get_permutation( + std::shared_ptr exec, const IndexType num_vertices, + const IndexType *const row_ptrs, const IndexType *const col_idxs, + const IndexType *const degrees, IndexType *const permutation, + IndexType *const inv_permutation, + const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); + + +} // namespace rcm +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/solver/bicg_kernels.hip.cpp b/hip/solver/bicg_kernels.hip.cpp deleted file mode 100644 index e773520b258..00000000000 --- a/hip/solver/bicg_kernels.hip.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/************************************************************* -Copyright 
(c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/bicg_kernels.hpp" - - -#include - - -#include -#include - - -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" - - -namespace gko { -namespace kernels { -namespace hip { -/** - * @brief The BICG solver namespace. 
- * - * @ingroup bicg - */ -namespace bicg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/bicg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *r2, - matrix::Dense *z2, matrix::Dense *p2, - matrix::Dense *q2, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - initialize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_hip_type(b->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(z->get_values()), as_hip_type(p->get_values()), - as_hip_type(q->get_values()), as_hip_type(r2->get_values()), - as_hip_type(z2->get_values()), as_hip_type(p2->get_values()), - as_hip_type(q2->get_values()), as_hip_type(prev_rho->get_values()), - as_hip_type(rho->get_values()), as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - matrix::Dense *p2, const matrix::Dense *z2, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_1_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_hip_type(p->get_values()), as_hip_type(z->get_const_values()), - as_hip_type(p2->get_values()), as_hip_type(z2->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(prev_rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - 
-GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *r2, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *q2, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_2_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_hip_type(x->get_values()), as_hip_type(r->get_values()), - as_hip_type(r2->get_values()), as_hip_type(p->get_const_values()), - as_hip_type(q->get_const_values()), as_hip_type(q2->get_const_values()), - as_hip_type(beta->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_2_KERNEL); - - -} // namespace bicg -} // namespace hip -} // namespace kernels -} // namespace gko diff --git a/hip/solver/bicgstab_kernels.hip.cpp b/hip/solver/bicgstab_kernels.hip.cpp deleted file mode 100644 index a8776876f6f..00000000000 --- a/hip/solver/bicgstab_kernels.hip.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. 
Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/bicgstab_kernels.hpp" - - -#include - - -#include -#include - - -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" - - -namespace gko { -namespace kernels { -namespace hip { -/** - * @brief The BICGSTAB solver namespace. 
- * - * @ingroup bicgstab - */ -namespace bicgstab { - - -constexpr int default_block_size = 512; - - -#include "common/solver/bicgstab_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *rr, matrix::Dense *y, - matrix::Dense *s, matrix::Dense *t, - matrix::Dense *z, matrix::Dense *v, - matrix::Dense *p, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *alpha, - matrix::Dense *beta, matrix::Dense *gamma, - matrix::Dense *omega, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - initialize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_hip_type(b->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(rr->get_values()), as_hip_type(y->get_values()), - as_hip_type(s->get_values()), as_hip_type(t->get_values()), - as_hip_type(z->get_values()), as_hip_type(v->get_values()), - as_hip_type(p->get_values()), as_hip_type(prev_rho->get_values()), - as_hip_type(rho->get_values()), as_hip_type(alpha->get_values()), - as_hip_type(beta->get_values()), as_hip_type(gamma->get_values()), - as_hip_type(omega->get_values()), as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *p, - const matrix::Dense *v, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const matrix::Dense *alpha, - const matrix::Dense *omega, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL(step_1_kernel, dim3(grid_size), dim3(block_size), 0, 0, - r->get_size()[0], r->get_size()[1], r->get_stride(), 
- as_hip_type(r->get_const_values()), - as_hip_type(p->get_values()), - as_hip_type(v->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(prev_rho->get_const_values()), - as_hip_type(alpha->get_const_values()), - as_hip_type(omega->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *s, - const matrix::Dense *v, - const matrix::Dense *rho, - matrix::Dense *alpha, - const matrix::Dense *beta, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_2_kernel, dim3(grid_size), dim3(block_size), 0, 0, - r->get_size()[0], r->get_size()[1], r->get_stride(), - as_hip_type(r->get_const_values()), as_hip_type(s->get_values()), - as_hip_type(v->get_const_values()), - as_hip_type(rho->get_const_values()), as_hip_type(alpha->get_values()), - as_hip_type(beta->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); - - -template -void step_3( - std::shared_ptr exec, matrix::Dense *x, - matrix::Dense *r, const matrix::Dense *s, - const matrix::Dense *t, const matrix::Dense *y, - const matrix::Dense *z, const matrix::Dense *alpha, - const matrix::Dense *beta, const matrix::Dense *gamma, - matrix::Dense *omega, const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(r->get_size()[0] * r->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_3_kernel, dim3(grid_size), dim3(block_size), 0, 0, - r->get_size()[0], r->get_size()[1], r->get_stride(), x->get_stride(), - as_hip_type(x->get_values()), as_hip_type(r->get_values()), - as_hip_type(s->get_const_values()), 
as_hip_type(t->get_const_values()), - as_hip_type(y->get_const_values()), as_hip_type(z->get_const_values()), - as_hip_type(alpha->get_const_values()), - as_hip_type(beta->get_const_values()), - as_hip_type(gamma->get_const_values()), - as_hip_type(omega->get_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); - - -template -void finalize(std::shared_ptr exec, - matrix::Dense *x, const matrix::Dense *y, - const matrix::Dense *alpha, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(y->get_size()[0] * y->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL(finalize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - y->get_size()[0], y->get_size()[1], y->get_stride(), - x->get_stride(), as_hip_type(x->get_values()), - as_hip_type(y->get_const_values()), - as_hip_type(alpha->get_const_values()), - as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL); - - -} // namespace bicgstab -} // namespace hip -} // namespace kernels -} // namespace gko diff --git a/hip/solver/cb_gmres_kernels.hip.cpp b/hip/solver/cb_gmres_kernels.hip.cpp new file mode 100644 index 00000000000..23bf0546cd6 --- /dev/null +++ b/hip/solver/cb_gmres_kernels.hip.cpp @@ -0,0 +1,546 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/solver/cb_gmres_kernels.hpp" + + +#include + + +#include +#include +#include +#include + + +#include "accessor/range.hpp" +#include "accessor/reduced_row_major.hpp" +#include "accessor/scaled_reduced_row_major.hpp" +#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" +#include "core/solver/cb_gmres_accessor.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/atomic.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The CB_GMRES solver namespace. + * + * @ingroup cb_gmres + */ +namespace cb_gmres { + + +constexpr int default_block_size = 512; +// default_dot_dim can not be 64 in hip because 64 * 64 exceeds their max block +// size limit. 
+constexpr int default_dot_dim = 32; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "common/cuda_hip/solver/cb_gmres_kernels.hpp.inc" + + +// Specialization, so the Accessor can use the same function as regular pointers +template +GKO_INLINE auto as_hip_accessor( + const acc::range> &acc) +{ + return acc::range< + acc::reduced_row_major, hip_type>>( + acc.get_accessor().get_size(), + as_hip_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_stride()); +} + +template +GKO_INLINE auto as_hip_accessor( + const acc::range> + &acc) +{ + return acc::range, + hip_type, mask>>( + acc.get_accessor().get_size(), + as_hip_type(acc.get_accessor().get_stored_data()), + acc.get_accessor().get_storage_stride(), + as_hip_type(acc.get_accessor().get_scalar()), + acc.get_accessor().get_scalar_stride()); +} + + +template +void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) +{ + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); + hipLaunchKernelGGL(zero_matrix_kernel, grid_size, block_size, 0, 0, m, n, + stride, as_hip_type(array)); +} + + +template +void initialize_1(std::shared_ptr exec, + const matrix::Dense *b, + matrix::Dense *residual, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + Array *stop_status, size_type krylov_dim) +{ + const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), + krylov_dim * b->get_size()[1]); + const dim3 grid_dim(ceildiv(num_threads, default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + + hipLaunchKernelGGL( + initialize_1_kernel, grid_dim, block_dim, 0, 0, + b->get_size()[0], b->get_size()[1], krylov_dim, + as_hip_type(b->get_const_values()), b->get_stride(), + as_hip_type(residual->get_values()), residual->get_stride(), + as_hip_type(givens_sin->get_values()), givens_sin->get_stride(), + 
as_hip_type(givens_cos->get_values()), givens_cos->get_stride(), + as_hip_type(stop_status->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); + + +template +void initialize_2(std::shared_ptr exec, + const matrix::Dense *residual, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + matrix::Dense> *arnoldi_norm, + Accessor3d krylov_bases, + matrix::Dense *next_krylov_basis, + Array *final_iter_nums, size_type krylov_dim) +{ + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto num_rows = residual->get_size()[0]; + const auto num_rhs = residual->get_size()[1]; + const auto krylov_stride = + gko::cb_gmres::helper_functions_accessor::get_stride( + krylov_bases); + const dim3 grid_dim_1( + ceildiv((krylov_dim + 1) * krylov_stride[0], default_block_size), 1, 1); + const dim3 block_dim(default_block_size, 1, 1); + constexpr auto block_size = default_block_size; + const auto stride_arnoldi = arnoldi_norm->get_stride(); + + hipLaunchKernelGGL(initialize_2_1_kernel, grid_dim_1, block_dim, + 0, 0, residual->get_size()[0], residual->get_size()[1], + krylov_dim, as_hip_accessor(krylov_bases), + as_hip_type(residual_norm_collection->get_values()), + residual_norm_collection->get_stride()); + kernels::hip::dense::compute_norm2(exec, residual, residual_norm); + + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + num_rhs, zero>()); + const dim3 grid_size_nrm(ceildiv(num_rhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_size_nrm(default_dot_dim, default_dot_dim); + hipLaunchKernelGGL( + multinorminf_without_stop_kernel, grid_size_nrm, block_size_nrm, 0, + 0, num_rows, num_rhs, as_hip_type(residual->get_const_values()), + residual->get_stride(), + as_hip_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), 0); + } + + if (gko::cb_gmres::detail::has_3d_scaled_accessor::value) { + 
hipLaunchKernelGGL( + set_scalar_kernel, + dim3(ceildiv(num_rhs * (krylov_dim + 1), default_block_size)), + dim3(default_block_size), 0, 0, num_rhs, krylov_dim + 1, + as_hip_type(residual_norm->get_const_values()), + residual_norm->get_stride(), + as_hip_type(arnoldi_norm->get_const_values() + 2 * stride_arnoldi), + stride_arnoldi, as_hip_accessor(krylov_bases)); + } + + const dim3 grid_dim_2( + ceildiv(num_rows * krylov_stride[1], default_block_size), 1, 1); + hipLaunchKernelGGL(initialize_2_2_kernel, grid_dim_2, block_dim, + 0, 0, residual->get_size()[0], residual->get_size()[1], + as_hip_type(residual->get_const_values()), + residual->get_stride(), + as_hip_type(residual_norm->get_const_values()), + as_hip_type(residual_norm_collection->get_values()), + as_hip_accessor(krylov_bases), + as_hip_type(next_krylov_basis->get_values()), + next_krylov_basis->get_stride(), + as_hip_type(final_iter_nums->get_data())); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( + GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL); + + +template +void finish_arnoldi_CGS(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + Accessor3dim krylov_bases, + matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, const stopping_status *stop_status, + stopping_status *reorth_status, + Array *num_reorth) +{ + using non_complex = remove_complex; + // optimization parameter + constexpr int singledot_block_size = default_dot_dim; + constexpr bool use_scalar = + gko::cb_gmres::detail::has_3d_scaled_accessor::value; + const auto stride_next_krylov = next_krylov_basis->get_stride(); + const auto stride_hessenberg = hessenberg_iter->get_stride(); + const auto stride_buffer = buffer_iter->get_stride(); + const auto stride_arnoldi = arnoldi_norm->get_stride(); + const auto dim_size = next_krylov_basis->get_size(); + const dim3 grid_size(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 
grid_size_num_iters(ceildiv(dim_size[1], default_dot_dim), + exec->get_num_multiprocessor() * 2, + iter + 1); + const dim3 block_size(default_dot_dim, default_dot_dim); + // Note: having iter first (instead of row_idx information) is likely + // beneficial for avoiding atomic_add conflicts, but that needs + // further investigation. + const dim3 grid_size_iters_single(exec->get_num_multiprocessor() * 2, + iter + 1); + const dim3 block_size_iters_single(singledot_block_size); + size_type num_reorth_host; + + components::fill_array(exec, arnoldi_norm->get_values(), dim_size[1], + zero()); + hipLaunchKernelGGL( + multinorm2_kernel, grid_size, block_size, 0, 0, dim_size[0], + dim_size[1], as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_hip_type(arnoldi_norm->get_values()), + as_hip_type(stop_status)); + // nrmP = norm(next_krylov_basis + zero_matrix(iter + 1, dim_size[1], stride_hessenberg, + hessenberg_iter->get_values()); + if (dim_size[1] > 1) { + hipLaunchKernelGGL(multidot_kernel, + grid_size_num_iters, block_size, 0, 0, dim_size[0], + dim_size[1], + as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_hip_accessor(krylov_bases), + as_hip_type(hessenberg_iter->get_values()), + stride_hessenberg, as_hip_type(stop_status)); + } else { + hipLaunchKernelGGL(singledot_kernel, + grid_size_iters_single, block_size_iters_single, 0, + 0, dim_size[0], + as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_hip_accessor(krylov_bases), + as_hip_type(hessenberg_iter->get_values()), + stride_hessenberg, as_hip_type(stop_status)); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + hipLaunchKernelGGL( + update_next_krylov_kernel, + dim3(ceildiv(dim_size[0] * stride_next_krylov, default_block_size)), + dim3(default_block_size), 0, 0, iter + 1, dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_values()), stride_next_krylov, + 
as_hip_accessor(krylov_bases), + as_hip_type(hessenberg_iter->get_const_values()), stride_hessenberg, + as_hip_type(stop_status)); + + // for i in 1:iter + // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array(exec, + arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + hipLaunchKernelGGL( + multinorm2_inf_kernel, grid_size, block_size, 0, 0, + dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_const_values()), stride_next_krylov, + as_hip_type(arnoldi_norm->get_values() + stride_arnoldi), + as_hip_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_hip_type(stop_status)); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, zero()); + hipLaunchKernelGGL( + check_arnoldi_norms, + dim3(ceildiv(dim_size[1], default_block_size)), + dim3(default_block_size), 0, 0, dim_size[1], + as_hip_type(arnoldi_norm->get_values()), stride_arnoldi, + as_hip_type(hessenberg_iter->get_values()), stride_hessenberg, iter + 1, + as_hip_accessor(krylov_bases), as_hip_type(stop_status), + as_hip_type(reorth_status), as_hip_type(num_reorth->get_data())); + num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + // num_reorth_host := number of next_krylov vector to be reorthogonalization + for (size_type l = 1; (num_reorth_host > 0) && (l < 3); l++) { + zero_matrix(iter + 1, dim_size[1], stride_buffer, + buffer_iter->get_values()); + if (dim_size[1] > 1) { + hipLaunchKernelGGL( + multidot_kernel, grid_size_num_iters, + block_size, 0, 0, dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_hip_accessor(krylov_bases), + as_hip_type(buffer_iter->get_values()), stride_buffer, + as_hip_type(stop_status)); + } else { + hipLaunchKernelGGL( + singledot_kernel, grid_size_iters_single, 
+ block_size_iters_single, 0, 0, dim_size[0], + as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, as_hip_accessor(krylov_bases), + as_hip_type(buffer_iter->get_values()), stride_buffer, + as_hip_type(stop_status)); + } + // for i in 1:iter + // hessenberg(iter, i) = next_krylov_basis' * krylov_bases(:, i) + // end + hipLaunchKernelGGL( + update_next_krylov_and_add_kernel, + dim3(ceildiv(dim_size[0] * stride_next_krylov, default_block_size)), + dim3(default_block_size), 0, 0, iter + 1, dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_values()), stride_next_krylov, + as_hip_accessor(krylov_bases), + as_hip_type(hessenberg_iter->get_values()), stride_hessenberg, + as_hip_type(buffer_iter->get_const_values()), stride_buffer, + as_hip_type(stop_status), as_hip_type(reorth_status)); + // for i in 1:iter + // next_krylov_basis -= hessenberg(iter, i) * krylov_bases(:, i) + // end + components::fill_array(exec, + arnoldi_norm->get_values() + stride_arnoldi, + dim_size[1], zero()); + if (use_scalar) { + components::fill_array( + exec, arnoldi_norm->get_values() + 2 * stride_arnoldi, + dim_size[1], zero()); + } + hipLaunchKernelGGL( + multinorm2_inf_kernel, grid_size, block_size, 0, 0, + dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_const_values()), + stride_next_krylov, + as_hip_type(arnoldi_norm->get_values() + stride_arnoldi), + as_hip_type(arnoldi_norm->get_values() + 2 * stride_arnoldi), + as_hip_type(stop_status)); + // nrmN = norm(next_krylov_basis) + components::fill_array(exec, num_reorth->get_data(), 1, + zero()); + hipLaunchKernelGGL( + check_arnoldi_norms, + dim3(ceildiv(dim_size[1], default_block_size)), + dim3(default_block_size), 0, 0, dim_size[1], + as_hip_type(arnoldi_norm->get_values()), stride_arnoldi, + as_hip_type(hessenberg_iter->get_values()), stride_hessenberg, + iter + 1, as_hip_accessor(krylov_bases), as_hip_type(stop_status), + as_hip_type(reorth_status), as_hip_type(num_reorth->get_data())); + 
num_reorth_host = exec->copy_val_to_host(num_reorth->get_const_data()); + // num_reorth_host := number of next_krylov vector to be + // reorthogonalization + } + + hipLaunchKernelGGL( + update_krylov_next_krylov_kernel, + dim3(ceildiv(dim_size[0] * stride_next_krylov, default_block_size)), + dim3(default_block_size), 0, 0, iter, dim_size[0], dim_size[1], + as_hip_type(next_krylov_basis->get_values()), stride_next_krylov, + as_hip_accessor(krylov_bases), + as_hip_type(hessenberg_iter->get_const_values()), stride_hessenberg, + as_hip_type(stop_status)); + // next_krylov_basis /= hessenberg(iter, iter + 1) + // krylov_bases(:, iter + 1) = next_krylov_basis + // End of arnoldi +} + +template +void givens_rotation(std::shared_ptr exec, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense *hessenberg_iter, + matrix::Dense> *residual_norm, + matrix::Dense *residual_norm_collection, + size_type iter, const Array *stop_status) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_cols = hessenberg_iter->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{ + static_cast(ceildiv(num_cols, block_size)), 1, 1}; + + hipLaunchKernelGGL( + givens_rotation_kernel, grid_dim, block_dim, 0, 0, + hessenberg_iter->get_size()[0], hessenberg_iter->get_size()[1], iter, + as_hip_type(hessenberg_iter->get_values()), + hessenberg_iter->get_stride(), as_hip_type(givens_sin->get_values()), + givens_sin->get_stride(), as_hip_type(givens_cos->get_values()), + givens_cos->get_stride(), as_hip_type(residual_norm->get_values()), + as_hip_type(residual_norm_collection->get_values()), + residual_norm_collection->get_stride(), + as_hip_type(stop_status->get_const_data())); +} + + +template +void step_1(std::shared_ptr exec, + matrix::Dense *next_krylov_basis, + matrix::Dense *givens_sin, + matrix::Dense *givens_cos, + matrix::Dense> *residual_norm, + matrix::Dense 
*residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, + matrix::Dense *buffer_iter, + matrix::Dense> *arnoldi_norm, + size_type iter, Array *final_iter_nums, + const Array *stop_status, + Array *reorth_status, Array *num_reorth) +{ + hipLaunchKernelGGL( + increase_final_iteration_numbers_kernel, + dim3(static_cast( + ceildiv(final_iter_nums->get_num_elems(), default_block_size))), + dim3(default_block_size), 0, 0, + as_hip_type(final_iter_nums->get_data()), + as_hip_type(stop_status->get_const_data()), + final_iter_nums->get_num_elems()); + finish_arnoldi_CGS(exec, next_krylov_basis, krylov_bases, hessenberg_iter, + buffer_iter, arnoldi_norm, iter, + stop_status->get_const_data(), reorth_status->get_data(), + num_reorth); + givens_rotation(exec, givens_sin, givens_cos, hessenberg_iter, + residual_norm, residual_norm_collection, iter, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); + + +template +void solve_upper_triangular( + const matrix::Dense *residual_norm_collection, + const matrix::Dense *hessenberg, matrix::Dense *y, + const Array *final_iter_nums) +{ + // TODO: tune block_size for optimal performance + constexpr auto block_size = default_block_size; + const auto num_rhs = residual_norm_collection->get_size()[1]; + const dim3 block_dim{block_size, 1, 1}; + const dim3 grid_dim{static_cast(ceildiv(num_rhs, block_size)), + 1, 1}; + + hipLaunchKernelGGL( + solve_upper_triangular_kernel, grid_dim, block_dim, 0, 0, + hessenberg->get_size()[1], num_rhs, + as_hip_type(residual_norm_collection->get_const_values()), + residual_norm_collection->get_stride(), + as_hip_type(hessenberg->get_const_values()), hessenberg->get_stride(), + as_hip_type(y->get_values()), y->get_stride(), + as_hip_type(final_iter_nums->get_const_data())); +} + + +template +void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, + const matrix::Dense *y, + matrix::Dense *before_preconditioner, + 
const Array *final_iter_nums) +{ + const auto num_rows = before_preconditioner->get_size()[0]; + const auto num_cols = before_preconditioner->get_size()[1]; + const auto stride_before_preconditioner = + before_preconditioner->get_stride(); + + constexpr auto block_size = default_block_size; + const dim3 grid_dim{ + static_cast( + ceildiv(num_rows * stride_before_preconditioner, block_size)), + 1, 1}; + const dim3 block_dim{block_size, 1, 1}; + + + hipLaunchKernelGGL(calculate_Qy_kernel, grid_dim, block_dim, 0, + 0, num_rows, num_cols, as_hip_accessor(krylov_bases), + as_hip_type(y->get_const_values()), y->get_stride(), + as_hip_type(before_preconditioner->get_values()), + stride_before_preconditioner, + as_hip_type(final_iter_nums->get_const_data())); + // Calculate qy + // before_preconditioner = krylov_bases * y +} + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *residual_norm_collection, + ConstAccessor3d krylov_bases, + const matrix::Dense *hessenberg, + matrix::Dense *y, + matrix::Dense *before_preconditioner, + const Array *final_iter_nums) +{ + // since hessenberg has dims: iters x iters * num_rhs + // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs + const auto iters = + hessenberg->get_size()[1] / before_preconditioner->get_size()[1]; + const auto num_krylov_bases = iters + 1; + solve_upper_triangular(residual_norm_collection, hessenberg, y, + final_iter_nums); + calculate_qy(krylov_bases, num_krylov_bases, y, before_preconditioner, + final_iter_nums); +} + + +GKO_INSTANTIATE_FOR_EACH_CB_GMRES_CONST_TYPE( + GKO_DECLARE_CB_GMRES_STEP_2_KERNEL); + + +} // namespace cb_gmres +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/solver/cg_kernels.hip.cpp b/hip/solver/cg_kernels.hip.cpp deleted file mode 100644 index 688a6ab7f49..00000000000 --- a/hip/solver/cg_kernels.hip.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the 
Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/cg_kernels.hpp" - - -#include - - -#include -#include - - -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" - - -namespace gko { -namespace kernels { -namespace hip { -/** - * @brief The CG solver namespace. 
- * - * @ingroup cg - */ -namespace cg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/cg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - initialize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_hip_type(b->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(z->get_values()), as_hip_type(p->get_values()), - as_hip_type(q->get_values()), as_hip_type(prev_rho->get_values()), - as_hip_type(rho->get_values()), as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL(step_1_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_hip_type(p->get_values()), - as_hip_type(z->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(prev_rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 
block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_2_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_hip_type(x->get_values()), as_hip_type(r->get_values()), - as_hip_type(p->get_const_values()), as_hip_type(q->get_const_values()), - as_hip_type(beta->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_2_KERNEL); - - -} // namespace cg -} // namespace hip -} // namespace kernels -} // namespace gko diff --git a/hip/solver/cgs_kernels.hip.cpp b/hip/solver/cgs_kernels.hip.cpp deleted file mode 100644 index b5597777790..00000000000 --- a/hip/solver/cgs_kernels.hip.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/cgs_kernels.hpp" - - -#include - - -#include -#include - - -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" - - -namespace gko { -namespace kernels { -namespace hip { -/** - * @brief The CGS solver namespace. - * - * @ingroup cgs - */ -namespace cgs { - - -constexpr int default_block_size = 512; - - -#include "common/solver/cgs_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *r_tld, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *u, - matrix::Dense *u_hat, - matrix::Dense *v_hat, matrix::Dense *t, - matrix::Dense *alpha, matrix::Dense *beta, - matrix::Dense *gamma, - matrix::Dense *rho_prev, - matrix::Dense *rho, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - initialize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_hip_type(b->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(r_tld->get_values()), as_hip_type(p->get_values()), - as_hip_type(q->get_values()), as_hip_type(u->get_values()), - as_hip_type(u_hat->get_values()), as_hip_type(v_hat->get_values()), - as_hip_type(t->get_values()), 
as_hip_type(alpha->get_values()), - as_hip_type(beta->get_values()), as_hip_type(gamma->get_values()), - as_hip_type(rho_prev->get_values()), as_hip_type(rho->get_values()), - as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *u, - matrix::Dense *p, const matrix::Dense *q, - matrix::Dense *beta, const matrix::Dense *rho, - const matrix::Dense *rho_prev, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_1_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_hip_type(r->get_const_values()), as_hip_type(u->get_values()), - as_hip_type(p->get_values()), as_hip_type(q->get_const_values()), - as_hip_type(beta->get_values()), as_hip_type(rho->get_const_values()), - as_hip_type(rho_prev->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - const matrix::Dense *u, - const matrix::Dense *v_hat, matrix::Dense *q, - matrix::Dense *t, matrix::Dense *alpha, - const matrix::Dense *rho, - const matrix::Dense *gamma, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(u->get_size()[0] * u->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_2_kernel, dim3(grid_size), dim3(block_size), 0, 0, - u->get_size()[0], u->get_size()[1], u->get_stride(), - as_hip_type(u->get_const_values()), - as_hip_type(v_hat->get_const_values()), as_hip_type(q->get_values()), - as_hip_type(t->get_values()), as_hip_type(alpha->get_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(gamma->get_const_values()), - 
as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); - - -template -void step_3(std::shared_ptr exec, - const matrix::Dense *t, - const matrix::Dense *u_hat, matrix::Dense *r, - matrix::Dense *x, const matrix::Dense *alpha, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(t->get_size()[0] * t->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_3_kernel, dim3(grid_size), dim3(block_size), 0, 0, - t->get_size()[0], t->get_size()[1], t->get_stride(), x->get_stride(), - as_hip_type(t->get_const_values()), - as_hip_type(u_hat->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(x->get_values()), as_hip_type(alpha->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_3_KERNEL); - - -} // namespace cgs -} // namespace hip -} // namespace kernels -} // namespace gko diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp index 3bf0e56c7fa..8698b23383f 100644 --- a/hip/solver/common_trs_kernels.hip.hpp +++ b/hip/solver/common_trs_kernels.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/solver/fcg_kernels.hip.cpp b/hip/solver/fcg_kernels.hip.cpp deleted file mode 100644 index 750aa5743d7..00000000000 --- a/hip/solver/fcg_kernels.hip.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. 
Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include "core/solver/fcg_kernels.hpp" - - -#include - - -#include -#include - - -#include "hip/base/math.hip.hpp" -#include "hip/base/types.hip.hpp" -#include "hip/components/thread_ids.hip.hpp" - - -namespace gko { -namespace kernels { -namespace hip { -/** - * @brief The FCG solver namespace. 
- * - * @ingroup fcg - */ -namespace fcg { - - -constexpr int default_block_size = 512; - - -#include "common/solver/fcg_kernels.hpp.inc" - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *t, - matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *rho_t, - Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(b->get_size()[0] * b->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - initialize_kernel, dim3(grid_size), dim3(block_size), 0, 0, - b->get_size()[0], b->get_size()[1], b->get_stride(), - as_hip_type(b->get_const_values()), as_hip_type(r->get_values()), - as_hip_type(z->get_values()), as_hip_type(p->get_values()), - as_hip_type(q->get_values()), as_hip_type(t->get_values()), - as_hip_type(prev_rho->get_values()), as_hip_type(rho->get_values()), - as_hip_type(rho_t->get_values()), as_hip_type(stop_status->get_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); - - -template -void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho_t, - const matrix::Dense *prev_rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL(step_1_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), - as_hip_type(p->get_values()), - as_hip_type(z->get_const_values()), - as_hip_type(rho_t->get_const_values()), - as_hip_type(prev_rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); - - -template -void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *t, const matrix::Dense *p, - const 
matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) -{ - const dim3 block_size(default_block_size, 1, 1); - const dim3 grid_size( - ceildiv(p->get_size()[0] * p->get_stride(), block_size.x), 1, 1); - - hipLaunchKernelGGL( - step_2_kernel, dim3(grid_size), dim3(block_size), 0, 0, - p->get_size()[0], p->get_size()[1], p->get_stride(), x->get_stride(), - as_hip_type(x->get_values()), as_hip_type(r->get_values()), - as_hip_type(t->get_values()), as_hip_type(p->get_const_values()), - as_hip_type(q->get_const_values()), - as_hip_type(beta->get_const_values()), - as_hip_type(rho->get_const_values()), - as_hip_type(stop_status->get_const_data())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_2_KERNEL); - - -} // namespace fcg -} // namespace hip -} // namespace kernels -} // namespace gko diff --git a/hip/solver/gmres_kernels.hip.cpp b/hip/solver/gmres_kernels.hip.cpp index 2780b149660..38c6477e516 100644 --- a/hip/solver/gmres_kernels.hip.cpp +++ b/hip/solver/gmres_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/fill_array.hpp" +#include "core/matrix/dense_kernels.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/math.hip.hpp" @@ -75,7 +76,7 @@ constexpr int default_dot_dim = 32; constexpr int default_dot_size = default_dot_dim * default_dot_dim; -#include "common/solver/gmres_kernels.hpp.inc" +#include "common/cuda_hip/solver/gmres_kernels.hpp.inc" template @@ -122,7 +123,7 @@ void initialize_2(std::shared_ptr exec, const dim3 block_dim(default_block_size, 1, 1); constexpr auto block_size = default_block_size; - residual->compute_norm2(residual_norm); + kernels::hip::dense::compute_norm2(exec, residual, residual_norm); const dim3 grid_dim_2(ceildiv(num_rows * num_rhs, default_block_size), 1, 1); diff --git a/hip/solver/idr_kernels.hip.cpp b/hip/solver/idr_kernels.hip.cpp new file mode 100644 index 00000000000..1e9cb66aa2b --- /dev/null +++ b/hip/solver/idr_kernels.hip.cpp @@ -0,0 +1,368 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/idr_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include + + +#include "core/components/fill_array.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/hiprand_bindings.hip.hpp" +#include "hip/base/math.hip.hpp" +#include "hip/base/types.hip.hpp" +#include "hip/components/atomic.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The IDR solver namespace. 
+ * + * @ingroup idr + */ +namespace idr { + + +constexpr int default_block_size = 512; +constexpr int default_dot_dim = 32; +constexpr int default_dot_size = default_dot_dim * default_dot_dim; + + +#include "common/cuda_hip/solver/idr_kernels.hpp.inc" + + +namespace { + + +template +void initialize_m(const size_type nrhs, matrix::Dense *m, + Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + const auto m_stride = m->get_stride(); + + const auto grid_dim = ceildiv(m_stride * subspace_dim, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(initialize_m_kernel), grid_dim, + default_block_size, 0, 0, subspace_dim, nrhs, + as_hip_type(m->get_values()), m_stride, + as_hip_type(stop_status->get_data())); +} + + +template +void initialize_subspace_vectors(matrix::Dense *subspace_vectors, + bool deterministic) +{ + if (deterministic) { + auto subspace_vectors_data = matrix_data( + subspace_vectors->get_size(), std::normal_distribution<>(0.0, 1.0), + std::ranlux48(15)); + subspace_vectors->read(subspace_vectors_data); + } else { + auto gen = + hiprand::rand_generator(time(NULL), HIPRAND_RNG_PSEUDO_DEFAULT); + hiprand::rand_vector( + gen, + subspace_vectors->get_size()[0] * subspace_vectors->get_stride(), + 0.0, 1.0, subspace_vectors->get_values()); + } +} + + +template +void orthonormalize_subspace_vectors(matrix::Dense *subspace_vectors) +{ + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + orthonormalize_subspace_vectors_kernel), + 1, default_block_size, 0, 0, subspace_vectors->get_size()[0], + subspace_vectors->get_size()[1], + as_hip_type(subspace_vectors->get_values()), + subspace_vectors->get_stride()); +} + + +template +void solve_lower_triangular(const size_type nrhs, + const matrix::Dense *m, + const matrix::Dense *f, + matrix::Dense *c, + const Array *stop_status) +{ + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(nrhs, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(solve_lower_triangular_kernel), 
grid_dim, + default_block_size, 0, 0, subspace_dim, nrhs, + as_hip_type(m->get_const_values()), m->get_stride(), + as_hip_type(f->get_const_values()), f->get_stride(), + as_hip_type(c->get_values()), c->get_stride(), + as_hip_type(stop_status->get_const_data())); +} + + +template +void update_g_and_u(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *p, + const matrix::Dense *m, + matrix::Dense *alpha, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, + const Array *stop_status) +{ + const auto size = g->get_size()[0]; + const auto p_stride = p->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = 0; i < k; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, alpha->get_values(), nrhs, + zero()); + hipLaunchKernelGGL( + multidot_kernel, grid_dim, block_dim, 0, 0, size, nrhs, + as_hip_type(p_i), as_hip_type(g_k->get_values()), + g_k->get_stride(), as_hip_type(alpha->get_values()), + as_hip_type(stop_status->get_const_data())); + } else { + hipblas::dot(exec->get_hipblas_handle(), size, p_i, 1, + g_k->get_values(), g_k->get_stride(), + alpha->get_values()); + } + hipLaunchKernelGGL( + update_g_k_and_u_kernel, + ceildiv(size * g_k->get_stride(), default_block_size), + default_block_size, 0, 0, k, i, size, nrhs, + as_hip_type(alpha->get_const_values()), + as_hip_type(m->get_const_values()), m->get_stride(), + as_hip_type(g->get_const_values()), g->get_stride(), + as_hip_type(g_k->get_values()), g_k->get_stride(), + as_hip_type(u->get_values()), u->get_stride(), + as_hip_type(stop_status->get_const_data())); + } + hipLaunchKernelGGL(update_g_kernel, + ceildiv(size * g_k->get_stride(), default_block_size), + default_block_size, 0, 0, k, size, nrhs, + as_hip_type(g_k->get_const_values()), g_k->get_stride(), + 
as_hip_type(g->get_values()), g->get_stride(), + as_hip_type(stop_status->get_const_data())); +} + + +template +void update_m(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *p, + const matrix::Dense *g_k, matrix::Dense *m, + const Array *stop_status) +{ + const auto size = g_k->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + const auto p_stride = p->get_stride(); + const auto m_stride = m->get_stride(); + + const dim3 grid_dim(ceildiv(nrhs, default_dot_dim), + exec->get_num_multiprocessor() * 2); + const dim3 block_dim(default_dot_dim, default_dot_dim); + + for (size_type i = k; i < subspace_dim; i++) { + const auto p_i = p->get_const_values() + i * p_stride; + auto m_i = m->get_values() + i * m_stride + k * nrhs; + if (nrhs > 1 || is_complex()) { + components::fill_array(exec, m_i, nrhs, zero()); + hipLaunchKernelGGL(multidot_kernel, grid_dim, block_dim, 0, 0, size, + nrhs, as_hip_type(p_i), + as_hip_type(g_k->get_const_values()), + g_k->get_stride(), as_hip_type(m_i), + as_hip_type(stop_status->get_const_data())); + } else { + hipblas::dot(exec->get_hipblas_handle(), size, p_i, 1, + g_k->get_const_values(), g_k->get_stride(), m_i); + } + } +} + + +template +void update_x_r_and_f(std::shared_ptr exec, + const size_type nrhs, const size_type k, + const matrix::Dense *m, + const matrix::Dense *g, + const matrix::Dense *u, + matrix::Dense *f, matrix::Dense *r, + matrix::Dense *x, + const Array *stop_status) +{ + const auto size = x->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = ceildiv(size * x->get_stride(), default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(update_x_r_and_f_kernel), grid_dim, + default_block_size, 0, 0, k, size, subspace_dim, nrhs, + as_hip_type(m->get_const_values()), m->get_stride(), + as_hip_type(g->get_const_values()), g->get_stride(), + as_hip_type(u->get_const_values()), u->get_stride(), + as_hip_type(f->get_values()), f->get_stride(), + 
as_hip_type(r->get_values()), r->get_stride(), + as_hip_type(x->get_values()), x->get_stride(), + as_hip_type(stop_status->get_const_data())); + components::fill_array(exec, f->get_values() + k * f->get_stride(), nrhs, + zero()); +} + + +} // namespace + + +template +void initialize(std::shared_ptr exec, const size_type nrhs, + matrix::Dense *m, + matrix::Dense *subspace_vectors, bool deterministic, + Array *stop_status) +{ + initialize_m(nrhs, m, stop_status); + initialize_subspace_vectors(subspace_vectors, deterministic); + orthonormalize_subspace_vectors(subspace_vectors); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); + + +template +void step_1(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *m, + const matrix::Dense *f, + const matrix::Dense *residual, + const matrix::Dense *g, matrix::Dense *c, + matrix::Dense *v, + const Array *stop_status) +{ + solve_lower_triangular(nrhs, m, f, c, stop_status); + + const auto num_rows = v->get_size()[0]; + const auto subspace_dim = m->get_size()[0]; + + const auto grid_dim = + ceildiv(v->get_stride() * num_rows, default_block_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(step_1_kernel), grid_dim, default_block_size, 0, 0, k, + num_rows, subspace_dim, nrhs, as_hip_type(residual->get_const_values()), + residual->get_stride(), as_hip_type(c->get_const_values()), + c->get_stride(), as_hip_type(g->get_const_values()), g->get_stride(), + as_hip_type(v->get_values()), v->get_stride(), + as_hip_type(stop_status->get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); + + +template +void step_2(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *omega, + const matrix::Dense *preconditioned_vector, + const matrix::Dense *c, matrix::Dense *u, + const Array *stop_status) +{ + const auto num_rows = preconditioned_vector->get_size()[0]; + const auto subspace_dim = u->get_size()[1] / nrhs; + + const 
auto grid_dim = + ceildiv(u->get_stride() * num_rows, default_block_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(step_2_kernel), grid_dim, default_block_size, 0, 0, k, + num_rows, subspace_dim, nrhs, as_hip_type(omega->get_const_values()), + as_hip_type(preconditioned_vector->get_const_values()), + preconditioned_vector->get_stride(), as_hip_type(c->get_const_values()), + c->get_stride(), as_hip_type(u->get_values()), u->get_stride(), + as_hip_type(stop_status->get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); + + +template +void step_3(std::shared_ptr exec, const size_type nrhs, + const size_type k, const matrix::Dense *p, + matrix::Dense *g, matrix::Dense *g_k, + matrix::Dense *u, matrix::Dense *m, + matrix::Dense *f, matrix::Dense *alpha, + matrix::Dense *residual, matrix::Dense *x, + const Array *stop_status) +{ + update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); + update_m(exec, nrhs, k, p, g_k, m, stop_status); + update_x_r_and_f(exec, nrhs, k, m, g, u, f, residual, x, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); + + +template +void compute_omega( + std::shared_ptr exec, const size_type nrhs, + const remove_complex kappa, const matrix::Dense *tht, + const matrix::Dense> *residual_norm, + matrix::Dense *omega, const Array *stop_status) +{ + const auto grid_dim = ceildiv(nrhs, config::warp_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_omega_kernel), grid_dim, + config::warp_size, 0, 0, nrhs, kappa, + as_hip_type(tht->get_const_values()), + as_hip_type(residual_norm->get_const_values()), + as_hip_type(omega->get_values()), + as_hip_type(stop_status->get_const_data())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL); + + +} // namespace idr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/solver/lower_trs_kernels.hip.cpp b/hip/solver/lower_trs_kernels.hip.cpp index 
d4e66513ebe..3eeb50185ac 100644 --- a/hip/solver/lower_trs_kernels.hip.cpp +++ b/hip/solver/lower_trs_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/solver/upper_trs_kernels.hip.cpp b/hip/solver/upper_trs_kernels.hip.cpp index 0f27b6ceb28..835e2f3803c 100644 --- a/hip/solver/upper_trs_kernels.hip.cpp +++ b/hip/solver/upper_trs_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/stop/criterion_kernels.hip.cpp b/hip/stop/criterion_kernels.hip.cpp index 0c2cf4da378..0443930660e 100644 --- a/hip/stop/criterion_kernels.hip.cpp +++ b/hip/stop/criterion_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/stop/residual_norm_kernels.hip.cpp b/hip/stop/residual_norm_kernels.hip.cpp index d104a29d8a8..e4b3ddf3b95 100644 --- a/hip/stop/residual_norm_kernels.hip.cpp +++ b/hip/stop/residual_norm_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -125,6 +125,83 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE( } // namespace residual_norm + + +/** + * @brief The Implicit Residual norm stopping criterion. 
+ * @ref implicit_resnorm + * @ingroup resnorm + */ +namespace implicit_residual_norm { + + +constexpr int default_block_size = 512; + + +template +__global__ + __launch_bounds__(default_block_size) void implicit_residual_norm_kernel( + size_type num_cols, remove_complex rel_residual_goal, + const ValueType *__restrict__ tau, + const remove_complex *__restrict__ orig_tau, + uint8 stoppingId, bool setFinalized, + stopping_status *__restrict__ stop_status, + bool *__restrict__ device_storage) +{ + const auto tidx = thread::get_thread_id_flat(); + if (tidx < num_cols) { + if (sqrt(abs(tau[tidx])) < rel_residual_goal * orig_tau[tidx]) { + stop_status[tidx].converge(stoppingId, setFinalized); + device_storage[1] = true; + } + // because only false is written to all_converged, write conflicts + // should not cause any problem + else if (!stop_status[tidx].has_stopped()) { + device_storage[0] = false; + } + } +} + + +__global__ __launch_bounds__(1) void init_kernel( + bool *__restrict__ device_storage) +{ + device_storage[0] = true; + device_storage[1] = false; +} + + +template +void implicit_residual_norm( + std::shared_ptr exec, + const matrix::Dense *tau, + const matrix::Dense> *orig_tau, + remove_complex rel_residual_goal, uint8 stoppingId, + bool setFinalized, Array *stop_status, + Array *device_storage, bool *all_converged, bool *one_changed) +{ + hipLaunchKernelGGL((init_kernel), dim3(1), dim3(1), 0, 0, + as_hip_type(device_storage->get_data())); + + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size(ceildiv(tau->get_size()[1], block_size.x), 1, 1); + + hipLaunchKernelGGL((implicit_residual_norm_kernel), dim3(grid_size), + dim3(block_size), 0, 0, tau->get_size()[1], + rel_residual_goal, as_hip_type(tau->get_const_values()), + as_hip_type(orig_tau->get_const_values()), stoppingId, + setFinalized, as_hip_type(stop_status->get_data()), + as_hip_type(device_storage->get_data())); + + /* Represents all_converged, one_changed */ + *all_converged = 
exec->copy_val_to_host(device_storage->get_const_data()); + *one_changed = exec->copy_val_to_host(device_storage->get_const_data() + 1); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); + + +} // namespace implicit_residual_norm } // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/test/CMakeLists.txt b/hip/test/CMakeLists.txt index fd1fa2941d8..a5f126893bf 100644 --- a/hip/test/CMakeLists.txt +++ b/hip/test/CMakeLists.txt @@ -1,9 +1,10 @@ -include(${CMAKE_SOURCE_DIR}/cmake/create_test.cmake) +include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(base) add_subdirectory(components) add_subdirectory(factorization) add_subdirectory(matrix) +add_subdirectory(multigrid) add_subdirectory(solver) add_subdirectory(preconditioner) add_subdirectory(stop) diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 795ededc410..4529a40c862 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,9 +1,16 @@ ginkgo_create_hip_test(hip_executor) +ginkgo_create_thread_test(hip_executor_reset) +if(GINKGO_HAVE_HWLOC) + find_package(NUMA REQUIRED) + ginkgo_create_hip_test(hip_executor_topology NUMA::NUMA) +endif() +ginkgo_create_hip_test(kernel_launch) +# correct flags for kernel_launch.hpp are set in GINKGO_HIPCC_OPTIONS ginkgo_create_hip_test(lin_op) ginkgo_create_hip_test(math) # Only hcc needs the libraries. nvcc only requires the headers. 
-if (GINKGO_HIP_PLATFORM MATCHES "hcc") - ginkgo_create_hip_test(exception_helpers roc::hipblas roc::hipsparse) +if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") + ginkgo_create_hip_test(exception_helpers roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) else() ginkgo_create_hip_test(exception_helpers) endif() diff --git a/hip/test/base/exception_helpers.hip.cpp b/hip/test/base/exception_helpers.hip.cpp index 8261cc24f0e..88d0e14de94 100644 --- a/hip/test/base/exception_helpers.hip.cpp +++ b/hip/test/base/exception_helpers.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -68,6 +69,18 @@ TEST(AssertNoHipblasErrors, DoesNotThrowOnSuccess) } +TEST(AssertNoHiprandErrors, ThrowsOnError) +{ + ASSERT_THROW(GKO_ASSERT_NO_HIPRAND_ERRORS(1), gko::HiprandError); +} + + +TEST(AssertNoHiprandErrors, DoesNotThrowOnSuccess) +{ + ASSERT_NO_THROW(GKO_ASSERT_NO_HIPRAND_ERRORS(HIPRAND_STATUS_SUCCESS)); +} + + TEST(AssertNoHipsparseErrors, ThrowsOnError) { ASSERT_THROW(GKO_ASSERT_NO_HIPSPARSE_ERRORS(1), gko::HipsparseError); diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index 635639fc21e..c789004ddb8 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include - +#include "common/cuda_hip/base/executor.hpp.inc" #include "hip/test/utils.hip.hpp" @@ -75,6 +75,11 @@ class ExampleOperation : public gko::Operation { value = -3; } + void run(std::shared_ptr) const override + { + value = -4; + } + void run(std::shared_ptr) const override { hipGetDevice(&value); @@ -86,7 +91,11 @@ class ExampleOperation : public gko::Operation { class HipExecutor : public ::testing::Test { protected: - HipExecutor() : omp(gko::OmpExecutor::create()), hip(nullptr), hip2(nullptr) + HipExecutor() + : omp(gko::OmpExecutor::create()), + hip(nullptr), + hip2(nullptr), + hip3(nullptr) {} void SetUp() @@ -95,6 +104,8 @@ class HipExecutor : public ::testing::Test { hip = gko::HipExecutor::create(0, omp); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, omp); + hip3 = gko::HipExecutor::create(0, omp, false, + gko::allocation_mode::unified_global); } void TearDown() @@ -108,6 +119,7 @@ class HipExecutor : public ::testing::Test { std::shared_ptr omp; std::shared_ptr hip; std::shared_ptr hip2; + std::shared_ptr hip3; }; @@ -180,6 +192,39 @@ TEST_F(HipExecutor, CopiesDataToHip) } +__global__ void check_data2(int *data) +{ + if (data[0] != 4 || data[1] != 8) { +#if GINKGO_HIP_PLATFORM_HCC + asm("s_trap 0x02;"); +#else // GINKGO_HIP_PLATFORM_NVCC + asm("trap;"); +#endif + } +} + + +#if GINKGO_HIP_PLATFORM_NVCC + + +TEST_F(HipExecutor, CanAllocateOnUnifiedMemory) +{ + int orig[] = {3, 8}; + auto *copy = hip3->alloc(2); + + hip3->copy_from(omp.get(), 2, orig, copy); + + check_data<<<1, 1>>>(copy); + ASSERT_NO_THROW(hip3->synchronize()); + copy[0] = 4; + check_data2<<<1, 1>>>(copy); + hip3->free(copy); +} + + +#endif + + __global__ void init_data(int *data) { data[0] = 3; @@ -248,7 +293,7 @@ TEST_F(HipExecutor, CopiesDataFromHipToHip) omp->copy_from(hip2.get(), 2, copy_hip2, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); - hip->free(copy_hip2); + hip2->free(copy_hip2); hip->free(orig); } @@ -260,4 +305,37 @@ 
TEST_F(HipExecutor, Synchronizes) } +TEST_F(HipExecutor, ExecInfoSetsCorrectProperties) +{ + auto dev_id = hip->get_device_id(); + auto num_sm = 0; + auto major = 0; + auto minor = 0; + auto max_threads_per_block = 0; + auto warp_size = 0; + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &num_sm, hipDeviceAttributeMultiprocessorCount, dev_id)); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &major, hipDeviceAttributeComputeCapabilityMajor, dev_id)); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &minor, hipDeviceAttributeComputeCapabilityMinor, dev_id)); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute( + &max_threads_per_block, hipDeviceAttributeMaxThreadsPerBlock, dev_id)); + GKO_ASSERT_NO_HIP_ERRORS( + hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, dev_id)); +#if GINKGO_HIP_PLATFORM_NVCC + auto num_cores = convert_sm_ver_to_cores(major, minor); +#else + auto num_cores = warp_size * 4; +#endif + + ASSERT_EQ(hip->get_major_version(), major); + ASSERT_EQ(hip->get_minor_version(), minor); + ASSERT_EQ(hip->get_num_multiprocessor(), num_sm); + ASSERT_EQ(hip->get_warp_size(), warp_size); + ASSERT_EQ(hip->get_num_warps(), num_sm * (num_cores / warp_size)); + ASSERT_EQ(hip->get_num_warps_per_sm(), num_cores / warp_size); +} + + } // namespace diff --git a/hip/test/base/hip_executor_reset.cpp b/hip/test/base/hip_executor_reset.cpp new file mode 100644 index 00000000000..33a9c8040c0 --- /dev/null +++ b/hip/test/base/hip_executor_reset.cpp @@ -0,0 +1,87 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +namespace { + + +#define GTEST_ASSERT_NO_EXIT(statement) \ + ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") + + +TEST(DeviceReset, HipCuda) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto hip = gko::HipExecutor::create(0, ref, true); + auto cuda = gko::CudaExecutor::create(0, ref, true); + }); +} + + +TEST(DeviceReset, CudaHip) +{ + GTEST_ASSERT_NO_EXIT({ + auto ref = gko::ReferenceExecutor::create(); + auto cuda = gko::CudaExecutor::create(0, ref, true); + auto hip = gko::HipExecutor::create(0, ref, true); + }); +} + + +void func() +{ + auto ref = gko::ReferenceExecutor::create(); + auto exec = gko::HipExecutor::create(0, ref, true); +} + + +TEST(DeviceReset, HipHip) +{ + GTEST_ASSERT_NO_EXIT({ + std::thread t1(func); + std::thread t2(func); + t1.join(); + t2.join(); + }); +} + + +} // namespace diff --git a/hip/test/base/hip_executor_topology.hip.cpp b/hip/test/base/hip_executor_topology.hip.cpp new file mode 100644 index 00000000000..0f52ebb23d5 --- /dev/null +++ b/hip/test/base/hip_executor_topology.hip.cpp @@ -0,0 +1,168 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +// force-top: on +// prevent compilation failure related to disappearing assert(...) 
statements +#include +// force-top: off + + +#include + + +#include +#include +#include + + +#if defined(__unix__) || defined(__APPLE__) +#include +#include +#endif + + +#include + + +#include +#include + + +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class HipExecutor : public ::testing::Test { +protected: + HipExecutor() : omp(gko::OmpExecutor::create()), hip(nullptr), hip2(nullptr) + {} + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + hip = gko::HipExecutor::create(0, omp); + hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, + omp); + } + + void TearDown() + { + if (hip != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::shared_ptr omp; + std::shared_ptr hip; + std::shared_ptr hip2; +}; + + +#if GKO_HAVE_HWLOC + + +inline int get_cpu_os_id(int log_id) +{ + return gko::MachineTopology::get_instance()->get_pu(log_id)->os_id; +} + + +inline int get_core_os_id(int log_id) +{ + return gko::MachineTopology::get_instance()->get_core(log_id)->os_id; +} + + +TEST_F(HipExecutor, CanBindToSinglePu) +{ + hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + + const int bind_pu = 1; + gko::MachineTopology::get_instance()->bind_to_pu(bind_pu); + + auto cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_cpu_os_id(1)); +} + + +TEST_F(HipExecutor, CanBindToPus) +{ + hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + + std::vector bind_pus = {1, 3}; + gko::MachineTopology::get_instance()->bind_to_pus(bind_pus); + + auto cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_cpu_os_id(3) || cpu_sys == get_cpu_os_id(1)); +} + + +TEST_F(HipExecutor, CanBindToCores) +{ + hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + + std::vector bind_cores = {1, 3}; + gko::MachineTopology::get_instance()->bind_to_cores(bind_cores); + + auto cpu_sys = sched_getcpu(); + ASSERT_TRUE(cpu_sys == get_core_os_id(3) 
|| cpu_sys == get_core_os_id(1)); +} + + +TEST_F(HipExecutor, ClosestCpusIsPopulated) +{ + hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + auto close_cpus0 = hip->get_closest_pus(); + + ASSERT_NE(close_cpus0[0], -1); +} + + +TEST_F(HipExecutor, KnowsItsNuma) +{ + hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + auto numa0 = hip->get_closest_numa(); + auto close_cpu0 = hip->get_closest_pus(); + + auto numa_sys0 = numa_node_of_cpu(get_cpu_os_id(close_cpu0[0])); + + ASSERT_TRUE(numa0 == numa_sys0); +} + + +#endif + + +} // namespace diff --git a/hip/test/base/kernel_launch.hip.cpp b/hip/test/base/kernel_launch.hip.cpp new file mode 100644 index 00000000000..261c552056f --- /dev/null +++ b/hip/test/base/kernel_launch.hip.cpp @@ -0,0 +1,277 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "common/unified/base/kernel_launch.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "common/unified/base/kernel_launch_solver.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using gko::dim; +using gko::size_type; +using std::is_same; + + +class KernelLaunch : public ::testing::Test { +protected: + KernelLaunch() + : exec(gko::HipExecutor::create(0, gko::ReferenceExecutor::create())), + zero_array(exec->get_master(), 16), + iota_array(exec->get_master(), 16), + iota_transp_array(exec->get_master(), 16), + iota_dense(gko::matrix::Dense<>::create(exec, dim<2>{4, 4})), + zero_dense(gko::matrix::Dense<>::create(exec, dim<2>{4, 4}, 6)), + zero_dense2(gko::matrix::Dense<>::create(exec, dim<2>{4, 4}, 5)), + vec_dense(gko::matrix::Dense<>::create(exec, dim<2>{1, 4})) + { + auto ref_iota_dense = + gko::matrix::Dense<>::create(exec->get_master(), dim<2>{4, 4}); + for (int i = 0; i < 16; i++) { + zero_array.get_data()[i] = 0; + iota_array.get_data()[i] = i; + iota_transp_array.get_data()[i] = (i % 4 * 4) + i / 4; + ref_iota_dense->at(i / 4, i % 4) = i; + } + zero_dense->fill(0.0); + zero_dense2->fill(0.0); + iota_dense->copy_from(ref_iota_dense.get()); + zero_array.set_executor(exec); + iota_array.set_executor(exec); + iota_transp_array.set_executor(exec); + } + + std::shared_ptr exec; + gko::Array zero_array; + gko::Array 
iota_array; + gko::Array iota_transp_array; + std::unique_ptr> iota_dense; + std::unique_ptr> zero_dense; + std::unique_ptr> zero_dense2; + std::unique_ptr> vec_dense; +}; + + +// nvcc doesn't like device lambdas declared in complex classes, move it out +void run1d(std::shared_ptr exec, size_type dim, int *data) +{ + gko::kernels::hip::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i] = i; + }, + dim, data); +} + +TEST_F(KernelLaunch, Runs1D) +{ + run1d(exec, zero_array.get_num_elems(), zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +void run1d(std::shared_ptr exec, gko::Array &data) +{ + gko::kernels::hip::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i] = i; + } else { + d[i] = 0; + } + }, + data.get_num_elems(), data, data.get_const_data()); +} + +TEST_F(KernelLaunch, Runs1DArray) +{ + run1d(exec, zero_array); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_array); +} + + +void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) +{ + gko::kernels::hip::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; + bool strides_correct = d.stride == 5 && d2.stride == 5; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d.stride; + bool accessors_1d_correct = &d[0] == d_ptr && &d2[0] == d_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i / 4, i % 4) = i; + } else { + d(i / 4, i % 4) = 0; + } + }, + 16, m, 
static_cast *>(m), + m->get_const_values()); +} + +TEST_F(KernelLaunch, Runs1DDense) +{ + run1d(exec, zero_dense2.get()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +void run2d(std::shared_ptr exec, int *data) +{ + gko::kernels::hip::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto d) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + d[i + 4 * j] = 4 * i + j; + }, + dim<2>{4, 4}, data); +} + +TEST_F(KernelLaunch, Runs2D) +{ + run2d(exec, zero_array.get_data()); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +void run2d(std::shared_ptr exec, gko::Array &data) +{ + gko::kernels::hip::run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + if (d == d_ptr) { + d[i + 4 * j] = 4 * i + j; + } else { + d[i + 4 * j] = 0; + } + }, + dim<2>{4, 4}, data, data.get_const_data()); +} + +TEST_F(KernelLaunch, Runs2DArray) +{ + run2d(exec, zero_array); + + GKO_ASSERT_ARRAY_EQ(zero_array, iota_transp_array); +} + + +void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, + gko::matrix::Dense<> *m2, gko::matrix::Dense<> *m3) +{ + gko::kernels::hip::run_kernel_solver( + exec, + [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, + auto d4, auto d2_ptr, auto d3_ptr) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, + "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && + d3.data == d2_ptr && d4 == d3_ptr; + bool strides_correct = + d.stride == 5 && d2.stride == 5 && d3.stride 
== 6; + bool accessors_2d_correct = + &d(0, 0) == d_ptr && &d(1, 0) == d_ptr + d.stride && + &d2(0, 0) == d_ptr && &d2(1, 0) == d_ptr + d2.stride && + &d3(0, 0) == d2_ptr && &d3(1, 0) == d2_ptr + d3.stride; + bool accessors_1d_correct = + &d[0] == d_ptr && &d2[0] == d_ptr && &d3[0] == d2_ptr; + if (pointers_correct && strides_correct && accessors_2d_correct && + accessors_1d_correct) { + d(i, j) = 4 * i + j; + } else { + d(i, j) = 0; + } + }, + dim<2>{4, 4}, m2->get_stride(), m1, + static_cast *>(m1), m1->get_const_values(), + gko::kernels::hip::default_stride(m2), + gko::kernels::hip::row_vector(m3), m2->get_values(), m3->get_values()); +} + +TEST_F(KernelLaunch, Runs2DDense) +{ + run2d(exec, zero_dense2.get(), zero_dense.get(), vec_dense.get()); + + GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); +} + + +} // namespace diff --git a/hip/test/base/lin_op.hip.cpp b/hip/test/base/lin_op.hip.cpp index 1977912be9e..4e540d80081 100644 --- a/hip/test/base/lin_op.hip.cpp +++ b/hip/test/base/lin_op.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/base/math.hip.cpp b/hip/test/base/math.hip.cpp index 818506a8d25..95d626225de 100644 --- a/hip/test/base/math.hip.cpp +++ b/hip/test/base/math.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/CMakeLists.txt b/hip/test/components/CMakeLists.txt index b3bec2595f9..f0bae60fe60 100644 --- a/hip/test/components/CMakeLists.txt +++ b/hip/test/components/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_hip_test(absolute_array) ginkgo_create_hip_test(cooperative_groups_kernels) ginkgo_create_hip_test(fill_array) ginkgo_create_hip_test(merging_kernels) diff --git a/hip/test/components/absolute_array.hip.cpp b/hip/test/components/absolute_array.hip.cpp new file mode 100644 index 00000000000..69e567ff8a6 --- /dev/null +++ b/hip/test/components/absolute_array.hip.cpp @@ -0,0 +1,132 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/components/absolute_array.hpp" + + +#include +#include +#include + + +#include + + +#include + + +#include "core/test/utils/assertions.hpp" + + +namespace { + + +class AbsoluteArray : public ::testing::Test { +protected: + using value_type = double; + using complex_type = std::complex; + AbsoluteArray() + : ref(gko::ReferenceExecutor::create()), + exec(gko::HipExecutor::create(0, ref)), + total_size(6344), + vals(ref, total_size), + dvals(exec, total_size), + complex_vals(ref, total_size), + dcomplex_vals(exec, total_size) + { + std::fill_n(vals.get_data(), total_size, -1234.0); + dvals = vals; + std::fill_n(complex_vals.get_data(), total_size, complex_type{3, 4}); + dcomplex_vals = complex_vals; + } + + std::shared_ptr ref; + std::shared_ptr exec; + gko::size_type total_size; + gko::Array vals; + gko::Array dvals; + gko::Array complex_vals; + gko::Array dcomplex_vals; +}; + + +TEST_F(AbsoluteArray, InplaceEqualsReference) +{ + gko::kernels::hip::components::inplace_absolute_array( + exec, dvals.get_data(), total_size); + gko::kernels::reference::components::inplace_absolute_array( + ref, vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(vals, dvals); +} + + +TEST_F(AbsoluteArray, InplaceComplexEqualsReference) +{ + gko::kernels::hip::components::inplace_absolute_array( + exec, dcomplex_vals.get_data(), total_size); + 
gko::kernels::reference::components::inplace_absolute_array( + ref, complex_vals.get_data(), total_size); + + GKO_ASSERT_ARRAY_EQ(complex_vals, dcomplex_vals); +} + + +TEST_F(AbsoluteArray, OutplaceEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::hip::components::outplace_absolute_array( + exec, dvals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +TEST_F(AbsoluteArray, OutplaceComplexEqualsReference) +{ + gko::Array abs_vals(ref, total_size); + gko::Array dabs_vals(exec, total_size); + + gko::kernels::hip::components::outplace_absolute_array( + exec, dcomplex_vals.get_const_data(), total_size, dabs_vals.get_data()); + gko::kernels::reference::components::outplace_absolute_array( + ref, complex_vals.get_const_data(), total_size, abs_vals.get_data()); + + GKO_ASSERT_ARRAY_EQ(abs_vals, dabs_vals); +} + + +} // namespace diff --git a/hip/test/components/cooperative_groups_kernels.hip.cpp b/hip/test/components/cooperative_groups_kernels.hip.cpp index 823dcef0df1..ec1ce79e080 100644 --- a/hip/test/components/cooperative_groups_kernels.hip.cpp +++ b/hip/test/components/cooperative_groups_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/fill_array.hip.cpp b/hip/test/components/fill_array.hip.cpp index 1c7bfda89d0..348bee3f329 100644 --- a/hip/test/components/fill_array.hip.cpp +++ b/hip/test/components/fill_array.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,17 +57,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +template class FillArray : public ::testing::Test { protected: - using value_type = double; + using value_type = T; FillArray() : ref(gko::ReferenceExecutor::create()), exec(gko::HipExecutor::create(0, ref)), total_size(6344), vals(ref, total_size), - dvals(exec, total_size) + dvals(exec, total_size), + seqs(ref, total_size) { - std::fill_n(vals.get_data(), total_size, 1234.0); + std::fill_n(vals.get_data(), total_size, T(1234)); + std::iota(seqs.get_data(), seqs.get_data() + total_size, 0); } std::shared_ptr ref; @@ -75,14 +78,29 @@ class FillArray : public ::testing::Test { gko::size_type total_size; gko::Array vals; gko::Array dvals; + gko::Array seqs; }; +TYPED_TEST_SUITE(FillArray, gko::test::ValueAndIndexTypes); -TEST_F(FillArray, EqualsReference) + +TYPED_TEST(FillArray, EqualsReference) { - gko::kernels::hip::components::fill_array(exec, dvals.get_data(), - total_size, 1234.0); - GKO_ASSERT_ARRAY_EQ(vals, dvals); + using T = typename TestFixture::value_type; + gko::kernels::hip::components::fill_array( + this->exec, this->dvals.get_data(), this->total_size, T(1234)); + + GKO_ASSERT_ARRAY_EQ(this->vals, this->dvals); +} + + +TYPED_TEST(FillArray, FillSeqEqualsReference) +{ + using T = typename TestFixture::value_type; + gko::kernels::hip::components::fill_seq_array( + this->exec, this->dvals.get_data(), this->total_size); + 
+ GKO_ASSERT_ARRAY_EQ(this->seqs, this->dvals); } diff --git a/hip/test/components/merging_kernels.hip.cpp b/hip/test/components/merging_kernels.hip.cpp index 466c31a48b3..d5acbf2b8c4 100644 --- a/hip/test/components/merging_kernels.hip.cpp +++ b/hip/test/components/merging_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/precision_conversion.hip.cpp b/hip/test/components/precision_conversion.hip.cpp index a7b9713b871..f6ef67fb76c 100644 --- a/hip/test/components/precision_conversion.hip.cpp +++ b/hip/test/components/precision_conversion.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/prefix_sum.hip.cpp b/hip/test/components/prefix_sum.hip.cpp index 96f91522d06..69577df5a7b 100644 --- a/hip/test/components/prefix_sum.hip.cpp +++ b/hip/test/components/prefix_sum.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/searching_kernels.hip.cpp b/hip/test/components/searching_kernels.hip.cpp index e55855e40c3..d22c4a125e0 100644 --- a/hip/test/components/searching_kernels.hip.cpp +++ b/hip/test/components/searching_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/test/components/sorting_kernels.hip.cpp b/hip/test/components/sorting_kernels.hip.cpp index ca30186096c..ed65b1ea798 100644 --- a/hip/test/components/sorting_kernels.hip.cpp +++ b/hip/test/components/sorting_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -54,8 +54,8 @@ using gko::kernels::hip::bitonic_sort; using gko::kernels::hip::config; -constexpr auto num_elements = 2048; -constexpr auto num_local = 4; +constexpr int num_elements = 2048; +constexpr int num_local = 4; constexpr auto num_threads = num_elements / num_local; diff --git a/hip/test/factorization/CMakeLists.txt b/hip/test/factorization/CMakeLists.txt index da6c40ca680..998601d6b64 100644 --- a/hip/test/factorization/CMakeLists.txt +++ b/hip/test/factorization/CMakeLists.txt @@ -1,4 +1,6 @@ -ginkgo_create_hip_test_special_linkage(ilu_kernels) +ginkgo_create_test(ic_kernels) +ginkgo_create_test(ilu_kernels) +ginkgo_create_hip_test(par_ic_kernels) ginkgo_create_hip_test(par_ict_kernels) ginkgo_create_hip_test(par_ilu_kernels) ginkgo_create_hip_test(par_ilut_kernels) diff --git a/hip/test/factorization/ic_kernels.cpp b/hip/test/factorization/ic_kernels.cpp new file mode 100644 index 00000000000..34899790c7b --- /dev/null +++ b/hip/test/factorization/ic_kernels.cpp @@ -0,0 +1,147 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include + + +#include + + +#include +#include + + +#include "core/test/utils/unsort_matrix.hpp" +#include "hip/test/utils.hip.hpp" +#include "matrices/config.hpp" + + +namespace { + + +class Ic : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Csr = gko::matrix::Csr; + + std::shared_ptr ref; + std::shared_ptr hip; + std::ranlux48 rand_engine; + std::shared_ptr csr_ref; + std::shared_ptr csr_hip; + + Ic() + : ref(gko::ReferenceExecutor::create()), + hip(gko::HipExecutor::create(0, ref)), + rand_engine(6794) + {} + + void SetUp() override + { + std::string file_name(gko::matrices::location_ani4_mtx); + auto input_file = std::ifstream(file_name, std::ios::in); + if (!input_file) { + FAIL() << "Could not find the file \"" << file_name + << "\", which is required for this test.\n"; + } + csr_ref = gko::read(input_file, ref); + csr_hip = Csr::create(hip); + csr_hip->copy_from(gko::lend(csr_ref)); + } +}; + + +TEST_F(Ic, ComputeICIsEquivalentToRefSorted) +{ + auto ref_fact = gko::factorization::ParIc<>::build() + .with_skip_sorting(true) + .on(ref) + ->generate(csr_ref); + auto hip_fact = gko::factorization::Ic<>::build() + .with_skip_sorting(true) + .on(hip) + ->generate(csr_hip); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), hip_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_lt_factor(), hip_fact->get_lt_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + hip_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_lt_factor(), + hip_fact->get_lt_factor()); +} + + +TEST_F(Ic, ComputeICIsEquivalentToRefUnsorted) +{ + gko::test::unsort_matrix(gko::lend(csr_ref), rand_engine); + csr_hip->copy_from(gko::lend(csr_ref)); + + auto ref_fact = + gko::factorization::ParIc<>::build().on(ref)->generate(csr_ref); + auto hip_fact = + 
gko::factorization::Ic<>::build().on(hip)->generate(csr_hip); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), hip_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_lt_factor(), hip_fact->get_lt_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + hip_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_lt_factor(), + hip_fact->get_lt_factor()); +} + + +TEST_F(Ic, SetsCorrectStrategy) +{ + auto hip_fact = gko::factorization::Ic<>::build() + .with_l_strategy(std::make_shared()) + .on(hip) + ->generate(csr_hip); + + ASSERT_EQ(hip_fact->get_l_factor()->get_strategy()->get_name(), + "merge_path"); + ASSERT_EQ(hip_fact->get_lt_factor()->get_strategy()->get_name(), + "merge_path"); +} + + +} // namespace diff --git a/hip/test/factorization/ilu_kernels.cpp b/hip/test/factorization/ilu_kernels.cpp index b0bffcdd430..e8c7ec41d6f 100644 --- a/hip/test/factorization/ilu_kernels.cpp +++ b/hip/test/factorization/ilu_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" #include "matrices/config.hpp" @@ -61,12 +62,14 @@ class Ilu : public ::testing::Test { std::shared_ptr ref; std::shared_ptr hip; + std::ranlux48 rand_engine; std::shared_ptr csr_ref; std::shared_ptr csr_hip; Ilu() : ref(gko::ReferenceExecutor::create()), - hip(gko::HipExecutor::create(0, ref)) + hip(gko::HipExecutor::create(0, ref)), + rand_engine(1337) {} void SetUp() override @@ -84,8 +87,33 @@ class Ilu : public ::testing::Test { }; -TEST_F(Ilu, ComputeILUIsEquivalentToRef) +TEST_F(Ilu, ComputeILUIsEquivalentToRefSorted) { + auto ref_fact = gko::factorization::ParIlu<>::build() + .with_skip_sorting(true) + .on(ref) + ->generate(csr_ref); + auto hip_fact = gko::factorization::Ilu<>::build() + .with_skip_sorting(true) + .on(hip) + ->generate(csr_hip); + + GKO_ASSERT_MTX_NEAR(ref_fact->get_l_factor(), hip_fact->get_l_factor(), + 1e-14); + GKO_ASSERT_MTX_NEAR(ref_fact->get_u_factor(), hip_fact->get_u_factor(), + 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_l_factor(), + hip_fact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(ref_fact->get_u_factor(), + hip_fact->get_u_factor()); +} + + +TEST_F(Ilu, ComputeILUIsEquivalentToRefUnsorted) +{ + gko::test::unsort_matrix(gko::lend(csr_ref), rand_engine); + csr_hip->copy_from(gko::lend(csr_ref)); + auto ref_fact = gko::factorization::ParIlu<>::build().on(ref)->generate(csr_ref); auto hip_fact = diff --git a/hip/test/factorization/par_ic_kernels.hip.cpp b/hip/test/factorization/par_ic_kernels.hip.cpp new file mode 100644 index 00000000000..e58b4da93e4 --- /dev/null +++ b/hip/test/factorization/par_ic_kernels.hip.cpp @@ -0,0 +1,167 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/factorization/par_ic_kernels.hpp" + + +#include +#include +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/factorization/factorization_kernels.hpp" +#include "core/matrix/csr_builder.hpp" +#include "core/matrix/csr_kernels.hpp" +#include "hip/test/utils.hip.hpp" +#include "matrices/config.hpp" + + +namespace { + + +class ParIc : public ::testing::Test { +protected: + using value_type = double; + using index_type = gko::int32; + using Coo = gko::matrix::Coo; + using Csr = gko::matrix::Csr; + + ParIc() + : mtx_size(585, 585), + rand_engine(10667), + ref(gko::ReferenceExecutor::create()), + hip(gko::HipExecutor::create(0, gko::ReferenceExecutor::create())) + { + mtx_l = gko::test::generate_random_lower_triangular_matrix( + mtx_size[0], mtx_size[0], false, + std::uniform_int_distribution(10, mtx_size[0]), + std::normal_distribution>(0, 10.0), + rand_engine, ref); + + dmtx_ani = Csr::create(hip); + dmtx_l_ani = Csr::create(hip); + dmtx_l_ani_init = Csr::create(hip); + dmtx_l = Csr::create(hip); + dmtx_l->copy_from(lend(mtx_l)); + } + + void SetUp() + { + std::string file_name(gko::matrices::location_ani4_mtx); + auto input_file = std::ifstream(file_name, std::ios::in); + if (!input_file) { + FAIL() << "Could not find the file \"" << file_name + << "\", which is required for this test.\n"; + } + mtx_ani = gko::read(input_file, ref); + mtx_ani->sort_by_column_index(); + + { + mtx_l_ani = Csr::create(ref, mtx_ani->get_size()); + gko::matrix::CsrBuilder l_builder( + lend(mtx_l_ani)); + gko::kernels::reference::factorization::initialize_row_ptrs_l( + ref, lend(mtx_ani), mtx_l_ani->get_row_ptrs()); + auto l_nnz = + mtx_l_ani->get_const_row_ptrs()[mtx_ani->get_size()[0]]; + l_builder.get_col_idx_array().resize_and_reset(l_nnz); + l_builder.get_value_array().resize_and_reset(l_nnz); + 
gko::kernels::reference::factorization::initialize_l( + ref, lend(mtx_ani), lend(mtx_l_ani), false); + mtx_l_ani_init = Csr::create(ref); + mtx_l_ani_init->copy_from(lend(mtx_l_ani)); + gko::kernels::reference::par_ic_factorization::init_factor( + ref, lend(mtx_l_ani_init)); + } + dmtx_ani->copy_from(lend(mtx_ani)); + dmtx_l_ani->copy_from(lend(mtx_l_ani)); + dmtx_l_ani_init->copy_from(lend(mtx_l_ani_init)); + } + + std::shared_ptr ref; + std::shared_ptr hip; + + const gko::dim<2> mtx_size; + std::default_random_engine rand_engine; + + std::unique_ptr mtx_l; + std::unique_ptr mtx_ani; + std::unique_ptr mtx_l_ani; + std::unique_ptr mtx_l_ani_init; + + std::unique_ptr dmtx_l; + std::unique_ptr dmtx_ani; + std::unique_ptr dmtx_l_ani; + std::unique_ptr dmtx_l_ani_init; +}; + + +TEST_F(ParIc, KernelInitFactorIsEquivalentToRef) +{ + gko::kernels::reference::par_ic_factorization::init_factor(ref, + lend(mtx_l)); + gko::kernels::hip::par_ic_factorization::init_factor(hip, lend(dmtx_l)); + + GKO_ASSERT_MTX_NEAR(mtx_l, dmtx_l, r::value); +} + + +TEST_F(ParIc, KernelComputeFactorIsEquivalentToRef) +{ + auto square_size = mtx_ani->get_size(); + auto mtx_l_coo = Coo::create(ref, square_size); + mtx_l_ani->convert_to(lend(mtx_l_coo)); + auto dmtx_l_coo = Coo::create(hip, square_size); + dmtx_l_coo->copy_from(lend(mtx_l_coo)); + + gko::kernels::reference::par_ic_factorization::compute_factor( + ref, 1, lend(mtx_l_coo), lend(mtx_l_ani_init)); + gko::kernels::hip::par_ic_factorization::compute_factor( + hip, 100, lend(dmtx_l_coo), lend(dmtx_l_ani_init)); + + GKO_ASSERT_MTX_NEAR(mtx_l_ani_init, dmtx_l_ani_init, 1e-4); +} + + +} // namespace diff --git a/hip/test/factorization/par_ict_kernels.hip.cpp b/hip/test/factorization/par_ict_kernels.hip.cpp index b8858dadaa4..6e334da4939 100644 --- a/hip/test/factorization/par_ict_kernels.hip.cpp +++ b/hip/test/factorization/par_ict_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 
2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -138,17 +138,17 @@ class ParIct : public ::testing::Test { TEST_F(ParIct, KernelAddCandidatesIsEquivalentToRef) { - auto mtx_llt = Csr::create(ref, mtx_size); - mtx_l->apply(lend(mtx_l->transpose()), lend(mtx_llt)); - auto dmtx_llt = Csr::create(hip, mtx_size); - dmtx_llt->copy_from(lend(mtx_llt)); + auto mtx_llh = Csr::create(ref, mtx_size); + mtx_l->apply(lend(mtx_l->transpose()), lend(mtx_llh)); + auto dmtx_llh = Csr::create(hip, mtx_size); + dmtx_llh->copy_from(lend(mtx_llh)); auto res_mtx_l = Csr::create(ref, mtx_size); auto dres_mtx_l = Csr::create(hip, mtx_size); gko::kernels::reference::par_ict_factorization::add_candidates( - ref, lend(mtx_llt), lend(mtx), lend(mtx_l), lend(res_mtx_l)); + ref, lend(mtx_llh), lend(mtx), lend(mtx_l), lend(res_mtx_l)); gko::kernels::hip::par_ict_factorization::add_candidates( - hip, lend(dmtx_llt), lend(dmtx), lend(dmtx_l), lend(dres_mtx_l)); + hip, lend(dmtx_llh), lend(dmtx), lend(dmtx_l), lend(dres_mtx_l)); GKO_ASSERT_MTX_EQ_SPARSITY(res_mtx_l, dres_mtx_l); GKO_ASSERT_MTX_NEAR(res_mtx_l, dres_mtx_l, 1e-14); diff --git a/hip/test/factorization/par_ilu_kernels.hip.cpp b/hip/test/factorization/par_ilu_kernels.hip.cpp index 96dffed19e1..98811f97af3 100644 --- a/hip/test/factorization/par_ilu_kernels.hip.cpp +++ b/hip/test/factorization/par_ilu_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/test/factorization/par_ilut_kernels.hip.cpp b/hip/test/factorization/par_ilut_kernels.hip.cpp index 38fb5eb205f..1a362c3717e 100644 --- a/hip/test/factorization/par_ilut_kernels.hip.cpp +++ b/hip/test/factorization/par_ilut_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -72,7 +72,11 @@ class ParIlut : public ::testing::Test { using ComplexCsr = gko::matrix::Csr, index_type>; ParIlut() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else : mtx_size(500, 700), +#endif rand_engine(1337), ref(gko::ReferenceExecutor::create()), hip(gko::HipExecutor::create(0, ref)) diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 94e92f08f5c..4a32b5272f7 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -3,5 +3,6 @@ ginkgo_create_hip_test(csr_kernels) ginkgo_create_hip_test(dense_kernels) ginkgo_create_hip_test(diagonal_kernels) ginkgo_create_hip_test(ell_kernels) +ginkgo_create_hip_test(fbcsr_kernels) ginkgo_create_hip_test(hybrid_kernels) ginkgo_create_hip_test(sellp_kernels) diff --git a/hip/test/matrix/coo_kernels.hip.cpp b/hip/test/matrix/coo_kernels.hip.cpp index 3e23bb6f4a4..aeaa9a50206 100644 --- a/hip/test/matrix/coo_kernels.hip.cpp +++ b/hip/test/matrix/coo_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/coo_kernels.hpp" +#include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" @@ -58,6 +59,7 @@ class Coo : public ::testing::Test { protected: using Mtx = gko::matrix::Coo<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Coo() : rand_engine(42) {} @@ -75,9 +77,10 @@ class Coo : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } @@ -102,6 +105,12 @@ class Coo : public ::testing::Test { dbeta->copy_from(beta.get()); } + void unsort_mtx() + { + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); + } + std::shared_ptr ref; std::shared_ptr hip; @@ -133,6 +142,36 @@ TEST_F(Coo, SimpleApplyIsEquivalentToRef) } +TEST_F(Coo, SimpleApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(hip, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + double padding_val{1234.0}; + hip->copy_from(hip->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, 1e-14); + ASSERT_EQ(hip->copy_val_to_host(dresult_padded->get_values() + 1), 1234.0); +} + + +TEST_F(Coo, SimpleApplyIsEquivalentToRefUnsorted) +{ + set_up_apply_data(); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Coo, AdvancedApplyIsEquivalentToRef) { set_up_apply_data(); @@ -144,6 +183,24 @@ TEST_F(Coo, AdvancedApplyIsEquivalentToRef) } +TEST_F(Coo, 
AdvancedApplyDoesntOverwritePadding) +{ + set_up_apply_data(); + auto dresult_padded = + Vec::create(hip, dresult->get_size(), dresult->get_stride() + 1); + dresult_padded->copy_from(dresult.get()); + double padding_val{1234.0}; + hip->copy_from(hip->get_master().get(), 1, &padding_val, + dresult_padded->get_values() + 1); + + mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult_padded.get()); + + GKO_ASSERT_MTX_NEAR(dresult_padded, expected, 1e-14); + ASSERT_EQ(hip->copy_val_to_host(dresult_padded->get_values() + 1), 1234.0); +} + + TEST_F(Coo, SimpleApplyAddIsEquivalentToRef) { set_up_apply_data(); @@ -232,6 +289,57 @@ TEST_F(Coo, AdvancedApplyAddToLargeDenseMatrixIsEquivalentToRef) } +TEST_F(Coo, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Coo, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Coo, ApplyAddToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto 
complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply2(alpha.get(), complex_b.get(), complex_x.get()); + dmtx->apply2(dalpha.get(), dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Coo, ConvertToDenseIsEquivalentToRef) { set_up_apply_data(); @@ -271,4 +379,26 @@ TEST_F(Coo, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Coo, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Coo, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 14ec43d6a4a..7a4e7a28ade 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -52,6 +52,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/csr_kernels.hpp" +#include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" @@ -60,10 +61,20 @@ namespace { class Csr : public ::testing::Test { protected: - using Mtx = gko::matrix::Csr<>; + using Arr = gko::Array; using Vec = gko::matrix::Dense<>; - - Csr() : mtx_size(532, 231), rand_engine(42) {} + using Mtx = gko::matrix::Csr<>; + using ComplexVec = gko::matrix::Dense>; + using ComplexMtx = gko::matrix::Csr>; + + Csr() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} void SetUp() { @@ -112,38 +123,32 @@ class Csr : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Vec::create(hip); dbeta->copy_from(beta.get()); - } - struct matrix_pair { - std::unique_ptr ref; - std::unique_ptr hip; - }; + std::vector tmp(mtx->get_size()[0], 0); + auto rng = std::default_random_engine{}; + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rng); + std::vector tmp2(mtx->get_size()[1], 0); + std::iota(tmp2.begin(), tmp2.end(), 0); + std::shuffle(tmp2.begin(), tmp2.end(), rng); + rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); + cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + } - matrix_pair gen_unsorted_mtx() + void set_up_apply_complex_data( + std::shared_ptr strategy) { - constexpr int min_nnz_per_row = 2; // Must be at least 2 - auto local_mtx_ref = - gen_mtx(mtx_size[0], mtx_size[1], min_nnz_per_row); - for (size_t row = 0; row < mtx_size[0]; ++row) { - const auto row_ptrs = local_mtx_ref->get_const_row_ptrs(); - const auto start_row = row_ptrs[row]; - auto col_idx = local_mtx_ref->get_col_idxs() + start_row; - auto vals = local_mtx_ref->get_values() + start_row; - const auto nnz_in_this_row = row_ptrs[row + 1] - row_ptrs[row]; - auto swap_idx_dist = - std::uniform_int_distribution<>(0, nnz_in_this_row - 1); - // shuffle `nnz_in_this_row / 2` times - for (size_t perm = 0; perm < nnz_in_this_row; 
perm += 2) { - const auto idx1 = swap_idx_dist(rand_engine); - const auto idx2 = swap_idx_dist(rand_engine); - std::swap(col_idx[idx1], col_idx[idx2]); - std::swap(vals[idx1], vals[idx2]); - } - } - auto local_mtx_hip = Mtx::create(hip); - local_mtx_hip->copy_from(local_mtx_ref.get()); + complex_mtx = ComplexMtx::create(ref, strategy); + complex_mtx->copy_from( + gen_mtx(mtx_size[0], mtx_size[1], 1)); + complex_dmtx = ComplexMtx::create(hip, strategy); + complex_dmtx->copy_from(complex_mtx.get()); + } - return {std::move(local_mtx_ref), std::move(local_mtx_hip)}; + void unsort_mtx() + { + gko::test::unsort_matrix(mtx.get(), rand_engine); + dmtx->copy_from(mtx.get()); } std::shared_ptr ref; @@ -153,6 +158,7 @@ class Csr : public ::testing::Test { std::ranlux48 rand_engine; std::unique_ptr mtx; + std::unique_ptr complex_mtx; std::unique_ptr square_mtx; std::unique_ptr expected; std::unique_ptr y; @@ -160,11 +166,14 @@ class Csr : public ::testing::Test { std::unique_ptr beta; std::unique_ptr dmtx; + std::unique_ptr complex_dmtx; std::unique_ptr square_dmtx; std::unique_ptr dresult; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr rpermute_idxs; + std::unique_ptr cpermute_idxs; }; @@ -188,6 +197,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalance) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithLoadBalanceUnsorted) +{ + set_up_apply_data(std::make_shared(hip)); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithLoadBalance) { set_up_apply_data(std::make_shared(hip)); @@ -210,6 +231,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithHipsparse) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithHipsparseUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, 
expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithHipsparse) { set_up_apply_data(std::make_shared()); @@ -232,6 +265,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePath) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithMergePathUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithMergePath) { set_up_apply_data(std::make_shared()); @@ -254,6 +299,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassical) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithClassicalUnsorted) +{ + set_up_apply_data(std::make_shared()); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, AdvancedApplyIsEquivalentToRefWithClassical) { set_up_apply_data(std::make_shared()); @@ -276,6 +333,18 @@ TEST_F(Csr, SimpleApplyIsEquivalentToRefWithAutomatical) } +TEST_F(Csr, SimpleApplyIsEquivalentToRefWithAutomaticalUnsorted) +{ + set_up_apply_data(std::make_shared(hip)); + unsort_mtx(); + + mtx->apply(y.get(), expected.get()); + dmtx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + TEST_F(Csr, SimpleApplyToDenseMatrixIsEquivalentToRefWithLoadBalance) { set_up_apply_data(std::make_shared(hip), 3); @@ -393,16 +462,61 @@ TEST_F(Csr, AdvancedApplyToIdentityMatrixIsEquivalentToRef) } +TEST_F(Csr, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(hip)); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + 
dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Csr, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(hip)); + auto complex_b = gen_mtx(this->mtx_size[1], 3, 1); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(this->mtx_size[0], 3, 1); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Csr, TransposeIsEquivalentToRef) { set_up_apply_data(std::make_shared(hip)); - auto trans = mtx->transpose(); - auto d_trans = dmtx->transpose(); + auto trans = gko::as(mtx->transpose()); + auto d_trans = gko::as(dmtx->transpose()); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); - ASSERT_TRUE(static_cast(d_trans.get())->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); +} + + +TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(hip)); + + auto trans = gko::as(mtx->conj_transpose()); + auto d_trans = gko::as(dmtx->conj_transpose()); + + GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); + ASSERT_TRUE(d_trans->is_sorted_by_column_index()); } @@ -625,6 +739,86 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = + 
gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto r_permute = gko::as(mtx->row_permute(rpermute_idxs.get())); + auto dr_permute = gko::as(dmtx->row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(r_permute, dr_permute); + GKO_ASSERT_MTX_NEAR(r_permute, dr_permute, 0); +} + + +TEST_F(Csr, IsColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto c_permute = gko::as(mtx->column_permute(cpermute_idxs.get())); + auto dc_permute = gko::as(dmtx->column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(dc_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(c_permute, dc_permute); + GKO_ASSERT_MTX_NEAR(c_permute, dc_permute, 0); +} + + +TEST_F(Csr, IsInverseRowPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_r_permute = + gko::as(mtx->inverse_row_permute(rpermute_idxs.get())); + auto d_inverse_r_permute = + gko::as(dmtx->inverse_row_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_r_permute, d_inverse_r_permute); + GKO_ASSERT_MTX_NEAR(inverse_r_permute, d_inverse_r_permute, 0); +} + + +TEST_F(Csr, IsInverseColPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto inverse_c_permute = + gko::as(mtx->inverse_column_permute(cpermute_idxs.get())); + auto d_inverse_c_permute = + gko::as(dmtx->inverse_column_permute(cpermute_idxs.get())); + + ASSERT_TRUE(d_inverse_c_permute->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(inverse_c_permute, d_inverse_c_permute); + GKO_ASSERT_MTX_NEAR(inverse_c_permute, d_inverse_c_permute, 0); +} + + TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) { set_up_apply_data(std::make_shared(hip)); @@ -640,12 +834,13 @@ TEST_F(Csr, RecognizeSortedMatrixIsEquivalentToRef) 
TEST_F(Csr, RecognizeUnsortedMatrixIsEquivalentToRef) { - auto uns_mtx = gen_unsorted_mtx(); + set_up_apply_data(std::make_shared()); + unsort_mtx(); bool is_sorted_hip{}; bool is_sorted_ref{}; - is_sorted_ref = uns_mtx.ref->is_sorted_by_column_index(); - is_sorted_hip = uns_mtx.hip->is_sorted_by_column_index(); + is_sorted_ref = mtx->is_sorted_by_column_index(); + is_sorted_hip = dmtx->is_sorted_by_column_index(); ASSERT_EQ(is_sorted_ref, is_sorted_hip); } @@ -665,13 +860,14 @@ TEST_F(Csr, SortSortedMatrixIsEquivalentToRef) TEST_F(Csr, SortUnsortedMatrixIsEquivalentToRef) { - auto uns_mtx = gen_unsorted_mtx(); + set_up_apply_data(std::make_shared()); + unsort_mtx(); - uns_mtx.ref->sort_by_column_index(); - uns_mtx.hip->sort_by_column_index(); + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); // Values must be unchanged, therefore, tolerance is `0` - GKO_ASSERT_MTX_NEAR(uns_mtx.ref, uns_mtx.hip, 0); + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 0); } @@ -711,4 +907,48 @@ TEST_F(Csr, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Csr, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(hip)); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Csr, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(std::make_shared(hip)); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + +TEST_F(Csr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(hip)); + + complex_mtx->compute_absolute_inplace(); + complex_dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, 1e-14); +} + + +TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) +{ + set_up_apply_complex_data(std::make_shared(hip)); + + auto abs_mtx = complex_mtx->compute_absolute(); + auto dabs_mtx = complex_dmtx->compute_absolute(); + + 
GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 40de01b14e8..b736dc30117 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -61,8 +61,13 @@ class Dense : public ::testing::Test { using itype = int; using vtype = double; using Mtx = gko::matrix::Dense; + using MixedMtx = gko::matrix::Dense>; using NormVector = gko::matrix::Dense>; using Arr = gko::Array; + using ComplexMtx = gko::matrix::Dense>; + using Diagonal = gko::matrix::Diagonal; + using MixedComplexMtx = + gko::matrix::Dense>>; Dense() : rand_engine(15) {} @@ -116,6 +121,7 @@ class Dense : public ::testing::Test { expected = gen_mtx(65, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); dx = Mtx::create(hip); dx->copy_from(x.get()); dy = Mtx::create(hip); @@ -126,6 +132,8 @@ class Dense : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Mtx::create(hip); dbeta->copy_from(beta.get()); + dsquare = Mtx::create(hip); + dsquare->copy_from(square.get()); std::vector tmp(x->get_size()[0], 0); auto rng = std::default_random_engine{}; @@ -134,14 +142,25 @@ class Dense : public ::testing::Test { std::vector tmp2(x->get_size()[1], 0); std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); + std::vector tmp3(x->get_size()[0] / 10); + std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); + for (auto &i : tmp3) { + i = row_dist(rng); + } rpermute_idxs = std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); - drpermute_idxs = - std::unique_ptr(new Arr{hip, tmp.begin(), 
tmp.end()}); cpermute_idxs = std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); - dcpermute_idxs = - std::unique_ptr(new Arr{hip, tmp2.begin(), tmp2.end()}); + rgather_idxs = + std::unique_ptr(new Arr{ref, tmp3.begin(), tmp3.end()}); + } + + template + std::unique_ptr convert(InputType &&input) + { + auto result = ConvertedType::create(input->get_executor()); + input->convert_to(result.get()); + return result; } std::shared_ptr ref; @@ -154,176 +173,228 @@ class Dense : public ::testing::Test { std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; + std::unique_ptr square; std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; - std::unique_ptr drpermute_idxs; std::unique_ptr cpermute_idxs; - std::unique_ptr dcpermute_idxs; + std::unique_ptr rgather_idxs; }; -TEST_F(Dense, SingleVectorHipScaleIsEquivalentToRef) +TEST_F(Dense, SingleVectorHipComputeDotIsEquivalentToRef) { set_up_vector_data(1); - auto result = Mtx::create(ref); - x->scale(alpha.get()); - dx->scale(dalpha.get()); - result->copy_from(dx.get()); + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(result, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorHipScaleIsEquivalentToRef) +TEST_F(Dense, MultipleVectorHipComputeDotIsEquivalentToRef) { set_up_vector_data(20); - x->scale(alpha.get()); - dx->scale(dalpha.get()); + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorHipScaleWithDifferentAlphaIsEquivalentToRef) +TEST_F(Dense, SingleVectorHipComputeConjDotIsEquivalentToRef) { - set_up_vector_data(20, true); + set_up_vector_data(1); - x->scale(alpha.get()); - dx->scale(dalpha.get()); + x->compute_conj_dot(y.get(), 
expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, SingleVectorHipAddScaledIsEquivalentToRef) +TEST_F(Dense, MultipleVectorHipComputeConjDotIsEquivalentToRef) { - set_up_vector_data(1); + set_up_vector_data(20); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->compute_conj_dot(y.get(), expected.get()); + dx->compute_conj_dot(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorHipAddScaledIsEquivalentToRef) +TEST_F(Dense, HipComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->hip, norm_size); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); } -TEST_F(Dense, MultipleVectorHipAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(Dense, SimpleApplyIsEquivalentToRef) { - set_up_vector_data(20); + set_up_apply_data(); - x->add_scaled(alpha.get(), y.get()); - dx->add_scaled(dalpha.get(), dy.get()); + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); - GKO_ASSERT_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, AddsScaledDiagIsEquivalentToRef) +TEST_F(Dense, SimpleApplyMixedIsEquivalentToRef) { - auto mat = gen_mtx(532, 532); - gko::Array diag_values(ref, 532); - gko::kernels::reference::components::fill_array(ref, diag_values.get_data(), - 532, Mtx::value_type{2.0}); - auto diag = - gko::matrix::Diagonal::create(ref, 532, diag_values); - alpha = gko::initialize({2.0}, ref); - auto dmat = Mtx::create(hip); - 
dmat->copy_from(mat.get()); - auto ddiag = gko::matrix::Diagonal::create(hip); - ddiag->copy_from(diag.get()); - dalpha = Mtx::create(hip); - dalpha->copy_from(alpha.get()); - - mat->add_scaled(alpha.get(), diag.get()); - dmat->add_scaled(dalpha.get(), ddiag.get()); - - GKO_ASSERT_MTX_NEAR(mat, dmat, 1e-14); + set_up_apply_data(); + + x->apply(convert(y).get(), convert(expected).get()); + dx->apply(convert(dy).get(), convert(dresult).get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); } -TEST_F(Dense, SingleVectorHipComputeDotIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyIsEquivalentToRef) { - set_up_vector_data(1); + set_up_apply_data(); - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Dense, MultipleVectorHipComputeDotIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyMixedIsEquivalentToRef) { - set_up_vector_data(20); + set_up_apply_data(); - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); + x->apply(convert(alpha).get(), convert(y).get(), + convert(beta).get(), convert(expected).get()); + dx->apply(convert(dalpha).get(), convert(dy).get(), + convert(dbeta).get(), convert(dresult).get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-7); } -TEST_F(Dense, HipComputeNorm2IsEquivalentToRef) +TEST_F(Dense, ApplyToComplexIsEquivalentToRef) { - set_up_vector_data(20); - auto norm_size = gko::dim<2>{1, x->get_size()[1]}; - auto norm_expected = NormVector::create(this->ref, norm_size); - auto dnorm = NormVector::create(this->hip, norm_size); + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(hip); + 
dcomplex_x->copy_from(complex_x.get()); - x->compute_norm2(norm_expected.get()); - dx->compute_norm2(dnorm.get()); + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); } -TEST_F(Dense, SimpleApplyIsEquivalentToRef) +TEST_F(Dense, ApplyToMixedComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = MixedComplexMtx::create(hip); + dcomplex_x->copy_from(complex_x.get()); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + x->apply(complex_b.get(), complex_x.get()); + dx->apply(dcomplex_b.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); } -TEST_F(Dense, AdvancedApplyIsEquivalentToRef) +TEST_F(Dense, AdvancedApplyToComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = ComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto dcomplex_x = ComplexMtx::create(hip); + dcomplex_x->copy_from(complex_x.get()); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + x->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); } -TEST_F(Dense, IsTransposable) +TEST_F(Dense, AdvancedApplyToMixedComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(25, 1); + auto dcomplex_b = MixedComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(65, 1); + auto 
dcomplex_x = MixedComplexMtx::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + x->apply(convert(alpha).get(), complex_b.get(), + convert(beta).get(), complex_x.get()); + dx->apply(convert(dalpha).get(), dcomplex_b.get(), + convert(dbeta).get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-7); +} + + +TEST_F(Dense, ComputeDotComplexIsEquivalentToRef) { set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(hip); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(hip, gko::dim<2>{1, 2}); + + complex_b->compute_dot(complex_x.get(), result.get()); + dcomplex_b->compute_dot(dcomplex_x.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(result, dresult, 1e-14); +} - auto trans = x->transpose(); - auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); +TEST_F(Dense, ComputeConjDotComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(1234, 2); + auto dcomplex_b = ComplexMtx::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(1234, 2); + auto dcomplex_x = ComplexMtx::create(hip); + dcomplex_x->copy_from(complex_x.get()); + auto result = ComplexMtx::create(ref, gko::dim<2>{1, 2}); + auto dresult = ComplexMtx::create(hip, gko::dim<2>{1, 2}); + + complex_b->compute_conj_dot(complex_x.get(), result.get()); + dcomplex_b->compute_conj_dot(dcomplex_x.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(result, dresult, 1e-14); } @@ -510,62 +581,35 @@ TEST_F(Dense, CalculateTotalColsIsEquivalentToRef) } -TEST_F(Dense, IsRowPermutable) -{ - set_up_apply_data(); - - auto r_permute = x->row_permute(rpermute_idxs.get()); - auto dr_permute = dx->row_permute(drpermute_idxs.get()); - - 
GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), - static_cast(dr_permute.get()), 0); -} - - -TEST_F(Dense, IsColPermutable) -{ - set_up_apply_data(); - - auto c_permute = x->column_permute(cpermute_idxs.get()); - auto dc_permute = dx->column_permute(dcpermute_idxs.get()); - - GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), - static_cast(dc_permute.get()), 0); -} - - -TEST_F(Dense, IsInverseRowPermutable) -{ - set_up_apply_data(); - - auto inverse_r_permute = x->inverse_row_permute(rpermute_idxs.get()); - auto d_inverse_r_permute = dx->inverse_row_permute(drpermute_idxs.get()); - - GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), - static_cast(d_inverse_r_permute.get()), 0); -} - - -TEST_F(Dense, IsInverseColPermutable) +TEST_F(Dense, IsTransposable) { set_up_apply_data(); - auto inverse_c_permute = x->inverse_column_permute(cpermute_idxs.get()); - auto d_inverse_c_permute = dx->inverse_column_permute(dcpermute_idxs.get()); + auto trans = x->transpose(); + auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), - static_cast(d_inverse_c_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } -TEST_F(Dense, ExtractDiagonalIsEquivalentToRef) +TEST_F(Dense, IsTransposableIntoDenseCrossExecutor) { set_up_apply_data(); - - auto diag = x->extract_diagonal(); - auto ddiag = dx->extract_diagonal(); - - GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); + auto row_span = gko::span{0, x->get_size()[0] - 2}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = Mtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = Mtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->transpose(trans.get()); + sub_dx->transpose(dtrans.get()); + 
+ GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); } diff --git a/hip/test/matrix/diagonal_kernels.hip.cpp b/hip/test/matrix/diagonal_kernels.hip.cpp index 6b87034006e..606e39db7e6 100644 --- a/hip/test/matrix/diagonal_kernels.hip.cpp +++ b/hip/test/matrix/diagonal_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/diagonal_kernels.hpp" -#include "core/test/utils.hpp" +#include "hip/test/utils.hip.hpp" namespace { @@ -62,7 +62,14 @@ class Diagonal : public ::testing::Test { using Dense = gko::matrix::Dense; using ComplexDiag = gko::matrix::Diagonal; - Diagonal() : mtx_size(532, 231), rand_engine(42) {} + Diagonal() +#ifdef GINKGO_FAST_TESTS + : mtx_size(152, 231), +#else + : mtx_size(532, 231), +#endif + rand_engine(42) + {} void SetUp() { @@ -118,10 +125,10 @@ class Diagonal : public ::testing::Test { diag = gen_diag(mtx_size[0]); ddiag = Diag::create(hip); ddiag->copy_from(diag.get()); - dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); - denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[0]); - denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[1]); + dense1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + dense2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); + denseexpected1 = gen_mtx(mtx_size[0], mtx_size[1], mtx_size[1]); + denseexpected2 = gen_mtx(mtx_size[1], mtx_size[0], mtx_size[0]); ddense1 = Dense::create(hip); ddense1->copy_from(dense1.get()); ddense2 = Dense::create(hip); @@ -249,4 +256,26 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) } +TEST_F(Diagonal, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + 
diag->compute_absolute_inplace(); + ddiag->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(diag, ddiag, 1e-14); +} + + +TEST_F(Diagonal, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_diag = diag->compute_absolute(); + auto dabs_diag = ddiag->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_diag, dabs_diag, 1e-14); +} + + } // namespace diff --git a/hip/test/matrix/ell_kernels.hip.cpp b/hip/test/matrix/ell_kernels.hip.cpp index fa9a2df717a..13c81fdafcf 100644 --- a/hip/test/matrix/ell_kernels.hip.cpp +++ b/hip/test/matrix/ell_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -48,7 +48,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/ell_kernels.hpp" -#include "core/test/utils.hpp" #include "hip/test/utils.hip.hpp" @@ -59,8 +58,12 @@ class Ell : public ::testing::Test { protected: using Mtx = gko::matrix::Ell<>; using Vec = gko::matrix::Dense<>; + using Vec2 = gko::matrix::Dense; + using ComplexVec = gko::matrix::Dense>; - Ell() : rand_engine(42) {} + Ell() + : rand_engine(42), size{532, 231}, num_els_rowwise{300}, ell_stride{600} + {} void SetUp() { @@ -76,9 +79,10 @@ class Ell : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } @@ -91,19 +95,33 @@ class Ell : public ::testing::Test { stride); mtx->copy_from(gen_mtx(num_rows, num_cols)); expected = gen_mtx(num_rows, num_vectors); + expected2 = Vec2::create(ref); + expected2->copy_from(expected.get()); y 
= gen_mtx(num_cols, num_vectors); + y2 = Vec2::create(ref); + y2->copy_from(y.get()); alpha = gko::initialize({2.0}, ref); + alpha2 = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + beta2 = gko::initialize({-1.0}, ref); dmtx = Mtx::create(hip); dmtx->copy_from(mtx.get()); dresult = Vec::create(hip); dresult->copy_from(expected.get()); + dresult2 = Vec2::create(hip); + dresult2->copy_from(expected2.get()); dy = Vec::create(hip); dy->copy_from(y.get()); + dy2 = Vec2::create(hip); + dy2->copy_from(y2.get()); dalpha = Vec::create(hip); dalpha->copy_from(alpha.get()); + dalpha2 = Vec2::create(hip); + dalpha2->copy_from(alpha2.get()); dbeta = Vec::create(hip); dbeta->copy_from(beta.get()); + dbeta2 = Vec2::create(hip); + dbeta2->copy_from(beta2.get()); } @@ -111,18 +129,29 @@ class Ell : public ::testing::Test { std::shared_ptr hip; std::ranlux48 rand_engine; + gko::dim<2> size; + gko::size_type num_els_rowwise; + gko::size_type ell_stride; std::unique_ptr mtx; std::unique_ptr expected; + std::unique_ptr expected2; std::unique_ptr y; + std::unique_ptr y2; std::unique_ptr alpha; + std::unique_ptr alpha2; std::unique_ptr beta; + std::unique_ptr beta2; std::unique_ptr dmtx; std::unique_ptr dresult; + std::unique_ptr dresult2; std::unique_ptr dy; + std::unique_ptr dy2; std::unique_ptr dalpha; + std::unique_ptr dalpha2; std::unique_ptr dbeta; + std::unique_ptr dbeta2; }; @@ -137,6 +166,39 @@ TEST_F(Ell, SimpleApplyIsEquivalentToRef) } +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef1) +{ + set_up_apply_data(); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef2) +{ + set_up_apply_data(); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyIsEquivalentToRef3) +{ + set_up_apply_data(); + + 
mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyIsEquivalentToRef) { set_up_apply_data(); @@ -148,9 +210,42 @@ TEST_F(Ell, AdvancedApplyIsEquivalentToRef) } +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef1) +{ + set_up_apply_data(); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef2) +{ + set_up_apply_data(); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyIsEquivalentToRef3) +{ + set_up_apply_data(); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 1, 300, 600); + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -159,9 +254,42 @@ TEST_F(Ell, SimpleApplyWithStrideIsEquivalentToRef) } +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, 
MixedSimpleApplyWithStrideIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) { - set_up_apply_data(532, 231, 1, 300, 600); + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -169,9 +297,42 @@ TEST_F(Ell, AdvancedApplyWithStrideIsEquivalentToRef) } +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 1, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 3, 300, 600); + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); @@ -180,9 +341,42 @@ TEST_F(Ell, SimpleApplyWithStrideToDenseMatrixIsEquivalentToRef) } +TEST_F(Ell, 
MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected2.get()); + dmtx->apply(dy2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y2.get(), expected.get()); + dmtx->apply(dy2.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, MixedSimpleApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(y.get(), expected2.get()); + dmtx->apply(dy.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) { - set_up_apply_data(532, 231, 3, 300, 600); + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); @@ -191,6 +385,39 @@ TEST_F(Ell, AdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef) } +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef1) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + +TEST_F(Ell, MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef2) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha2.get(), y2.get(), beta.get(), expected.get()); + dmtx->apply(dalpha2.get(), dy2.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Ell, 
MixedAdvancedApplyWithStrideToDenseMatrixIsEquivalentToRef3) +{ + set_up_apply_data(size[0], size[1], 3, num_els_rowwise, ell_stride); + + mtx->apply(alpha.get(), y.get(), beta2.get(), expected2.get()); + dmtx->apply(dalpha.get(), dy.get(), dbeta2.get(), dresult2.get()); + + GKO_ASSERT_MTX_NEAR(dresult2, expected2, 1e-6); +} + + TEST_F(Ell, SimpleApplyByAtomicIsEquivalentToRef) { set_up_apply_data(10, 10000); @@ -279,6 +506,40 @@ TEST_F(Ell, AdvancedApplyOnSmallMatrixIsEquivalentToRef) } +TEST_F(Ell, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Ell, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(size[1], 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(size[0], 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Ell, ConvertToDenseIsEquivalentToRef) { set_up_apply_data(); @@ -356,4 +617,26 @@ TEST_F(Ell, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Ell, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Ell, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + auto abs_mtx = mtx->compute_absolute(); + 
auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/hip/test/matrix/fbcsr_kernels.hip.cpp b/hip/test/matrix/fbcsr_kernels.hip.cpp new file mode 100644 index 00000000000..30c11fba2b8 --- /dev/null +++ b/hip/test/matrix/fbcsr_kernels.hip.cpp @@ -0,0 +1,93 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include "core/test/matrix/fbcsr_sample.hpp" +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class Fbcsr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Fbcsr<>; + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::shared_ptr ref; + std::shared_ptr hip; + + std::unique_ptr mtx; +}; + + +TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) +{ + using value_type = Mtx::value_type; + using index_type = Mtx::index_type; + using MatData = gko::matrix_data; + gko::testing::FbcsrSample sample(ref); + auto refmat = sample.generate_fbcsr(); + auto hipmat = Mtx::create(hip); + hipmat->copy_from(gko::lend(refmat)); + MatData refdata; + MatData hipdata; + + refmat->write(refdata); + hipmat->write(hipdata); + + ASSERT_TRUE(refdata.nonzeros == hipdata.nonzeros); +} + + +} // namespace diff --git a/hip/test/matrix/hybrid_kernels.hip.cpp b/hip/test/matrix/hybrid_kernels.hip.cpp index a765cbc2717..a113ced6361 100644 --- a/hip/test/matrix/hybrid_kernels.hip.cpp +++ b/hip/test/matrix/hybrid_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -57,6 +57,7 @@ class Hybrid : public ::testing::Test { protected: using Mtx = gko::matrix::Hybrid<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Hybrid() : rand_engine(42) {} @@ -74,9 +75,11 @@ class Hybrid : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols, int min_nnz_row) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols, + int min_nnz_row) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(min_nnz_row, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -181,6 +184,40 @@ TEST_F(Hybrid, AdvancedApplyToDenseMatrixIsEquivalentToRef) } +TEST_F(Hybrid, ApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Hybrid, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_data(); + auto complex_b = gen_mtx(231, 3, 1); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3, 1); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + TEST_F(Hybrid, CountNonzerosIsEquivalentToRef) { set_up_apply_data(); @@ -231,4 +268,33 @@ TEST_F(Hybrid, ExtractDiagonalIsEquivalentToRef) } +TEST_F(Hybrid, 
InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(); + + mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Hybrid, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_data(1, std::make_shared(2)); + using AbsMtx = gko::remove_complex; + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + auto abs_strategy = gko::as(abs_mtx->get_strategy()); + auto dabs_strategy = + gko::as(dabs_mtx->get_strategy()); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), + dabs_strategy->get_num_columns()); + GKO_ASSERT_EQ(abs_strategy->get_num_columns(), 2); +} + + } // namespace diff --git a/hip/test/matrix/sellp_kernels.hip.cpp b/hip/test/matrix/sellp_kernels.hip.cpp index 80c3cf7c113..5df39fc5356 100644 --- a/hip/test/matrix/sellp_kernels.hip.cpp +++ b/hip/test/matrix/sellp_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -58,6 +58,7 @@ class Sellp : public ::testing::Test { protected: using Mtx = gko::matrix::Sellp<>; using Vec = gko::matrix::Dense<>; + using ComplexVec = gko::matrix::Dense>; Sellp() : rand_engine(42) {} @@ -75,46 +76,23 @@ class Sellp : public ::testing::Test { } } - std::unique_ptr gen_mtx(int num_rows, int num_cols) + template + std::unique_ptr gen_mtx(int num_rows, int num_cols) { - return gko::test::generate_random_matrix( + return gko::test::generate_random_matrix( num_rows, num_cols, std::uniform_int_distribution<>(1, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } - void set_up_apply_vector( - int slice_size = gko::matrix::default_slice_size, - int stride_factor = gko::matrix::default_stride_factor, - int total_cols = 0) - { - mtx = Mtx::create(ref); - mtx->copy_from(gen_mtx(532, 231)); - expected = gen_mtx(532, 1); - y = gen_mtx(231, 1); - alpha = gko::initialize({2.0}, ref); - beta = gko::initialize({-1.0}, ref); - dmtx = Mtx::create(hip); - dmtx->copy_from(mtx.get()); - dresult = Vec::create(hip); - dresult->copy_from(expected.get()); - dy = Vec::create(hip); - dy->copy_from(y.get()); - dalpha = Vec::create(hip); - dalpha->copy_from(alpha.get()); - dbeta = Vec::create(hip); - dbeta->copy_from(beta.get()); - } - void set_up_apply_matrix( - int slice_size = gko::matrix::default_slice_size, - int stride_factor = gko::matrix::default_stride_factor, - int total_cols = 0) + int total_cols = 1, int slice_size = gko::matrix::default_slice_size, + int stride_factor = gko::matrix::default_stride_factor) { mtx = Mtx::create(ref); mtx->copy_from(gen_mtx(532, 231)); empty = Mtx::create(ref); - expected = gen_mtx(532, 64); - y = gen_mtx(231, 64); + expected = gen_mtx(532, total_cols); + y = gen_mtx(231, total_cols); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); dmtx = Mtx::create(hip); @@ -153,79 +131,67 @@ class Sellp : public ::testing::Test { 
TEST_F(Sellp, SimpleApplyIsEquivalentToRef) { - set_up_apply_vector(); + set_up_apply_matrix(); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyIsEquivalentToRef) { - set_up_apply_vector(); + set_up_apply_matrix(); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, SimpleApplyWithSliceSizeAndStrideFactorIsEquivalentToRef) { - set_up_apply_vector(32, 2); + set_up_apply_matrix(1, 32, 2); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyWithSliceSizeAndStrideFActorIsEquivalentToRef) { - set_up_apply_vector(32, 2); + set_up_apply_matrix(1, 32, 2); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, SimpleApplyMultipleRHSIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } TEST_F(Sellp, AdvancedApplyMultipleRHSIsEquivalentToRef) { - set_up_apply_matrix(); + 
set_up_apply_matrix(64); mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } @@ -237,9 +203,7 @@ TEST_F(Sellp, mtx->apply(y.get(), expected.get()); dmtx->apply(dy.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } @@ -251,16 +215,47 @@ TEST_F(Sellp, mtx->apply(alpha.get(), y.get(), beta.get(), expected.get()); dmtx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - auto result = Vec::create(ref); - result->copy_from(dresult.get()); - GKO_ASSERT_MTX_NEAR(result, expected, 1e-14); + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); } -TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) +TEST_F(Sellp, ApplyToComplexIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(complex_b.get(), complex_x.get()); + dmtx->apply(dcomplex_b.get(), dcomplex_x.get()); + + GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + + +TEST_F(Sellp, AdvancedApplyToComplexIsEquivalentToRef) +{ + set_up_apply_matrix(64); + auto complex_b = gen_mtx(231, 3); + auto dcomplex_b = ComplexVec::create(hip); + dcomplex_b->copy_from(complex_b.get()); + auto complex_x = gen_mtx(532, 3); + auto dcomplex_x = ComplexVec::create(hip); + dcomplex_x->copy_from(complex_x.get()); + + mtx->apply(alpha.get(), complex_b.get(), beta.get(), complex_x.get()); + dmtx->apply(dalpha.get(), dcomplex_b.get(), dbeta.get(), dcomplex_x.get()); + + 
GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 1e-14); +} + +TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) +{ + set_up_apply_matrix(64); auto dense_mtx = gko::matrix::Dense<>::create(ref); auto ddense_mtx = gko::matrix::Dense<>::create(hip); @@ -273,7 +268,7 @@ TEST_F(Sellp, ConvertToDenseIsEquivalentToRef) TEST_F(Sellp, ConvertToCsrIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); auto csr_mtx = gko::matrix::Csr<>::create(ref); auto dcsr_mtx = gko::matrix::Csr<>::create(hip); @@ -287,8 +282,7 @@ TEST_F(Sellp, ConvertToCsrIsEquivalentToRef) TEST_F(Sellp, ConvertEmptyToDenseIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); auto dense_mtx = gko::matrix::Dense<>::create(ref); auto ddense_mtx = gko::matrix::Dense<>::create(hip); @@ -301,8 +295,7 @@ TEST_F(Sellp, ConvertEmptyToDenseIsEquivalentToRef) TEST_F(Sellp, ConvertEmptyToCsrIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); auto csr_mtx = gko::matrix::Csr<>::create(ref); auto dcsr_mtx = gko::matrix::Csr<>::create(hip); @@ -315,8 +308,7 @@ TEST_F(Sellp, ConvertEmptyToCsrIsEquivalentToRef) TEST_F(Sellp, CountNonzerosIsEquivalentToRef) { - set_up_apply_matrix(); - + set_up_apply_matrix(64); gko::size_type nnz; gko::size_type dnnz; @@ -329,7 +321,7 @@ TEST_F(Sellp, CountNonzerosIsEquivalentToRef) TEST_F(Sellp, ExtractDiagonalIsEquivalentToRef) { - set_up_apply_matrix(); + set_up_apply_matrix(64); auto diag = mtx->extract_diagonal(); auto ddiag = dmtx->extract_diagonal(); @@ -340,7 +332,7 @@ TEST_F(Sellp, ExtractDiagonalIsEquivalentToRef) TEST_F(Sellp, ExtractDiagonalWithSliceSizeAndStrideFactorIsEquivalentToRef) { - set_up_apply_matrix(32, 2); + set_up_apply_matrix(64, 32, 2); auto diag = mtx->extract_diagonal(); auto ddiag = dmtx->extract_diagonal(); @@ -349,4 +341,26 @@ TEST_F(Sellp, ExtractDiagonalWithSliceSizeAndStrideFactorIsEquivalentToRef) } +TEST_F(Sellp, InplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + 
mtx->compute_absolute_inplace(); + dmtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR(mtx, dmtx, 1e-14); +} + + +TEST_F(Sellp, OutplaceAbsoluteMatrixIsEquivalentToRef) +{ + set_up_apply_matrix(64, 32, 2); + + auto abs_mtx = mtx->compute_absolute(); + auto dabs_mtx = dmtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, 1e-14); +} + + } // namespace diff --git a/hip/test/multigrid/CMakeLists.txt b/hip/test/multigrid/CMakeLists.txt new file mode 100644 index 00000000000..8fe8bbeba48 --- /dev/null +++ b/hip/test/multigrid/CMakeLists.txt @@ -0,0 +1 @@ +ginkgo_create_test(amgx_pgm_kernels) diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp new file mode 100644 index 00000000000..923e8e0765a --- /dev/null +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -0,0 +1,315 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/multigrid/amgx_pgm_kernels.hpp" +#include "core/test/utils/matrix_generator.hpp" +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class AmgxPgm : public ::testing::Test { +protected: + using value_type = gko::default_precision; + using index_type = gko::int32; + using Mtx = gko::matrix::Dense<>; + using Csr = gko::matrix::Csr; + using Diag = gko::matrix::Diagonal; + AmgxPgm() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + gko::Array gen_array(gko::size_type num, index_type min_val, + index_type max_val) + { + return gko::test::generate_random_array( + num, std::uniform_int_distribution<>(min_val, max_val), rand_engine, + ref); + } + + gko::Array gen_agg_array(gko::size_type num, + gko::size_type num_agg) + { + auto agg_array = gen_array(num, 0, num_agg - 1); + 
auto agg_array_val = agg_array.get_data(); + std::vector select_agg(num); + std::iota(select_agg.begin(), select_agg.end(), 0); + // use the first num_agg item as the aggregated index. + std::shuffle(select_agg.begin(), select_agg.end(), rand_engine); + // the value of agg_array is the i-th of aggregate group + for (gko::size_type i = 0; i < num; i++) { + agg_array_val[i] = select_agg[agg_array_val[i]]; + } + // the aggregated group must contain the identifier-th element + // agg_val[i] == i holds in the aggregated group whose identifier is i + for (gko::size_type i = 0; i < num_agg; i++) { + auto agg_idx = select_agg[i]; + agg_array_val[agg_idx] = agg_idx; + } + return agg_array; + } + + void initialize_data() + { + m = 597; + n = 300; + int nrhs = 3; + + agg = gen_agg_array(m, n); + // only use 0 ~ n-2 and ensure the end isolated and not yet finished + unfinished_agg = gen_array(m, -1, n - 2); + unfinished_agg.get_data()[n - 1] = -1; + strongest_neighbor = gen_array(m, 0, n - 2); + strongest_neighbor.get_data()[n - 1] = n - 1; + coarse_vector = gen_mtx(n, nrhs); + fine_vector = gen_mtx(m, nrhs); + auto weight = gen_mtx(m, m); + make_weight(weight.get()); + weight_csr = Csr::create(ref); + weight->convert_to(weight_csr.get()); + weight_diag = weight_csr->extract_diagonal(); + auto system_dense = gen_mtx(m, m); + gko::test::make_hpd(system_dense.get()); + system_mtx = Csr::create(ref); + system_dense->convert_to(system_mtx.get()); + + d_agg.set_executor(hip); + d_unfinished_agg.set_executor(hip); + d_strongest_neighbor.set_executor(hip); + d_coarse_vector = Mtx::create(hip); + d_fine_vector = Mtx::create(hip); + d_weight_csr = Csr::create(hip); + d_weight_diag = Diag::create(hip); + d_system_mtx = Csr::create(hip); + d_agg = agg; + d_unfinished_agg = unfinished_agg; + d_strongest_neighbor = strongest_neighbor; + d_coarse_vector->copy_from(coarse_vector.get()); + d_fine_vector->copy_from(fine_vector.get()); + d_weight_csr->copy_from(weight_csr.get()); + 
d_weight_diag->copy_from(weight_diag.get()); + d_system_mtx->copy_from(system_mtx.get()); + } + + void make_weight(Mtx *mtx) + { + gko::test::make_symmetric(mtx); + // only works for real value cases + mtx->compute_absolute_inplace(); + gko::test::make_diag_dominant(mtx); + } + + std::shared_ptr ref; + std::shared_ptr hip; + + std::ranlux48 rand_engine; + + gko::Array agg; + gko::Array unfinished_agg; + gko::Array strongest_neighbor; + + gko::Array d_agg; + gko::Array d_unfinished_agg; + gko::Array d_strongest_neighbor; + + std::unique_ptr coarse_vector; + std::unique_ptr fine_vector; + std::unique_ptr weight_diag; + std::unique_ptr weight_csr; + std::shared_ptr system_mtx; + + std::unique_ptr d_coarse_vector; + std::unique_ptr d_fine_vector; + std::unique_ptr d_weight_diag; + std::unique_ptr d_weight_csr; + std::shared_ptr d_system_mtx; + + gko::size_type n; + gko::size_type m; +}; + + +TEST_F(AmgxPgm, MatchEdgeIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + + gko::kernels::reference::amgx_pgm::match_edge(ref, strongest_neighbor, x); + gko::kernels::hip::amgx_pgm::match_edge(hip, d_strongest_neighbor, d_x); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, CountUnaggIsEquivalentToRef) +{ + initialize_data(); + index_type num_unagg; + index_type d_num_unagg; + + gko::kernels::reference::amgx_pgm::count_unagg(ref, unfinished_agg, + &num_unagg); + gko::kernels::hip::amgx_pgm::count_unagg(hip, d_unfinished_agg, + &d_num_unagg); + + ASSERT_EQ(d_num_unagg, num_unagg); +} + + +TEST_F(AmgxPgm, RenumberIsEquivalentToRef) +{ + initialize_data(); + index_type num_agg; + index_type d_num_agg; + + gko::kernels::reference::amgx_pgm::renumber(ref, agg, &num_agg); + gko::kernels::hip::amgx_pgm::renumber(hip, d_agg, &d_num_agg); + + ASSERT_EQ(d_num_agg, num_agg); + GKO_ASSERT_ARRAY_EQ(d_agg, agg); + ASSERT_EQ(num_agg, n); +} + + +TEST_F(AmgxPgm, FindStrongestNeighborIsEquivalentToRef) +{ + initialize_data(); + auto snb 
= strongest_neighbor; + auto d_snb = d_strongest_neighbor; + + gko::kernels::reference::amgx_pgm::find_strongest_neighbor( + ref, weight_csr.get(), weight_diag.get(), agg, snb); + gko::kernels::hip::amgx_pgm::find_strongest_neighbor( + hip, d_weight_csr.get(), d_weight_diag.get(), d_agg, d_snb); + + GKO_ASSERT_ARRAY_EQ(d_snb, snb); +} + + +TEST_F(AmgxPgm, AssignToExistAggIsEquivalentToRef) +{ + initialize_data(); + auto x = unfinished_agg; + auto d_x = d_unfinished_agg; + auto intermediate_agg = x; + auto d_intermediate_agg = d_x; + + gko::kernels::reference::amgx_pgm::assign_to_exist_agg( + ref, weight_csr.get(), weight_diag.get(), x, intermediate_agg); + gko::kernels::hip::amgx_pgm::assign_to_exist_agg( + hip, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + + GKO_ASSERT_ARRAY_EQ(d_x, x); +} + + +TEST_F(AmgxPgm, AssignToExistAggUnderteminsticIsEquivalentToRef) +{ + initialize_data(); + auto d_x = d_unfinished_agg; + auto d_intermediate_agg = gko::Array(hip, 0); + index_type d_num_unagg; + + gko::kernels::hip::amgx_pgm::assign_to_exist_agg( + hip, d_weight_csr.get(), d_weight_diag.get(), d_x, d_intermediate_agg); + gko::kernels::hip::amgx_pgm::count_unagg(hip, d_agg, &d_num_unagg); + + // only test whether all elements are aggregated. 
+ GKO_ASSERT_EQ(d_num_unagg, 0); +} + + +TEST_F(AmgxPgm, GenerateMgLevelIsEquivalentToRef) +{ + initialize_data(); + auto mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(ref); + auto d_mg_level_factory = gko::multigrid::AmgxPgm::build() + .with_deterministic(true) + .on(hip); + + auto mg_level = mg_level_factory->generate(system_mtx); + auto d_mg_level = d_mg_level_factory->generate(d_system_mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_restrict_op()), + gko::as(mg_level->get_restrict_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_coarse_op()), + gko::as(mg_level->get_coarse_op()), 1e-14); + GKO_ASSERT_MTX_NEAR(gko::as(d_mg_level->get_prolong_op()), + gko::as(mg_level->get_prolong_op()), 1e-14); +} + + +} // namespace diff --git a/hip/test/preconditioner/CMakeLists.txt b/hip/test/preconditioner/CMakeLists.txt index 6f974174421..6769092fde5 100644 --- a/hip/test/preconditioner/CMakeLists.txt +++ b/hip/test/preconditioner/CMakeLists.txt @@ -1,2 +1,2 @@ -ginkgo_create_hip_test_special_linkage(jacobi_kernels) +ginkgo_create_test(jacobi_kernels) ginkgo_create_hip_test(isai_kernels) diff --git a/hip/test/preconditioner/isai_kernels.hip.cpp b/hip/test/preconditioner/isai_kernels.hip.cpp index 88f67c0adb3..bfcb226bbc1 100644 --- a/hip/test/preconditioner/isai_kernels.hip.cpp +++ b/hip/test/preconditioner/isai_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -53,7 +53,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { -enum struct matrix_type { lower, upper }; +enum struct matrix_type { lower, upper, general, spd }; + + class Isai : public ::testing::Test { protected: using value_type = double; @@ -91,9 +93,24 @@ class Isai : public ::testing::Test { auto nz_dist = std::uniform_int_distribution(1, row_limit); auto val_dist = std::uniform_real_distribution(-1., 1.); mtx = Csr::create(ref); - mtx = gko::test::generate_random_triangular_matrix( - n, n, true, for_lower_tm, nz_dist, val_dist, rand_engine, ref, - gko::dim<2>{n, n}); + if (type == matrix_type::general) { + auto dense_mtx = gko::test::generate_random_matrix( + n, n, nz_dist, val_dist, rand_engine, ref, gko::dim<2>{n, n}); + ensure_diagonal(dense_mtx.get()); + mtx->copy_from(dense_mtx.get()); + } else if (type == matrix_type::spd) { + auto dense_mtx = gko::test::generate_random_band_matrix( + n, row_limit / 4, row_limit / 4, val_dist, rand_engine, ref, + gko::dim<2>{n, n}); + auto transp = gko::as(dense_mtx->transpose()); + auto spd_mtx = Dense::create(ref, gko::dim<2>{n, n}); + dense_mtx->apply(transp.get(), spd_mtx.get()); + mtx->copy_from(spd_mtx.get()); + } else { + mtx = gko::test::generate_random_triangular_matrix( + n, n, true, for_lower_tm, nz_dist, val_dist, rand_engine, ref, + gko::dim<2>{n, n}); + } inverse = clone_allocations(mtx.get()); d_mtx = Csr::create(hip); @@ -102,6 +119,13 @@ class Isai : public ::testing::Test { d_inverse->copy_from(inverse.get()); } + void ensure_diagonal(Dense *mtx) + { + for (int i = 0; i < mtx->get_size()[0]; ++i) { + mtx->at(i, i) = gko::one(); + } + } + std::shared_ptr ref; std::shared_ptr hip; @@ -162,6 +186,52 @@ TEST_F(Isai, HipIsaiGenerateUinverseShortIsEquivalentToRef) } +TEST_F(Isai, HipIsaiGenerateAinverseShortIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 615, 15); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(hip, num_rows + 1); + auto da2 = da1; + + 
gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::kernels::hip::isai::generate_general_inverse( + hip, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + false); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_EQ(a1.get_const_data()[num_rows], 0); +} + + +TEST_F(Isai, HipIsaiGenerateSpdinverseShortIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 15); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(hip, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::kernels::hip::isai::generate_general_inverse( + hip, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + true); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_EQ(a1.get_const_data()[num_rows], 0); +} + + TEST_F(Isai, HipIsaiGenerateLinverseLongIsEquivalentToRef) { initialize_data(matrix_type::lower, 554, 64); @@ -208,6 +278,52 @@ TEST_F(Isai, HipIsaiGenerateUinverseLongIsEquivalentToRef) } +TEST_F(Isai, HipIsaiGenerateAinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 695, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(hip, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::kernels::hip::isai::generate_general_inverse( + hip, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + false); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, 
d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_GT(a1.get_const_data()[num_rows], 0); +} + + +TEST_F(Isai, HipIsaiGenerateSpdinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::Array da1(hip, num_rows + 1); + auto da2 = da1; + + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::kernels::hip::isai::generate_general_inverse( + hip, d_mtx.get(), d_inverse.get(), da1.get_data(), da2.get_data(), + true); + + GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse); + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r::value); + GKO_ASSERT_ARRAY_EQ(a1, da1); + GKO_ASSERT_ARRAY_EQ(a2, da2); + ASSERT_GT(a1.get_const_data()[num_rows], 0); +} + + TEST_F(Isai, HipIsaiGenerateExcessLinverseLongIsEquivalentToRef) { initialize_data(matrix_type::lower, 518, 40); @@ -227,10 +343,10 @@ TEST_F(Isai, HipIsaiGenerateExcessLinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get()); + excess.get(), e_rhs.get(), 0, num_rows); gko::kernels::hip::isai::generate_excess_system( hip, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get()); + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -258,10 +374,41 @@ TEST_F(Isai, HipIsaiGenerateExcessUinverseLongIsEquivalentToRef) gko::kernels::reference::isai::generate_excess_system( ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), - excess.get(), e_rhs.get()); + excess.get(), e_rhs.get(), 0, num_rows); + gko::kernels::hip::isai::generate_excess_system( + hip, 
d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiGenerateExcessAinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 100, 51); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(hip, a1); + gko::Array da2(hip, a2); + auto e_dim = a1.get_data()[num_rows]; + auto e_nnz = a2.get_data()[num_rows]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(hip, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(hip, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 0, num_rows); gko::kernels::hip::isai::generate_excess_system( hip, d_mtx.get(), d_inverse.get(), da1.get_const_data(), - da2.get_const_data(), dexcess.get(), de_rhs.get()); + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); @@ -270,6 +417,117 @@ TEST_F(Isai, HipIsaiGenerateExcessUinverseLongIsEquivalentToRef) } +TEST_F(Isai, HipIsaiGenerateExcessSpdinverseLongIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(hip, a1); + gko::Array da2(hip, a2); + auto 
e_dim = a1.get_data()[num_rows]; + auto e_nnz = a2.get_data()[num_rows]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(hip, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(hip, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 0, num_rows); + gko::kernels::hip::isai::generate_excess_system( + hip, d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiGeneratePartialExcessIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(hip, a1); + gko::Array da2(hip, a2); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_nnz = a2.get_data()[10] - a2.get_data()[5]; + auto excess = Csr::create(ref, gko::dim<2>(e_dim, e_dim), e_nnz); + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + auto dexcess = Csr::create(hip, gko::dim<2>(e_dim, e_dim), e_nnz); + auto de_rhs = Dense::create(hip, gko::dim<2>(e_dim, 1)); + + gko::kernels::reference::isai::generate_excess_system( + ref, mtx.get(), inverse.get(), a1.get_const_data(), a2.get_const_data(), + excess.get(), e_rhs.get(), 5u, 10u); + gko::kernels::hip::isai::generate_excess_system( + hip, d_mtx.get(), d_inverse.get(), da1.get_const_data(), + da2.get_const_data(), dexcess.get(), de_rhs.get(), 5u, 10u); + + GKO_ASSERT_MTX_EQ_SPARSITY(excess, dexcess); + 
GKO_ASSERT_MTX_NEAR(excess, dexcess, 0); + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiScaleExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(hip, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(hip); + de_rhs->copy_from(lend(e_rhs)); + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scale_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), 0, num_rows); + gko::kernels::hip::isai::scale_excess_solution(hip, da1.get_const_data(), + de_rhs.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); +} + + +TEST_F(Isai, HipIsaiScalePartialExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(hip, a1); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(hip); + de_rhs->copy_from(lend(e_rhs)); + + gko::kernels::reference::isai::scale_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), 5u, 10u); + gko::kernels::hip::isai::scale_excess_solution(hip, da1.get_const_data(), + de_rhs.get(), 5u, 10u); + + GKO_ASSERT_MTX_NEAR(e_rhs, de_rhs, 0); +} + + TEST_F(Isai, HipIsaiScatterExcessSolutionLIsEquivalentToRef) { initialize_data(matrix_type::lower, 572, 52); @@ -287,9 +545,9 @@ 
TEST_F(Isai, HipIsaiScatterExcessSolutionLIsEquivalentToRef) d_inverse->copy_from(lend(inverse)); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get()); + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); gko::kernels::hip::isai::scatter_excess_solution( - hip, da1.get_const_data(), de_rhs.get(), d_inverse.get()); + hip, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); @@ -314,9 +572,90 @@ TEST_F(Isai, HipIsaiScatterExcessSolutionUIsEquivalentToRef) d_inverse->copy_from(lend(inverse)); gko::kernels::reference::isai::scatter_excess_solution( - ref, a1.get_const_data(), e_rhs.get(), inverse.get()); + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::hip::isai::scatter_excess_solution( + hip, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiScatterExcessSolutionAIsEquivalentToRef) +{ + initialize_data(matrix_type::general, 702, 45); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), false); + gko::Array da1(hip, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(hip); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::hip::isai::scatter_excess_solution( + hip, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, 
d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiScatterExcessSolutionSpdIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(hip, a1); + auto e_dim = a1.get_data()[num_rows]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(hip); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 0, num_rows); + gko::kernels::hip::isai::scatter_excess_solution( + hip, da1.get_const_data(), de_rhs.get(), d_inverse.get(), 0, num_rows); + + GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); + ASSERT_GT(e_dim, 0); +} + + +TEST_F(Isai, HipIsaiScatterPartialExcessSolutionIsEquivalentToRef) +{ + initialize_data(matrix_type::spd, 100, 64); + const auto num_rows = mtx->get_size()[0]; + gko::Array a1(ref, num_rows + 1); + auto a2 = a1; + gko::kernels::reference::isai::generate_general_inverse( + ref, mtx.get(), inverse.get(), a1.get_data(), a2.get_data(), true); + gko::Array da1(hip, a1); + auto e_dim = a1.get_data()[10] - a1.get_data()[5]; + auto e_rhs = Dense::create(ref, gko::dim<2>(e_dim, 1)); + std::fill_n(e_rhs->get_values(), e_dim, 123456); + auto de_rhs = Dense::create(hip); + de_rhs->copy_from(lend(e_rhs)); + // overwrite -1 values with inverse + d_inverse->copy_from(lend(inverse)); + + gko::kernels::reference::isai::scatter_excess_solution( + ref, a1.get_const_data(), e_rhs.get(), inverse.get(), 5u, 10u); gko::kernels::hip::isai::scatter_excess_solution( - hip, da1.get_const_data(), de_rhs.get(), d_inverse.get()); + hip, da1.get_const_data(), de_rhs.get(), 
d_inverse.get(), 5u, 10u); GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 0); ASSERT_GT(e_dim, 0); diff --git a/hip/test/preconditioner/jacobi_kernels.cpp b/hip/test/preconditioner/jacobi_kernels.cpp index 868e10fbbad..4ef05dc7fff 100644 --- a/hip/test/preconditioner/jacobi_kernels.cpp +++ b/hip/test/preconditioner/jacobi_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/utils/unsort_matrix.hpp" #include "hip/test/utils.hip.hpp" @@ -75,7 +76,7 @@ class Jacobi : public ::testing::Test { std::initializer_list block_precisions, std::initializer_list condition_numbers, gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, - double accuracy = 0.1) + double accuracy = 0.1, bool skip_sorting = true) { std::ranlux48 engine(42); const auto dim = *(end(block_pointers) - 1); @@ -103,6 +104,7 @@ class Jacobi : public ::testing::Test { .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) .with_max_block_stride(gko::uint32(hip->get_warp_size())) + .with_skip_sorting(skip_sorting) .on(ref); d_bj_factory = Bj::build() .with_max_block_size(max_block_size) @@ -116,12 +118,14 @@ class Jacobi : public ::testing::Test { .with_max_block_stride(gko::uint32(hip->get_warp_size())) .with_storage_optimization(block_prec) .with_accuracy(accuracy) + .with_skip_sorting(skip_sorting) .on(ref); d_bj_factory = Bj::build() .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) .with_storage_optimization(block_prec) .with_accuracy(accuracy) + .with_skip_sorting(skip_sorting) .on(hip); } b = gko::test::generate_random_matrix( @@ -294,7 +298,7 @@ TEST_F(Jacobi, } -TEST_F(Jacobi, HipPreconditionerEquivalentToRefWithBlockSize32) 
+TEST_F(Jacobi, HipPreconditionerEquivalentToRefWithBlockSize32Sorted) { initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110); @@ -305,6 +309,19 @@ TEST_F(Jacobi, HipPreconditionerEquivalentToRefWithBlockSize32) } +TEST_F(Jacobi, HipPreconditionerEquivalentToRefWithBlockSize32Unsorted) +{ + std::ranlux48 engine(43); + initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110, 1, 0.1, false); + gko::test::unsort_matrix(mtx.get(), engine); + + auto bj = bj_factory->generate(mtx); + auto d_bj = d_bj_factory->generate(mtx); + + GKO_ASSERT_MTX_NEAR(gko::as(d_bj.get()), gko::as(bj.get()), 1e-13); +} + + #if GINKGO_HIP_PLATFORM_HCC TEST_F(Jacobi, HipPreconditionerEquivalentToRefWithBlockSize64) { @@ -426,6 +443,37 @@ TEST_F(Jacobi, HipApplyEquivalentToRef) } +TEST_F(Jacobi, HipScalarApplyEquivalentToRef) +{ + gko::size_type dim = 313; + std::ranlux48 engine(42); + auto dense_smtx = gko::share(gko::test::generate_random_matrix( + dim, dim, std::uniform_int_distribution<>(1, dim), + std::normal_distribution<>(1.0, 2.0), engine, ref)); + gko::test::make_diag_dominant(dense_smtx.get()); + auto smtx = gko::share(Mtx::create(ref)); + smtx->copy_from(dense_smtx.get()); + auto sb = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref)); + auto sx = Vec::create(ref, sb->get_size()); + + auto d_smtx = gko::share(Mtx::create(hip)); + auto d_sb = gko::share(Vec::create(hip)); + auto d_sx = gko::share(Vec::create(hip, sb->get_size())); + d_smtx->copy_from(smtx.get()); + d_sb->copy_from(sb.get()); + + auto sj = Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(hip)->generate(d_smtx); + + sj->apply(sb.get(), sx.get()); + d_sj->apply(d_sb.get(), d_sx.get()); + + GKO_ASSERT_MTX_NEAR(sx.get(), d_sx.get(), 1e-12); +} + + TEST_F(Jacobi, HipLinearCombinationApplyEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 
80, 92, 100}, {}, {}, 13, @@ -444,6 +492,46 @@ TEST_F(Jacobi, HipLinearCombinationApplyEquivalentToRef) } +TEST_F(Jacobi, HipScalarLinearCombinationApplyEquivalentToRef) +{ + gko::size_type dim = 313; + std::ranlux48 engine(42); + auto dense_smtx = gko::share(gko::test::generate_random_matrix( + dim, dim, std::uniform_int_distribution<>(1, dim), + std::normal_distribution<>(1.0, 2.0), engine, ref)); + gko::test::make_diag_dominant(dense_smtx.get()); + auto smtx = gko::share(Mtx::create(ref)); + smtx->copy_from(dense_smtx.get()); + auto sb = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref, gko::dim<2>(dim, 3), + 4)); + auto sx = gko::share(gko::test::generate_random_matrix( + dim, 3, std::uniform_int_distribution<>(1, 1), + std::normal_distribution<>(0.0, 1.0), engine, ref, gko::dim<2>(dim, 3), + 4)); + + auto d_smtx = gko::share(Mtx::create(hip)); + auto d_sb = gko::share(Vec::create(hip)); + auto d_sx = gko::share(Vec::create(hip)); + d_smtx->copy_from(smtx.get()); + d_sb->copy_from(sb.get()); + d_sx->copy_from(sx.get()); + auto alpha = gko::initialize({2.0}, ref); + auto d_alpha = gko::initialize({2.0}, hip); + auto beta = gko::initialize({-1.0}, ref); + auto d_beta = gko::initialize({-1.0}, hip); + + auto sj = Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(hip)->generate(d_smtx); + + sj->apply(alpha.get(), sb.get(), beta.get(), sx.get()); + d_sj->apply(d_alpha.get(), d_sb.get(), d_beta.get(), d_sx.get()); + + GKO_ASSERT_MTX_NEAR(sx.get(), d_sx.get(), 1e-12); +} + + TEST_F(Jacobi, HipApplyToMultipleVectorsEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 13, diff --git a/hip/test/solver/CMakeLists.txt b/hip/test/solver/CMakeLists.txt index 3ec7956cf65..29454e2b5e7 100644 --- a/hip/test/solver/CMakeLists.txt +++ b/hip/test/solver/CMakeLists.txt @@ -1,9 +1,5 @@ 
-ginkgo_create_hip_test_special_linkage(bicg_kernels) -ginkgo_create_hip_test_special_linkage(bicgstab_kernels) -ginkgo_create_hip_test_special_linkage(cg_kernels) -ginkgo_create_hip_test_special_linkage(cgs_kernels) -ginkgo_create_hip_test_special_linkage(fcg_kernels) -ginkgo_create_hip_test_special_linkage(gmres_kernels) -ginkgo_create_hip_test_special_linkage(ir_kernels) -ginkgo_create_hip_test_special_linkage(lower_trs_kernels) -ginkgo_create_hip_test_special_linkage(upper_trs_kernels) +ginkgo_create_test(gmres_kernels) +ginkgo_create_test(cb_gmres_kernels) +ginkgo_create_test(idr_kernels) +ginkgo_create_test(lower_trs_kernels) +ginkgo_create_test(upper_trs_kernels) diff --git a/hip/test/solver/bicg_kernels.cpp b/hip/test/solver/bicg_kernels.cpp deleted file mode 100644 index 67fda77f84b..00000000000 --- a/hip/test/solver/bicg_kernels.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include -#include -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/bicg_kernels.hpp" -#include "hip/test/utils.hip.hpp" -#include "matrices/config.hpp" - - -namespace { - - -class Bicg : public ::testing::Test { -protected: - using value_type = gko::default_precision; - using index_type = gko::int32; - using Mtx = gko::matrix::Dense<>; - using Csr = gko::matrix::Csr; - Bicg() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - - std::string file_name(gko::matrices::location_ani1_mtx); - auto input_file = std::ifstream(file_name, std::ios::in); - if (!input_file) { - FAIL() << "Could not find the file \"" << file_name - << "\", which is required for this test.\n"; - } - csr_ref = gko::read(input_file, ref); - auto csr_hip_temp = Csr::create(hip); - csr_hip_temp->copy_from(gko::lend(csr_ref)); - csr_hip = gko::give(csr_hip_temp); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void 
initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - r2 = gen_mtx(m, n); - z2 = gen_mtx(m, n); - p2 = gen_mtx(m, n); - q2 = gen_mtx(m, n); - x = gen_mtx(m, n); - beta = gen_mtx(1, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - d_r = Mtx::create(hip); - d_r->copy_from(r.get()); - d_z = Mtx::create(hip); - d_z->copy_from(z.get()); - d_p = Mtx::create(hip); - d_p->copy_from(p.get()); - d_q = Mtx::create(hip); - d_q->copy_from(q.get()); - d_r2 = Mtx::create(hip); - d_r2->copy_from(r2.get()); - d_z2 = Mtx::create(hip); - d_z2->copy_from(z2.get()); - d_p2 = Mtx::create(hip); - d_p2->copy_from(p2.get()); - d_q2 = Mtx::create(hip); - d_q2->copy_from(q2.get()); - d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - d_beta = Mtx::create(hip); - d_beta->copy_from(beta.get()); - d_prev_rho = Mtx::create(hip); - d_prev_rho->copy_from(prev_rho.get()); - d_rho = Mtx::create(hip); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(hip, n)); - *d_stop_status = *stop_status; - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; - - std::unique_ptr b; - std::unique_ptr r; - 
std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr r2; - std::unique_ptr z2; - std::unique_ptr p2; - std::unique_ptr q2; - std::unique_ptr x; - std::unique_ptr beta; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr> stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_r2; - std::unique_ptr d_z2; - std::unique_ptr d_p2; - std::unique_ptr d_q2; - std::unique_ptr d_x; - std::unique_ptr d_beta; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr> d_stop_status; - std::shared_ptr csr_ref; - std::shared_ptr csr_hip; -}; - - -TEST_F(Bicg, HipBicgInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicg::initialize( - ref, b.get(), r.get(), z.get(), p.get(), q.get(), prev_rho.get(), - rho.get(), r2.get(), z2.get(), p2.get(), q2.get(), stop_status.get()); - gko::kernels::hip::bicg::initialize( - hip, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), - d_prev_rho.get(), d_rho.get(), d_r2.get(), d_z2.get(), d_p2.get(), - d_q2.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r2, r2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z2, z2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p2, p2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q2, q2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Bicg, HipBicgStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicg::step_1(ref, p.get(), z.get(), p2.get(), - z2.get(), rho.get(), prev_rho.get(), - stop_status.get()); - gko::kernels::hip::bicg::step_1(hip, d_p.get(), d_z.get(), d_p2.get(), - d_z2.get(), d_rho.get(), d_prev_rho.get(), - d_stop_status.get()); - - 
GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p2, p2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z2, z2, 1e-14); -} - - -TEST_F(Bicg, HipBicgStep2IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicg::step_2( - ref, x.get(), r.get(), r2.get(), p.get(), q.get(), q2.get(), beta.get(), - rho.get(), stop_status.get()); - gko::kernels::hip::bicg::step_2( - hip, d_x.get(), d_r.get(), d_r2.get(), d_p.get(), d_q.get(), d_q2.get(), - d_beta.get(), d_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r2, r2, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q2, q2, 1e-14); -} - - -TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = Mtx::create(hip); - d_mtx->copy_from(mtx.get()); - auto d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - auto d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - auto bicg_factory = - gko::solver::Bicg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_bicg_factory = - gko::solver::Bicg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(hip)) - .on(hip); - auto solver = bicg_factory->generate(std::move(mtx)); - auto d_solver = d_bicg_factory->generate(std::move(d_mtx)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(Bicg, ApplyWithSuiteSparseMatrixIsEquivalentToRef) -{ - auto x = gen_mtx(36, 1); - auto b = gen_mtx(36, 1); - auto d_x = Mtx::create(hip); - 
d_x->copy_from(x.get()); - auto d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - auto bicg_factory = - gko::solver::Bicg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_bicg_factory = - gko::solver::Bicg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(hip)) - .on(hip); - auto solver = bicg_factory->generate(std::move(csr_ref)); - auto d_solver = d_bicg_factory->generate(std::move(csr_hip)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -} // namespace diff --git a/hip/test/solver/bicgstab_kernels.cpp b/hip/test/solver/bicgstab_kernels.cpp deleted file mode 100644 index 999b40bebaa..00000000000 --- a/hip/test/solver/bicgstab_kernels.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include -#include - - -#include "core/solver/bicgstab_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Bicgstab : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - using Solver = gko::solver::Bicgstab<>; - - Bicgstab() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - - mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); - d_mtx = Mtx::create(hip); - d_mtx->copy_from(mtx.get()); - - hip_bicgstab_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(hip)) - .on(hip); - ref_bicgstab_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(ref)) - .on(ref); - } - - void TearDown() - { - if (hip != 
nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(0.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 17; - x = gen_mtx(m, n); - b = gen_mtx(m, n); - r = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - rr = gen_mtx(m, n); - s = gen_mtx(m, n); - t = gen_mtx(m, n); - y = gen_mtx(m, n); - v = gen_mtx(m, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - alpha = gen_mtx(1, n); - beta = gen_mtx(1, n); - gamma = gen_mtx(1, n); - omega = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < n; ++i) { - stop_status->get_data()[i].reset(); - } - - d_x = Mtx::create(hip); - d_b = Mtx::create(hip); - d_r = Mtx::create(hip); - d_z = Mtx::create(hip); - d_p = Mtx::create(hip); - d_t = Mtx::create(hip); - d_s = Mtx::create(hip); - d_y = Mtx::create(hip); - d_v = Mtx::create(hip); - d_rr = Mtx::create(hip); - d_prev_rho = Mtx::create(hip); - d_rho = Mtx::create(hip); - d_alpha = Mtx::create(hip); - d_beta = Mtx::create(hip); - d_gamma = Mtx::create(hip); - d_omega = Mtx::create(hip); - d_stop_status = std::unique_ptr>( - new gko::Array(hip)); - - d_x->copy_from(x.get()); - d_b->copy_from(b.get()); - d_r->copy_from(r.get()); - d_z->copy_from(z.get()); - d_p->copy_from(p.get()); - d_v->copy_from(v.get()); - d_y->copy_from(y.get()); - d_t->copy_from(t.get()); - d_s->copy_from(s.get()); - d_rr->copy_from(rr.get()); - d_prev_rho->copy_from(prev_rho.get()); - d_rho->copy_from(rho.get()); - d_alpha->copy_from(alpha.get()); - d_beta->copy_from(beta.get()); - d_gamma->copy_from(gamma.get()); - d_omega->copy_from(omega.get()); - *d_stop_status = - *stop_status; // copy_from is not a public member function of Array - } - - void make_diag_dominant(Mtx *mtx) - { - using 
std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; - - std::shared_ptr mtx; - std::shared_ptr d_mtx; - std::unique_ptr hip_bicgstab_factory; - std::unique_ptr ref_bicgstab_factory; - - std::unique_ptr x; - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr rr; - std::unique_ptr s; - std::unique_ptr t; - std::unique_ptr y; - std::unique_ptr v; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr gamma; - std::unique_ptr omega; - std::unique_ptr> stop_status; - - std::unique_ptr d_x; - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_t; - std::unique_ptr d_s; - std::unique_ptr d_y; - std::unique_ptr d_v; - std::unique_ptr d_rr; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr d_alpha; - std::unique_ptr d_beta; - std::unique_ptr d_gamma; - std::unique_ptr d_omega; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Bicgstab, HipBicgstabInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicgstab::initialize( - ref, b.get(), r.get(), rr.get(), y.get(), s.get(), t.get(), z.get(), - v.get(), p.get(), prev_rho.get(), rho.get(), alpha.get(), beta.get(), - gamma.get(), omega.get(), stop_status.get()); - gko::kernels::hip::bicgstab::initialize( - hip, d_b.get(), d_r.get(), d_rr.get(), d_y.get(), d_s.get(), d_t.get(), - d_z.get(), d_v.get(), d_p.get(), d_prev_rho.get(), d_rho.get(), - d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get(), - d_stop_status.get()); - - GKO_EXPECT_MTX_NEAR(d_r, r, 1e-14); - GKO_EXPECT_MTX_NEAR(d_z, z, 1e-14); - GKO_EXPECT_MTX_NEAR(d_p, p, 1e-14); - GKO_EXPECT_MTX_NEAR(d_y, y, 1e-14); - 
GKO_EXPECT_MTX_NEAR(d_t, t, 1e-14); - GKO_EXPECT_MTX_NEAR(d_s, s, 1e-14); - GKO_EXPECT_MTX_NEAR(d_rr, rr, 1e-14); - GKO_EXPECT_MTX_NEAR(d_v, v, 1e-14); - GKO_EXPECT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); - GKO_EXPECT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_EXPECT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_EXPECT_MTX_NEAR(d_beta, beta, 1e-14); - GKO_EXPECT_MTX_NEAR(d_gamma, gamma, 1e-14); - GKO_EXPECT_MTX_NEAR(d_omega, omega, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Bicgstab, HipBicgstabStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicgstab::step_1( - ref, r.get(), p.get(), v.get(), rho.get(), prev_rho.get(), alpha.get(), - omega.get(), stop_status.get()); - gko::kernels::hip::bicgstab::step_1( - hip, d_r.get(), d_p.get(), d_v.get(), d_rho.get(), d_prev_rho.get(), - d_alpha.get(), d_omega.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); -} - - -TEST_F(Bicgstab, HipBicgstabStep2IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicgstab::step_2(ref, r.get(), s.get(), v.get(), - rho.get(), alpha.get(), - beta.get(), stop_status.get()); - gko::kernels::hip::bicgstab::step_2(hip, d_r.get(), d_s.get(), d_v.get(), - d_rho.get(), d_alpha.get(), - d_beta.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_ASSERT_MTX_NEAR(d_s, s, 1e-14); -} - - -TEST_F(Bicgstab, HipBicgstabStep3IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::bicgstab::step_3( - ref, x.get(), r.get(), s.get(), t.get(), y.get(), z.get(), alpha.get(), - beta.get(), gamma.get(), omega.get(), stop_status.get()); - gko::kernels::hip::bicgstab::step_3( - hip, d_x.get(), d_r.get(), d_s.get(), d_t.get(), d_y.get(), d_z.get(), - d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_omega, omega, 1e-14); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); -} - - -TEST_F(Bicgstab, 
HipBicgstabApplyOneRHSIsEquivalentToRef) -{ - int m = 123; - int n = 1; - auto ref_solver = ref_bicgstab_factory->generate(mtx); - auto hip_solver = hip_bicgstab_factory->generate(d_mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(hip); - auto d_x = Mtx::create(hip); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - hip_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - - -TEST_F(Bicgstab, HipBicgstabApplyMultipleRHSIsEquivalentToRef) -{ - int m = 123; - int n = 16; - auto hip_solver = hip_bicgstab_factory->generate(d_mtx); - auto ref_solver = ref_bicgstab_factory->generate(mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(hip); - auto d_x = Mtx::create(hip); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - hip_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - - -} // namespace diff --git a/hip/test/solver/cb_gmres_kernels.cpp b/hip/test/solver/cb_gmres_kernels.cpp new file mode 100644 index 00000000000..b5114129935 --- /dev/null +++ b/hip/test/solver/cb_gmres_kernels.cpp @@ -0,0 +1,364 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/solver/cb_gmres_accessor.hpp" +#include "core/solver/cb_gmres_kernels.hpp" +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class CbGmres : public ::testing::Test { +protected: + using value_type = double; + using storage_type = float; + using index_type = int; + using size_type = gko::size_type; + using Range3dHelper = + gko::cb_gmres::Range3dHelper; + using Range3d = typename Range3dHelper::Range; + using Dense = gko::matrix::Dense; + using Mtx = Dense; + static constexpr unsigned int default_krylov_dim_mixed{100}; + + CbGmres() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::unique_ptr 
gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, ref); + } + + Range3dHelper generate_krylov_helper(gko::dim<3> size) + { + auto helper = Range3dHelper{ref, size}; + auto &bases = helper.get_bases(); + const auto num_rows = size[0] * size[1]; + const auto num_cols = size[2]; + auto temp_krylov_bases = gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution(num_cols, num_cols), + std::normal_distribution(-1.0, 1.0), rand_engine, + ref); + std::copy_n(temp_krylov_bases->get_const_values(), + bases.get_num_elems(), bases.get_data()); + // Only useful when the Accessor actually has a scale + auto range = helper.get_range(); + auto dist = std::normal_distribution(-1, 1); + for (size_type k = 0; k < size[0]; ++k) { + for (size_type i = 0; i < size[2]; ++i) { + gko::cb_gmres::helper_functions_accessor::write_scalar( + range, k, i, dist(rand_engine)); + } + } + return helper; + } + + void initialize_data() + { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else + int m = 597; +#endif + int n = 43; + x = gen_mtx(m, n); + y = gen_mtx(default_krylov_dim_mixed, n); + before_preconditioner = Mtx::create_with_config_of(x.get()); + b = gen_mtx(m, n); + arnoldi_norm = gen_mtx(3, n); + gko::dim<3> krylov_bases_dim(default_krylov_dim_mixed + 1, m, n); + range_helper = generate_krylov_helper(krylov_bases_dim); + + next_krylov_basis = gen_mtx(m, n); + hessenberg = + gen_mtx(default_krylov_dim_mixed + 1, default_krylov_dim_mixed * n); + hessenberg_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + buffer_iter = gen_mtx(default_krylov_dim_mixed + 1, n); + residual = gen_mtx(m, n); + residual_norm = gen_mtx(1, n); + residual_norm_collection = gen_mtx(default_krylov_dim_mixed + 1, n); + givens_sin = gen_mtx(default_krylov_dim_mixed, n); + givens_cos = gen_mtx(default_krylov_dim_mixed, n); + 
stop_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { + stop_status->get_data()[i].reset(); + } + reorth_status = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < reorth_status->get_num_elems(); ++i) { + reorth_status->get_data()[i].reset(); + } + final_iter_nums = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < final_iter_nums->get_num_elems(); ++i) { + final_iter_nums->get_data()[i] = 5; + } + num_reorth = std::unique_ptr>( + new gko::Array(ref, n)); + for (size_t i = 0; i < num_reorth->get_num_elems(); ++i) { + num_reorth->get_data()[i] = 5; + } + + d_x = Mtx::create(hip); + d_x->copy_from(x.get()); + d_before_preconditioner = Mtx::create_with_config_of(d_x.get()); + d_y = Mtx::create(hip); + d_y->copy_from(y.get()); + d_b = Mtx::create(hip); + d_b->copy_from(b.get()); + d_arnoldi_norm = Mtx::create(hip); + d_arnoldi_norm->copy_from(arnoldi_norm.get()); + d_range_helper = Range3dHelper{hip, {}}; + d_range_helper = range_helper; + d_next_krylov_basis = Mtx::create(hip); + d_next_krylov_basis->copy_from(next_krylov_basis.get()); + d_hessenberg = Mtx::create(hip); + d_hessenberg->copy_from(hessenberg.get()); + d_hessenberg_iter = Mtx::create(hip); + d_hessenberg_iter->copy_from(hessenberg_iter.get()); + d_buffer_iter = Mtx::create(hip); + d_buffer_iter->copy_from(buffer_iter.get()); + d_residual = Mtx::create(hip); + d_residual->copy_from(residual.get()); + d_residual_norm = Mtx::create(hip); + d_residual_norm->copy_from(residual_norm.get()); + d_residual_norm_collection = Mtx::create(hip); + d_residual_norm_collection->copy_from(residual_norm_collection.get()); + d_givens_sin = Mtx::create(hip); + d_givens_sin->copy_from(givens_sin.get()); + d_givens_cos = Mtx::create(hip); + d_givens_cos->copy_from(givens_cos.get()); + d_stop_status = std::unique_ptr>( + new gko::Array(hip, n)); + *d_stop_status = *stop_status; + d_reorth_status = std::unique_ptr>( + new 
gko::Array(hip, n)); + *d_reorth_status = *reorth_status; + d_final_iter_nums = std::unique_ptr>( + new gko::Array(hip, n)); + *d_final_iter_nums = *final_iter_nums; + d_num_reorth = std::unique_ptr>( + new gko::Array(hip, n)); + *d_num_reorth = *num_reorth; + } + + void assert_krylov_bases_near() + { + gko::Array d_to_host{ref}; + auto &krylov_bases = range_helper.get_bases(); + d_to_host = d_range_helper.get_bases(); + const auto tolerance = r::value; + using std::abs; + for (gko::size_type i = 0; i < krylov_bases.get_num_elems(); ++i) { + const auto ref_value = krylov_bases.get_const_data()[i]; + const auto dev_value = d_to_host.get_const_data()[i]; + ASSERT_LE(abs(dev_value - ref_value), tolerance); + } + } + + std::shared_ptr ref; + std::shared_ptr hip; + + std::ranlux48 rand_engine; + + std::unique_ptr before_preconditioner; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr b; + std::unique_ptr arnoldi_norm; + Range3dHelper range_helper; + std::unique_ptr next_krylov_basis; + std::unique_ptr hessenberg; + std::unique_ptr hessenberg_iter; + std::unique_ptr buffer_iter; + std::unique_ptr residual; + std::unique_ptr residual_norm; + std::unique_ptr residual_norm_collection; + std::unique_ptr givens_sin; + std::unique_ptr givens_cos; + std::unique_ptr> stop_status; + std::unique_ptr> reorth_status; + std::unique_ptr> final_iter_nums; + std::unique_ptr> num_reorth; + + std::unique_ptr d_x; + std::unique_ptr d_before_preconditioner; + std::unique_ptr d_y; + std::unique_ptr d_b; + std::unique_ptr d_arnoldi_norm; + Range3dHelper d_range_helper; + std::unique_ptr d_next_krylov_basis; + std::unique_ptr d_hessenberg; + std::unique_ptr d_hessenberg_iter; + std::unique_ptr d_buffer_iter; + std::unique_ptr d_residual; + std::unique_ptr d_residual_norm; + std::unique_ptr d_residual_norm_collection; + std::unique_ptr d_givens_sin; + std::unique_ptr d_givens_cos; + std::unique_ptr> d_stop_status; + std::unique_ptr> d_reorth_status; + std::unique_ptr> 
d_final_iter_nums; + std::unique_ptr> d_num_reorth; +}; + + +TEST_F(CbGmres, HipCbGmresInitialize1IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_1( + ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + stop_status.get(), default_krylov_dim_mixed); + gko::kernels::hip::cb_gmres::initialize_1( + hip, d_b.get(), d_residual.get(), d_givens_sin.get(), + d_givens_cos.get(), d_stop_status.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_residual, residual, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14); + GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); +} + + +TEST_F(CbGmres, HipCbGmresInitialize2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::initialize_2( + ref, residual.get(), residual_norm.get(), + residual_norm_collection.get(), arnoldi_norm.get(), + range_helper.get_range(), next_krylov_basis.get(), + final_iter_nums.get(), default_krylov_dim_mixed); + gko::kernels::hip::cb_gmres::initialize_2( + hip, d_residual.get(), d_residual_norm.get(), + d_residual_norm_collection.get(), d_arnoldi_norm.get(), + d_range_helper.get_range(), d_next_krylov_basis.get(), + d_final_iter_nums.get(), default_krylov_dim_mixed); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + 1e-14); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, HipCbGmresStep1IsEquivalentToRef) +{ + initialize_data(); + int iter = 5; + + gko::kernels::reference::cb_gmres::step_1( + ref, next_krylov_basis.get(), givens_sin.get(), givens_cos.get(), + residual_norm.get(), residual_norm_collection.get(), + range_helper.get_range(), hessenberg_iter.get(), buffer_iter.get(), + arnoldi_norm.get(), iter, final_iter_nums.get(), 
stop_status.get(), + reorth_status.get(), num_reorth.get()); + gko::kernels::hip::cb_gmres::step_1( + hip, d_next_krylov_basis.get(), d_givens_sin.get(), d_givens_cos.get(), + d_residual_norm.get(), d_residual_norm_collection.get(), + d_range_helper.get_range(), d_hessenberg_iter.get(), + d_buffer_iter.get(), d_arnoldi_norm.get(), iter, + d_final_iter_nums.get(), d_stop_status.get(), d_reorth_status.get(), + d_num_reorth.get()); + + GKO_ASSERT_MTX_NEAR(d_arnoldi_norm, arnoldi_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_next_krylov_basis, next_krylov_basis, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, 1e-14); + GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, 1e-14); + GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, + 1e-14); + GKO_ASSERT_MTX_NEAR(d_hessenberg_iter, hessenberg_iter, 1e-14); + assert_krylov_bases_near(); + GKO_ASSERT_ARRAY_EQ(*d_final_iter_nums, *final_iter_nums); +} + + +TEST_F(CbGmres, HipCbGmresStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::cb_gmres::step_2( + ref, residual_norm_collection.get(), + range_helper.get_range().get_accessor().to_const(), hessenberg.get(), + y.get(), before_preconditioner.get(), final_iter_nums.get()); + gko::kernels::hip::cb_gmres::step_2( + hip, d_residual_norm_collection.get(), + d_range_helper.get_range().get_accessor().to_const(), + d_hessenberg.get(), d_y.get(), d_before_preconditioner.get(), + d_final_iter_nums.get()); + + GKO_ASSERT_MTX_NEAR(d_y, y, 1e-14); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); +} + + +} // namespace diff --git a/hip/test/solver/cg_kernels.cpp b/hip/test/solver/cg_kernels.cpp deleted file mode 100644 index db472f22000..00000000000 --- a/hip/test/solver/cg_kernels.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/cg_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Cg : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - Cg() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - x = gen_mtx(m, n); - beta = gen_mtx(1, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - d_r = Mtx::create(hip); - d_r->copy_from(r.get()); - d_z = Mtx::create(hip); - d_z->copy_from(z.get()); - d_p = Mtx::create(hip); - d_p->copy_from(p.get()); - d_q = Mtx::create(hip); - d_q->copy_from(q.get()); - d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - d_beta = Mtx::create(hip); - d_beta->copy_from(beta.get()); - d_prev_rho = Mtx::create(hip); - d_prev_rho->copy_from(prev_rho.get()); - d_rho = Mtx::create(hip); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(hip, n)); - *d_stop_status = *stop_status; - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < 
mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr x; - std::unique_ptr beta; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr> stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_x; - std::unique_ptr d_beta; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Cg, HipCgInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cg::initialize(ref, b.get(), r.get(), z.get(), - p.get(), q.get(), prev_rho.get(), - rho.get(), stop_status.get()); - gko::kernels::hip::cg::initialize(hip, d_b.get(), d_r.get(), d_z.get(), - d_p.get(), d_q.get(), d_prev_rho.get(), - d_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Cg, HipCgStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cg::step_1(ref, p.get(), z.get(), rho.get(), - prev_rho.get(), stop_status.get()); - gko::kernels::hip::cg::step_1(hip, d_p.get(), d_z.get(), d_rho.get(), - d_prev_rho.get(), 
d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); -} - - -TEST_F(Cg, HipCgStep2IsEquivalentToRef) -{ - initialize_data(); - gko::kernels::reference::cg::step_2(ref, x.get(), r.get(), p.get(), q.get(), - beta.get(), rho.get(), - stop_status.get()); - gko::kernels::hip::cg::step_2(hip, d_x.get(), d_r.get(), d_p.get(), - d_q.get(), d_beta.get(), d_rho.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); -} - - -TEST_F(Cg, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = Mtx::create(hip); - d_mtx->copy_from(mtx.get()); - auto d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - auto d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - auto cg_factory = - gko::solver::Cg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_cg_factory = - gko::solver::Cg<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(hip)) - .on(hip); - auto solver = cg_factory->generate(std::move(mtx)); - auto d_solver = d_cg_factory->generate(std::move(d_mtx)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -} // namespace diff --git a/hip/test/solver/cgs_kernels.cpp b/hip/test/solver/cgs_kernels.cpp deleted file mode 100644 index ff676c2dffc..00000000000 --- a/hip/test/solver/cgs_kernels.cpp +++ /dev/null @@ -1,349 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/cgs_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Cgs : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - using Solver = gko::solver::Cgs<>; - - Cgs() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - - mtx = gen_mtx(123, 123); - make_diag_dominant(mtx.get()); - d_mtx = Mtx::create(hip); - d_mtx->copy_from(mtx.get()); - hip_cgs_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(hip)) - .on(hip); - ref_cgs_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-15) - .on(ref)) - .on(ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(0.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - r_tld = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - u = gen_mtx(m, n); - u_hat = gen_mtx(m, n); - v_hat = gen_mtx(m, n); - t = gen_mtx(m, n); - x = gen_mtx(m, n); - alpha = gen_mtx(1, n); - beta = gen_mtx(1, n); - gamma = gen_mtx(1, n); - rho = gen_mtx(1, n); - rho_prev = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < 
stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - d_r = Mtx::create(hip); - d_r->copy_from(r.get()); - d_r_tld = Mtx::create(hip); - d_r_tld->copy_from(r_tld.get()); - d_p = Mtx::create(hip); - d_p->copy_from(p.get()); - d_q = Mtx::create(hip); - d_q->copy_from(q.get()); - d_u = Mtx::create(hip); - d_u->copy_from(u.get()); - d_u_hat = Mtx::create(hip); - d_u_hat->copy_from(u_hat.get()); - d_v_hat = Mtx::create(hip); - d_v_hat->copy_from(v_hat.get()); - d_t = Mtx::create(hip); - d_t->copy_from(t.get()); - d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - d_alpha = Mtx::create(hip); - d_alpha->copy_from(alpha.get()); - d_beta = Mtx::create(hip); - d_beta->copy_from(beta.get()); - d_gamma = Mtx::create(hip); - d_gamma->copy_from(gamma.get()); - d_rho_prev = Mtx::create(hip); - d_rho_prev->copy_from(rho_prev.get()); - d_rho = Mtx::create(hip); - d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(hip, n)); - // because there is no public function copy_from, use overloaded = - // operator - *d_stop_status = *stop_status; - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; - - std::shared_ptr mtx; - std::shared_ptr d_mtx; - std::unique_ptr hip_cgs_factory; - std::unique_ptr ref_cgs_factory; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr r_tld; - std::unique_ptr t; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr u; - std::unique_ptr u_hat; - std::unique_ptr v_hat; - std::unique_ptr x; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr gamma; - std::unique_ptr rho; - std::unique_ptr rho_prev; - std::unique_ptr> stop_status; - - 
std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_r_tld; - std::unique_ptr d_t; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_u; - std::unique_ptr d_u_hat; - std::unique_ptr d_v_hat; - std::unique_ptr d_x; - std::unique_ptr d_alpha; - std::unique_ptr d_beta; - std::unique_ptr d_gamma; - std::unique_ptr d_rho; - std::unique_ptr d_rho_prev; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Cgs, HipCgsInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::initialize( - ref, b.get(), r.get(), r_tld.get(), p.get(), q.get(), u.get(), - u_hat.get(), v_hat.get(), t.get(), alpha.get(), beta.get(), gamma.get(), - rho_prev.get(), rho.get(), stop_status.get()); - gko::kernels::hip::cgs::initialize( - hip, d_b.get(), d_r.get(), d_r_tld.get(), d_p.get(), d_q.get(), - d_u.get(), d_u_hat.get(), d_v_hat.get(), d_t.get(), d_alpha.get(), - d_beta.get(), d_gamma.get(), d_rho_prev.get(), d_rho.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r_tld, r_tld, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u, u, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u_hat, u_hat, 1e-14); - GKO_ASSERT_MTX_NEAR(d_v_hat, v_hat, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho_prev, rho_prev, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_ASSERT_MTX_NEAR(d_beta, beta, 1e-14); - GKO_ASSERT_MTX_NEAR(d_gamma, gamma, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Cgs, HipCgsStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_1(ref, r.get(), u.get(), p.get(), - q.get(), beta.get(), rho.get(), - rho_prev.get(), stop_status.get()); - gko::kernels::hip::cgs::step_1(hip, d_r.get(), d_u.get(), d_p.get(), - d_q.get(), d_beta.get(), d_rho.get(), - d_rho_prev.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_beta, 
beta, 1e-14); - GKO_ASSERT_MTX_NEAR(d_u, u, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); -} - - -TEST_F(Cgs, HipCgsStep2IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_2(ref, u.get(), v_hat.get(), q.get(), - t.get(), alpha.get(), rho.get(), - gamma.get(), stop_status.get()); - gko::kernels::hip::cgs::step_2(hip, d_u.get(), d_v_hat.get(), d_q.get(), - d_t.get(), d_alpha.get(), d_rho.get(), - d_gamma.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); -} - - -TEST_F(Cgs, HipCgsStep3IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::cgs::step_3(ref, t.get(), u_hat.get(), r.get(), - x.get(), alpha.get(), - stop_status.get()); - gko::kernels::hip::cgs::step_3(hip, d_t.get(), d_u_hat.get(), d_r.get(), - d_x.get(), d_alpha.get(), - d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); -} - - -TEST_F(Cgs, HipCgsApplyOneRHSIsEquivalentToRef) -{ - int m = 123; - int n = 1; - auto ref_solver = ref_cgs_factory->generate(mtx); - auto hip_solver = hip_cgs_factory->generate(d_mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(hip); - auto d_x = Mtx::create(hip); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - hip_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - - -TEST_F(Cgs, HipCgsApplyMultipleRHSIsEquivalentToRef) -{ - int m = 123; - int n = 16; - auto hip_solver = hip_cgs_factory->generate(d_mtx); - auto ref_solver = ref_cgs_factory->generate(mtx); - auto b = gen_mtx(m, n); - auto x = gen_mtx(m, n); - auto d_b = Mtx::create(hip); - auto d_x = Mtx::create(hip); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); - - ref_solver->apply(b.get(), x.get()); - hip_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_b, b, 
1e-13); - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); -} - -} // namespace diff --git a/hip/test/solver/fcg_kernels.cpp b/hip/test/solver/fcg_kernels.cpp deleted file mode 100644 index 7771cf9b03c..00000000000 --- a/hip/test/solver/fcg_kernels.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/fcg_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Fcg : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - using Solver = gko::solver::Fcg<>; - - Fcg() : rand_engine(30) {} - - void SetUp() - { - ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(0.0, 1.0), rand_engine, ref); - } - - void initialize_data() - { - int m = 597; - int n = 43; - b = gen_mtx(m, n); - r = gen_mtx(m, n); - t = gen_mtx(m, n); - z = gen_mtx(m, n); - p = gen_mtx(m, n); - q = gen_mtx(m, n); - x = gen_mtx(m, n); - beta = gen_mtx(1, n); - prev_rho = gen_mtx(1, n); - rho = gen_mtx(1, n); - rho_t = gen_mtx(1, n); - stop_status = std::unique_ptr>( - new gko::Array(ref, n)); - for (size_t i = 0; i < stop_status->get_num_elems(); ++i) { - stop_status->get_data()[i].reset(); - } - - d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - d_r = Mtx::create(hip); - d_r->copy_from(r.get()); - d_t = Mtx::create(hip); - d_t->copy_from(t.get()); - d_z = Mtx::create(hip); - d_z->copy_from(z.get()); - d_p = Mtx::create(hip); - d_p->copy_from(p.get()); - d_q = Mtx::create(hip); - d_q->copy_from(q.get()); - d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - d_beta = Mtx::create(hip); - d_beta->copy_from(beta.get()); - d_prev_rho = Mtx::create(hip); - d_prev_rho->copy_from(prev_rho.get()); - d_rho_t = Mtx::create(hip); - d_rho_t->copy_from(rho_t.get()); - d_rho = Mtx::create(hip); - 
d_rho->copy_from(rho.get()); - d_stop_status = std::unique_ptr>( - new gko::Array(hip, n)); - *d_stop_status = *stop_status; - } - - void make_symetric(Mtx *mtx) - { - for (int i = 0; i < mtx->get_size()[0]; ++i) { - for (int j = i + 1; j < mtx->get_size()[1]; ++j) { - mtx->at(i, j) = mtx->at(j, i); - } - } - } - - void make_diag_dominant(Mtx *mtx) - { - using std::abs; - for (int i = 0; i < mtx->get_size()[0]; ++i) { - auto sum = gko::zero(); - for (int j = 0; j < mtx->get_size()[1]; ++j) { - sum += abs(mtx->at(i, j)); - } - mtx->at(i, i) = sum; - } - } - - void make_spd(Mtx *mtx) - { - make_symetric(mtx); - make_diag_dominant(mtx); - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; - - std::unique_ptr b; - std::unique_ptr r; - std::unique_ptr t; - std::unique_ptr z; - std::unique_ptr p; - std::unique_ptr q; - std::unique_ptr x; - std::unique_ptr beta; - std::unique_ptr prev_rho; - std::unique_ptr rho; - std::unique_ptr rho_t; - std::unique_ptr> stop_status; - - std::unique_ptr d_b; - std::unique_ptr d_r; - std::unique_ptr d_t; - std::unique_ptr d_z; - std::unique_ptr d_p; - std::unique_ptr d_q; - std::unique_ptr d_x; - std::unique_ptr d_beta; - std::unique_ptr d_prev_rho; - std::unique_ptr d_rho; - std::unique_ptr d_rho_t; - std::unique_ptr> d_stop_status; -}; - - -TEST_F(Fcg, HipFcgInitializeIsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::fcg::initialize( - ref, b.get(), r.get(), z.get(), p.get(), q.get(), t.get(), - prev_rho.get(), rho.get(), rho_t.get(), stop_status.get()); - gko::kernels::hip::fcg::initialize( - hip, d_b.get(), d_r.get(), d_z.get(), d_p.get(), d_q.get(), d_t.get(), - d_prev_rho.get(), d_rho.get(), d_rho_t.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_q, q, 1e-14); - GKO_ASSERT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); - 
GKO_ASSERT_MTX_NEAR(d_rho, rho, 1e-14); - GKO_ASSERT_MTX_NEAR(d_rho_t, rho_t, 1e-14); - GKO_ASSERT_ARRAY_EQ(*d_stop_status, *stop_status); -} - - -TEST_F(Fcg, HipFcgStep1IsEquivalentToRef) -{ - initialize_data(); - - gko::kernels::reference::fcg::step_1(ref, p.get(), z.get(), rho_t.get(), - prev_rho.get(), stop_status.get()); - gko::kernels::hip::fcg::step_1(hip, d_p.get(), d_z.get(), d_rho_t.get(), - d_prev_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_p, p, 1e-14); - GKO_ASSERT_MTX_NEAR(d_z, z, 1e-14); -} - - -TEST_F(Fcg, HipFcgStep2IsEquivalentToRef) -{ - initialize_data(); - gko::kernels::reference::fcg::step_2(ref, x.get(), r.get(), t.get(), - p.get(), q.get(), beta.get(), - rho.get(), stop_status.get()); - gko::kernels::hip::fcg::step_2(hip, d_x.get(), d_r.get(), d_t.get(), - d_p.get(), d_q.get(), d_beta.get(), - d_rho.get(), d_stop_status.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); - GKO_ASSERT_MTX_NEAR(d_r, r, 1e-14); - GKO_ASSERT_MTX_NEAR(d_t, t, 1e-14); -} - - -TEST_F(Fcg, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - make_spd(mtx.get()); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = Mtx::create(hip); - d_mtx->copy_from(mtx.get()); - auto d_x = Mtx::create(hip); - d_x->copy_from(x.get()); - auto d_b = Mtx::create(hip); - d_b->copy_from(b.get()); - auto fcg_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(ref)) - .on(ref); - auto d_fcg_factory = - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(hip), - gko::stop::ResidualNormReduction<>::build() - .with_reduction_factor(1e-14) - .on(hip)) - .on(hip); - auto solver = fcg_factory->generate(std::move(mtx)); - auto d_solver = d_fcg_factory->generate(std::move(d_mtx)); - - solver->apply(b.get(), x.get()); - d_solver->apply(d_b.get(), d_x.get()); - - GKO_ASSERT_MTX_NEAR(d_x, x, 
1e-14); -} - - -} // namespace diff --git a/hip/test/solver/gmres_kernels.cpp b/hip/test/solver/gmres_kernels.cpp index d16c781cb1e..42a9668e325 100644 --- a/hip/test/solver/gmres_kernels.cpp +++ b/hip/test/solver/gmres_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -93,7 +93,11 @@ class Gmres : public ::testing::Test { void initialize_data(int nrhs = 43) { +#ifdef GINKGO_FAST_TESTS + int m = 123; +#else int m = 597; +#endif x = gen_mtx(m, nrhs); y = gen_mtx(gko::solver::default_krylov_dim, nrhs); before_preconditioner = Mtx::create_with_config_of(x.get()); diff --git a/hip/test/solver/idr_kernels.cpp b/hip/test/solver/idr_kernels.cpp new file mode 100644 index 00000000000..4f9cebeefa0 --- /dev/null +++ b/hip/test/solver/idr_kernels.cpp @@ -0,0 +1,375 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/solver/idr_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +class Idr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using Solver = gko::solver::Idr<>; + + Idr() : rand_engine(30) {} + + void SetUp() + { + ref = gko::ReferenceExecutor::create(); + hip = gko::HipExecutor::create(0, ref); + + hip_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(hip)) + .on(hip); + + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + } + + void TearDown() + { + if (hip != nullptr) { + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(0.0, 1.0), rand_engine, ref); + } + + void initialize_data(int size = 
597, int input_nrhs = 17) + { + nrhs = input_nrhs; + int s = 4; + mtx = gen_mtx(size, size); + x = gen_mtx(size, nrhs); + b = gen_mtx(size, nrhs); + r = gen_mtx(size, nrhs); + m = gen_mtx(s, nrhs * s); + f = gen_mtx(s, nrhs); + g = gen_mtx(size, nrhs * s); + u = gen_mtx(size, nrhs * s); + c = gen_mtx(s, nrhs); + v = gen_mtx(size, nrhs); + p = gen_mtx(s, size); + alpha = gen_mtx(1, nrhs); + omega = gen_mtx(1, nrhs); + tht = gen_mtx(1, nrhs); + residual_norm = gen_mtx(1, nrhs); + stop_status = std::unique_ptr>( + new gko::Array(ref, nrhs)); + for (size_t i = 0; i < nrhs; ++i) { + stop_status->get_data()[i].reset(); + } + + d_mtx = Mtx::create(hip); + d_x = Mtx::create(hip); + d_b = Mtx::create(hip); + d_r = Mtx::create(hip); + d_m = Mtx::create(hip); + d_f = Mtx::create(hip); + d_g = Mtx::create(hip); + d_u = Mtx::create(hip); + d_c = Mtx::create(hip); + d_v = Mtx::create(hip); + d_p = Mtx::create(hip); + d_alpha = Mtx::create(hip); + d_omega = Mtx::create(hip); + d_tht = Mtx::create(hip); + d_residual_norm = Mtx::create(hip); + d_stop_status = std::unique_ptr>( + new gko::Array(hip)); + + d_mtx->copy_from(mtx.get()); + d_x->copy_from(x.get()); + d_b->copy_from(b.get()); + d_r->copy_from(r.get()); + d_m->copy_from(m.get()); + d_f->copy_from(f.get()); + d_g->copy_from(g.get()); + d_u->copy_from(u.get()); + d_c->copy_from(c.get()); + d_v->copy_from(v.get()); + d_p->copy_from(p.get()); + d_alpha->copy_from(alpha.get()); + d_omega->copy_from(omega.get()); + d_tht->copy_from(tht.get()); + d_residual_norm->copy_from(residual_norm.get()); + *d_stop_status = + *stop_status; // copy_from is not a public member function of Array + } + + std::shared_ptr ref; + std::shared_ptr hip; + + std::ranlux48 rand_engine; + + std::shared_ptr mtx; + std::shared_ptr d_mtx; + std::unique_ptr hip_idr_factory; + std::unique_ptr ref_idr_factory; + + gko::size_type nrhs; + + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr r; + std::unique_ptr m; + std::unique_ptr f; + std::unique_ptr 
g; + std::unique_ptr u; + std::unique_ptr c; + std::unique_ptr v; + std::unique_ptr p; + std::unique_ptr alpha; + std::unique_ptr omega; + std::unique_ptr tht; + std::unique_ptr residual_norm; + std::unique_ptr> stop_status; + + std::unique_ptr d_x; + std::unique_ptr d_b; + std::unique_ptr d_r; + std::unique_ptr d_m; + std::unique_ptr d_f; + std::unique_ptr d_g; + std::unique_ptr d_u; + std::unique_ptr d_c; + std::unique_ptr d_v; + std::unique_ptr d_p; + std::unique_ptr d_alpha; + std::unique_ptr d_omega; + std::unique_ptr d_tht; + std::unique_ptr d_residual_norm; + std::unique_ptr> d_stop_status; +}; + + +TEST_F(Idr, IdrInitializeIsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::idr::initialize(ref, nrhs, m.get(), p.get(), true, + stop_status.get()); + gko::kernels::hip::idr::initialize(hip, nrhs, d_m.get(), d_p.get(), true, + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(m, d_m, 1e-14); + GKO_ASSERT_MTX_NEAR(p, d_p, 1e-14); +} + + +TEST_F(Idr, IdrStep1IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_1(ref, nrhs, k, m.get(), f.get(), + r.get(), g.get(), c.get(), v.get(), + stop_status.get()); + gko::kernels::hip::idr::step_1(hip, nrhs, k, d_m.get(), d_f.get(), + d_r.get(), d_g.get(), d_c.get(), d_v.get(), + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(c, d_c, 1e-14); + GKO_ASSERT_MTX_NEAR(v, d_v, 1e-14); +} + + +TEST_F(Idr, IdrStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_2(ref, nrhs, k, omega.get(), v.get(), + c.get(), u.get(), stop_status.get()); + gko::kernels::hip::idr::step_2(hip, nrhs, k, d_omega.get(), d_v.get(), + d_c.get(), d_u.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(u, d_u, 1e-14); +} + + +TEST_F(Idr, IdrStep3IsEquivalentToRef) +{ + initialize_data(); + + gko::size_type k = 2; + gko::kernels::reference::idr::step_3( + ref, nrhs, k, p.get(), g.get(), v.get(), u.get(), m.get(), f.get(), + alpha.get(), 
r.get(), x.get(), stop_status.get()); + gko::kernels::hip::idr::step_3( + hip, nrhs, k, d_p.get(), d_g.get(), d_v.get(), d_u.get(), d_m.get(), + d_f.get(), d_alpha.get(), d_r.get(), d_x.get(), d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(g, d_g, 1e-14); + GKO_ASSERT_MTX_NEAR(v, d_v, 1e-14); + GKO_ASSERT_MTX_NEAR(u, d_u, 1e-14); + GKO_ASSERT_MTX_NEAR(m, d_m, 1e-14); + GKO_ASSERT_MTX_NEAR(f, d_f, 1e-14); + GKO_ASSERT_MTX_NEAR(r, d_r, 1e-14); + GKO_ASSERT_MTX_NEAR(x, d_x, 1e-14); +} + + +TEST_F(Idr, IdrComputeOmegaIsEquivalentToRef) +{ + initialize_data(); + + double kappa = 0.7; + gko::kernels::reference::idr::compute_omega(ref, nrhs, kappa, tht.get(), + residual_norm.get(), + omega.get(), stop_status.get()); + gko::kernels::hip::idr::compute_omega(hip, nrhs, kappa, d_tht.get(), + d_residual_norm.get(), d_omega.get(), + d_stop_status.get()); + + GKO_ASSERT_MTX_NEAR(omega, d_omega, 1e-14); +} + + +TEST_F(Idr, IdrIterationOneRHSIsEquivalentToRef) +{ + initialize_data(123, 1); + auto ref_solver = ref_idr_factory->generate(mtx); + auto hip_solver = hip_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + hip_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceOneRHSIsEquivalentToRef) +{ + initialize_data(123, 1); + hip_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(hip)) + .on(hip); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto ref_solver = ref_idr_factory->generate(mtx); + auto hip_solver = hip_idr_factory->generate(d_mtx); + + ref_solver->apply(b.get(), x.get()); + hip_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 
1e-13); +} + + +TEST_F(Idr, IdrIterationMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 16); + auto hip_solver = hip_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + hip_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-12); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); +} + + +TEST_F(Idr, IdrIterationWithComplexSubspaceMultipleRHSIsEquivalentToRef) +{ + initialize_data(123, 16); + hip_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(hip)) + .on(hip); + ref_idr_factory = + Solver::build() + .with_deterministic(true) + .with_complex_subspace(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .on(ref); + auto hip_solver = hip_idr_factory->generate(d_mtx); + auto ref_solver = ref_idr_factory->generate(mtx); + + ref_solver->apply(b.get(), x.get()); + hip_solver->apply(d_b.get(), d_x.get()); + + GKO_ASSERT_MTX_NEAR(d_b, b, 1e-13); + GKO_ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +} // namespace diff --git a/hip/test/solver/ir_kernels.cpp b/hip/test/solver/ir_kernels.cpp deleted file mode 100644 index 0e5791cd7cf..00000000000 --- a/hip/test/solver/ir_kernels.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. 
Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include - - -#include - - -#include -#include -#include -#include -#include -#include - - -#include "core/solver/ir_kernels.hpp" -#include "hip/test/utils.hip.hpp" - - -namespace { - - -class Ir : public ::testing::Test { -protected: - using Mtx = gko::matrix::Dense<>; - Ir() : rand_engine(30) {} - - void SetUp() - { - ref = gko::ReferenceExecutor::create(); - hip = gko::HipExecutor::create(0, ref); - } - - void TearDown() - { - if (hip != nullptr) { - ASSERT_NO_THROW(hip->synchronize()); - } - } - - std::unique_ptr gen_mtx(int num_rows, int num_cols) - { - return gko::test::generate_random_matrix( - num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); - } - - std::shared_ptr ref; - std::shared_ptr hip; - - std::ranlux48 rand_engine; -}; - - -TEST_F(Ir, InitializeIsEquivalentToRef) -{ - auto stop_status = gko::Array(ref, 43); - for (size_t i = 0; i < 
stop_status.get_num_elems(); ++i) { - stop_status.get_data()[i].reset(); - } - auto d_stop_status = gko::Array(hip, stop_status); - - gko::kernels::reference::ir::initialize(ref, &stop_status); - gko::kernels::hip::ir::initialize(hip, &d_stop_status); - - auto tmp = gko::Array(ref, d_stop_status); - for (int i = 0; i < stop_status.get_num_elems(); ++i) { - ASSERT_EQ(stop_status.get_const_data()[i], tmp.get_const_data()[i]); - } -} - - -TEST_F(Ir, ApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(hip, mtx); - auto d_x = clone(hip, x); - auto d_b = clone(hip, b); - // Forget about accuracy - Richardson is not going to converge for a random - // matrix, just check that a couple of iterations gives the same result on - // both executors - auto ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(hip)) - .on(hip); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(Ir, ApplyWithIterativeInnerSolverIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(hip, mtx); - auto d_x = clone(hip, x); - auto d_b = clone(hip, b); - - auto ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(1u).on( - hip)) - .on(hip)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(hip)) - .on(hip); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - // Note: 1e-12 instead of 1e-14, as the difference in the inner gmres - // iteration gets amplified by the difference in IR. - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); -} - - -TEST_F(Ir, RichardsonApplyIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(hip, mtx); - auto d_x = clone(hip, x); - auto d_b = clone(hip, b); - // Forget about accuracy - Richardson is not going to converge for a random - // matrix, just check that a couple of iterations gives the same result on - // both executors - auto ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .with_relaxation_factor(0.9) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(hip)) - .with_relaxation_factor(0.9) - .on(hip); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14); -} - - -TEST_F(Ir, RichardsonApplyWithIterativeInnerSolverIsEquivalentToRef) -{ - auto mtx = gen_mtx(50, 50); - auto x = gen_mtx(50, 3); - auto b = gen_mtx(50, 3); - auto d_mtx = clone(hip, mtx); - auto d_x = clone(hip, x); - auto d_b = clone(hip, b); - auto ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(2u).on(ref)) - .with_relaxation_factor(0.9) - .on(ref); - auto d_ir_factory = - gko::solver::Ir<>::build() - .with_solver( - gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - hip)) - .on(hip)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(hip)) - .with_relaxation_factor(0.9) - .on(hip); - auto solver = ir_factory->generate(std::move(mtx)); - auto d_solver = d_ir_factory->generate(std::move(d_mtx)); - - solver->apply(lend(b), lend(x)); - d_solver->apply(lend(d_b), lend(d_x)); - - // Note: 1e-12 instead of 1e-14, as the difference in the inner gmres - // iteration gets amplified by the difference in IR. - GKO_ASSERT_MTX_NEAR(d_x, x, 1e-12); -} - - -} // namespace diff --git a/hip/test/solver/lower_trs_kernels.cpp b/hip/test/solver/lower_trs_kernels.cpp index b497b525020..3328984a901 100644 --- a/hip/test/solver/lower_trs_kernels.cpp +++ b/hip/test/solver/lower_trs_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/solver/upper_trs_kernels.cpp b/hip/test/solver/upper_trs_kernels.cpp index ba55bc6325c..fef330d9046 100644 --- a/hip/test/solver/upper_trs_kernels.cpp +++ b/hip/test/solver/upper_trs_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/test/stop/CMakeLists.txt b/hip/test/stop/CMakeLists.txt index 844f7037768..eb3fa564d29 100644 --- a/hip/test/stop/CMakeLists.txt +++ b/hip/test/stop/CMakeLists.txt @@ -1,2 +1,2 @@ ginkgo_create_hip_test(criterion_kernels) -ginkgo_create_hip_test_special_linkage(residual_norm_kernels) +ginkgo_create_test(residual_norm_kernels) diff --git a/hip/test/stop/criterion_kernels.hip.cpp b/hip/test/stop/criterion_kernels.hip.cpp index 92935ea4867..60bff1bed99 100644 --- a/hip/test/stop/criterion_kernels.hip.cpp +++ b/hip/test/stop/criterion_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/hip/test/stop/residual_norm_kernels.cpp b/hip/test/stop/residual_norm_kernels.cpp index 42c505da601..4dfcd690c79 100644 --- a/hip/test/stop/residual_norm_kernels.cpp +++ b/hip/test/stop/residual_norm_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -45,9 +45,323 @@ namespace { constexpr double tol = 1.0e-14; +class ResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; + + ResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + hip_ = gko::HipExecutor::create(0, ref_); + factory_ = + gko::stop::ResidualNorm<>::build().with_reduction_factor(tol).on( + hip_); + rel_factory_ = gko::stop::ResidualNorm<>::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::initial_resnorm) + .on(hip_); + abs_factory_ = gko::stop::ResidualNorm<>::build() + .with_reduction_factor(tol) + .with_baseline(gko::stop::mode::absolute) + .on(hip_); + } + + std::unique_ptr::Factory> factory_; + std::unique_ptr::Factory> rel_factory_; + std::unique_ptr::Factory> abs_factory_; + std::shared_ptr hip_; + std::shared_ptr ref_; +}; + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRhsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, 
&stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * rhs_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForRhsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + 
.residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForRelResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9 * res_norm->at(0); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForRelResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + 
std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = + rel_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9 * res_norm->at(0, 0); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalForAbsResNorm) +{ + auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = + abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + 
.residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0) = tol * 1.1; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_FALSE(one_changed); + + res->at(0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ResidualNorm, WaitsTillResidualGoalMultipleRHSForAbsResNorm) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = + abs_factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = tol * 0.9; + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = tol * 0.9; 
+ d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + class ResidualNormReduction : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; ResidualNormReduction() { @@ -67,6 +381,8 @@ class ResidualNormReduction : public ::testing::Test { TEST_F(ResidualNormReduction, WaitsTillResidualGoal) { auto res = gko::initialize({100.0}, ref_); + auto res_norm = gko::initialize({0.0}, this->ref_); + res->compute_norm2(res_norm.get()); auto d_res = Mtx::create(hip_); d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({10.0}, ref_); @@ -84,7 +400,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0) = tol * 1.1e+2; + res->at(0) = tol * 1.1 * res_norm->at(0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -95,7 +411,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) stop_status.set_executor(hip_); ASSERT_FALSE(one_changed); - res->at(0) = tol * 0.9e+2; + res->at(0) = tol * 0.9 * res_norm->at(0); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -110,6 +426,8 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoal) TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) { auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto res_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + res->compute_norm2(res_norm.get()); auto d_res = Mtx::create(hip_); d_res->copy_from(res.get()); std::shared_ptr rhs = @@ -129,7 +447,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0, 0) = tol * 0.9e+2; + res->at(0, 0) = 
tol * 0.9 * res_norm->at(0, 0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -140,7 +458,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) stop_status.set_executor(hip_); ASSERT_TRUE(one_changed); - res->at(0, 1) = tol * 0.9e+2; + res->at(0, 1) = tol * 0.9 * res_norm->at(0, 1); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -155,6 +473,7 @@ TEST_F(ResidualNormReduction, WaitsTillResidualGoalMultipleRHS) class RelativeResidualNorm : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; RelativeResidualNorm() { @@ -177,6 +496,8 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) auto d_res = Mtx::create(hip_); d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); std::shared_ptr d_rhs = Mtx::create(hip_); d_rhs->copy_from(rhs.get()); auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); @@ -191,7 +512,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0) = tol * 1.1e+1; + res->at(0) = tol * 1.1 * rhs_norm->at(0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -202,7 +523,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoal) stop_status.set_executor(hip_); ASSERT_FALSE(one_changed); - res->at(0) = tol * 0.9e+1; + res->at(0) = tol * 0.9 * rhs_norm->at(0); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -221,6 +542,8 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) d_res->copy_from(res.get()); std::shared_ptr rhs = gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); std::shared_ptr d_rhs = Mtx::create(hip_); d_rhs->copy_from(rhs.get()); auto criterion = 
factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); @@ -236,7 +559,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) .residual_norm(d_res.get()) .check(RelativeStoppingId, true, &stop_status, &one_changed)); - res->at(0, 0) = tol * 0.9e+1; + res->at(0, 0) = tol * 0.9 * rhs_norm->at(0, 0); d_res->copy_from(res.get()); ASSERT_FALSE( criterion->update() @@ -247,7 +570,7 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) stop_status.set_executor(hip_); ASSERT_TRUE(one_changed); - res->at(0, 1) = tol * 0.9e+1; + res->at(0, 1) = tol * 0.9 * rhs_norm->at(0, 1); d_res->copy_from(res.get()); ASSERT_TRUE( criterion->update() @@ -259,6 +582,118 @@ TEST_F(RelativeResidualNorm, WaitsTillResidualGoalMultipleRHS) } +class ImplicitResidualNorm : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using NormVector = gko::matrix::Dense>; + + ImplicitResidualNorm() + { + ref_ = gko::ReferenceExecutor::create(); + hip_ = gko::HipExecutor::create(0, ref_); + factory_ = gko::stop::ImplicitResidualNorm<>::build() + .with_reduction_factor(tol) + .on(hip_); + } + + std::unique_ptr::Factory> factory_; + std::shared_ptr hip_; + std::shared_ptr ref_; +}; + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoal) +{ + auto res = gko::initialize({100.0}, ref_); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = gko::initialize({10.0}, ref_); + auto rhs_norm = gko::initialize({0.0}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 1); + stop_status.get_data()[0].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, 
&one_changed)); + + res->at(0) = std::pow(tol * 1.1 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_FALSE(stop_status.get_data()[0].has_converged()); + stop_status.set_executor(hip_); + ASSERT_FALSE(one_changed); + + res->at(0) = std::pow(tol * 0.9 * rhs_norm->at(0), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + ASSERT_TRUE(one_changed); +} + + +TEST_F(ImplicitResidualNorm, WaitsTillResidualGoalMultipleRHS) +{ + auto res = gko::initialize({{100.0, 100.0}}, ref_); + auto d_res = Mtx::create(hip_); + d_res->copy_from(res.get()); + std::shared_ptr rhs = + gko::initialize({{10.0, 10.0}}, ref_); + auto rhs_norm = gko::initialize({{0.0, 0.0}}, this->ref_); + gko::as(rhs)->compute_norm2(rhs_norm.get()); + std::shared_ptr d_rhs = Mtx::create(hip_); + d_rhs->copy_from(rhs.get()); + auto criterion = factory_->generate(nullptr, d_rhs, nullptr, d_res.get()); + bool one_changed{}; + constexpr gko::uint8 RelativeStoppingId{1}; + gko::Array stop_status(ref_, 2); + stop_status.get_data()[0].reset(); + stop_status.get_data()[1].reset(); + stop_status.set_executor(hip_); + + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + + res->at(0, 0) = std::pow(tol * 0.9 * rhs_norm->at(0, 0), 2); + d_res->copy_from(res.get()); + ASSERT_FALSE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[0].has_converged()); + 
stop_status.set_executor(hip_); + ASSERT_TRUE(one_changed); + + res->at(0, 1) = std::pow(tol * 0.9 * rhs_norm->at(0, 1), 2); + d_res->copy_from(res.get()); + ASSERT_TRUE( + criterion->update() + .implicit_sq_residual_norm(d_res.get()) + .check(RelativeStoppingId, true, &stop_status, &one_changed)); + stop_status.set_executor(ref_); + ASSERT_TRUE(stop_status.get_data()[1].has_converged()); + ASSERT_TRUE(one_changed); +} + + class AbsoluteResidualNorm : public ::testing::Test { protected: using Mtx = gko::matrix::Dense<>; diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index 8d524b0b615..03d4f2ba6c7 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,9 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { +// Visual Studio does not define the constructor of std::mutex as constexpr, +// causing it to not be initialized when creating this executor (which uses +// the mutex) +#if !defined(_MSC_VER) // prevent device reset after each test auto no_reset_exec = gko::HipExecutor::create(0, gko::ReferenceExecutor::create(), true); +#endif } // namespace diff --git a/hip/test/utils/assertions_test.hip.cpp b/hip/test/utils/assertions_test.hip.cpp index 2d5c67addc1..dfe95d6e186 100644 --- a/hip/test/utils/assertions_test.hip.cpp +++ b/hip/test/utils/assertions_test.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 004b7b359ad..1f103460c2a 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -2,5 +2,5 @@ if (GINKGO_CHECK_CIRCULAR_DEPS) add_library(ginkgo_public_api INTERFACE) # dummy target set_property(TARGET ginkgo_public_api APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}") set_property(TARGET ginkgo_public_api APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}") - ginkgo_check_headers(ginkgo_public_api) + ginkgo_check_headers(ginkgo_public_api "") endif() diff --git a/include/ginkgo/config.hpp.in b/include/ginkgo/config.hpp.in index fc5bae0b225..1c6a31ea481 100644 --- a/include/ginkgo/config.hpp.in +++ b/include/ginkgo/config.hpp.in @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -59,6 +59,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #cmakedefine GINKGO_JACOBI_FULL_OPTIMIZATIONS +/* Should we compile Ginkgo specifically to tune values? */ +#cmakedefine GINKGO_BENCHMARK_ENABLE_TUNING + + +/* Should we compile mixed-precision kernels for Ginkgo? */ +#cmakedefine GINKGO_MIXED_PRECISION + + /* What is HIP compiled for, hcc or nvcc? */ // clang-format off #define GINKGO_HIP_PLATFORM_HCC @GINKGO_HIP_PLATFORM_HCC@ @@ -74,4 +82,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // clang-format on +/* Is HWLOC available ? 
*/ +// clang-format off +#define GKO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@ +// clang-format on + + #endif // GKO_INCLUDE_CONFIG_H diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index 2db193027a1..3a6535884ce 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_ABSTRACT_FACTORY_HPP_ -#define GKO_CORE_BASE_ABSTRACT_FACTORY_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_ABSTRACT_FACTORY_HPP_ +#define GKO_PUBLIC_CORE_BASE_ABSTRACT_FACTORY_HPP_ #include @@ -266,7 +266,8 @@ class EnableDefaultFactory * used */ template -struct enable_parameters_type { +class enable_parameters_type { +public: using factory = Factory; /** @@ -289,4 +290,4 @@ struct enable_parameters_type { } // namespace gko -#endif // GKO_CORE_BASE_ABSTRACT_FACTORY_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_ABSTRACT_FACTORY_HPP_ diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index 7f0df5c27f6..c4aa2323417 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_ARRAY_HPP_ -#define GKO_CORE_BASE_ARRAY_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_ARRAY_HPP_ +#define GKO_PUBLIC_CORE_BASE_ARRAY_HPP_ #include @@ -122,7 +122,7 @@ class Array { * * @param exec the Executor where the array data is allocated */ - Array(std::shared_ptr exec) noexcept + explicit Array(std::shared_ptr exec) noexcept : num_elems_(0), data_(nullptr, default_deleter{exec}), exec_(std::move(exec)) @@ -479,6 +479,13 @@ class Array { } } + /** + * Fill the array with the given value. + * + * @param value the value to be filled + */ + void fill(const ValueType value); + /** * Returns the number of elements in the Array. * @@ -563,7 +570,65 @@ class Array { }; +namespace detail { + + +template +struct temporary_clone_helper> { + static std::unique_ptr> create( + std::shared_ptr exec, Array *ptr, bool copy_data) + { + if (copy_data) { + return std::make_unique>(std::move(exec), *ptr); + } else { + return std::make_unique>(std::move(exec), + ptr->get_num_elems()); + } + } +}; + +template +struct temporary_clone_helper> { + static std::unique_ptr> create( + std::shared_ptr exec, const Array *ptr, bool) + { + return std::make_unique>(std::move(exec), *ptr); + } +}; + + +// specialization for non-constant arrays, copying back via assignment +template +class copy_back_deleter> { +public: + using pointer = Array *; + + /** + * Creates a new deleter object. + * + * @param original the origin object where the data will be copied before + * deletion + */ + copy_back_deleter(pointer original) : original_{original} {} + + /** + * Copies back the pointed-to object to the original and deletes it. 
+ * + * @param ptr pointer to the object to be copied back and deleted + */ + void operator()(pointer ptr) const + { + *original_ = *ptr; + delete ptr; + } + +private: + pointer original_; +}; + + +} // namespace detail } // namespace gko -#endif // GKO_CORE_BASE_ARRAY_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_ARRAY_HPP_ diff --git a/include/ginkgo/core/base/combination.hpp b/include/ginkgo/core/base/combination.hpp index 908013a3e57..d7cc9f71cde 100644 --- a/include/ginkgo/core/base/combination.hpp +++ b/include/ginkgo/core/base/combination.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_COMBINATION_HPP_ -#define GKO_CORE_BASE_COMBINATION_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_COMBINATION_HPP_ +#define GKO_PUBLIC_CORE_BASE_COMBINATION_HPP_ #include @@ -207,4 +207,4 @@ class Combination : public EnableLinOp>, } // namespace gko -#endif // GKO_CORE_BASE_COMBINATION_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_COMBINATION_HPP_ diff --git a/include/ginkgo/core/base/composition.hpp b/include/ginkgo/core/base/composition.hpp index 4a7ecc7874f..3be5ca30db3 100644 --- a/include/ginkgo/core/base/composition.hpp +++ b/include/ginkgo/core/base/composition.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_COMPOSITION_HPP_ -#define GKO_CORE_BASE_COMPOSITION_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_COMPOSITION_HPP_ +#define GKO_PUBLIC_CORE_BASE_COMPOSITION_HPP_ #include @@ -167,7 +167,64 @@ class Composition : public EnableLinOp>, }; +/** + * The UseComposition class can be used to store the composition information in + * LinOp. + * + * @tparam ValueType precision of input and result vectors + */ +template +class UseComposition { +public: + using value_type = ValueType; + /** + * Returns the composition operator. + * + * @return composition + */ + std::shared_ptr> get_composition() const + { + return composition_; + } + + /** + * Returns the operator at index-th position of composition + * + * @return index-th operator + * + * @note when this composition is not set, this function always returns + * nullptr. However, when this composition is set, it will throw + * exception when exceeding index. + * + * @throw std::out_of_range if index is out of bounds when the + * composition exists. + */ + std::shared_ptr get_operator_at(size_type index) const + { + if (composition_ == nullptr) { + return nullptr; + } else { + return composition_->get_operators().at(index); + } + } + +protected: + /** + * Sets the composition with a list of operators + */ + template + void set_composition(LinOp &&...
linop) + { + composition_ = + Composition::create(std::forward(linop)...); + } + +private: + std::shared_ptr> composition_; +}; + + } // namespace gko -#endif // GKO_CORE_BASE_COMPOSITION_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_COMPOSITION_HPP_ diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp new file mode 100644 index 00000000000..5eaf892789d --- /dev/null +++ b/include/ginkgo/core/base/device.hpp @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ +#define GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ + + +#include +#include +#include +#include + + +#include + + +namespace gko { + + +class CudaExecutor; + +class HipExecutor; + + +/** + * nvidia_device handles the number of executors on NVIDIA devices and has the + * corresponding mutex. + */ +class nvidia_device { + friend class CudaExecutor; + friend class HipExecutor; + +private: + /** + * get_mutex gets the static mutex reference at i. + * + * @param i index of mutex + * + * @return mutex reference + */ + static std::mutex &get_mutex(int i); + + /** + * get_num_execs gets the static num_execs reference at i. + * + * @param i index of num_execs + * + * @return int reference + */ + static int &get_num_execs(int i); + + static constexpr int max_devices = 64; +}; + + +/** + * amd_device handles the number of executors on AMD devices and has the + * corresponding mutex. + */ +class amd_device { + friend class HipExecutor; + +private: + /** + * get_mutex gets the static mutex reference at i. + * + * @param i index of mutex + * + * @return mutex reference + */ + static std::mutex &get_mutex(int i); + + /** + * get_num_execs gets the static num_execs reference at i.
+ * + * @param i index of num_execs + * + * @return int reference + */ + static int &get_num_execs(int i); + + static constexpr int max_devices = 64; +}; + + +} // namespace gko + +#endif // GKO_PUBLIC_CORE_BASE_DEVICE_HPP_ diff --git a/include/ginkgo/core/base/dim.hpp b/include/ginkgo/core/base/dim.hpp index c0256df30dc..726b899ba60 100644 --- a/include/ginkgo/core/base/dim.hpp +++ b/include/ginkgo/core/base/dim.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,12 +30,14 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_DIM_HPP_ -#define GKO_CORE_BASE_DIM_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_DIM_HPP_ +#define GKO_PUBLIC_CORE_BASE_DIM_HPP_ + + +#include #include -#include namespace gko { @@ -52,6 +54,7 @@ namespace gko { template struct dim { static constexpr size_type dimensionality = Dimensionality; + friend class dim; using dimension_type = DimensionType; @@ -118,8 +121,11 @@ struct dim { * different than zero. * * @return true if and only if all dimensions evaluate to true + * + * @note This operator is explicit to avoid implicit dim-to-int casts. + * It will still be used in contextual conversions (if, &&, ||, !) 
*/ - constexpr GKO_ATTRIBUTES operator bool() const + explicit constexpr GKO_ATTRIBUTES operator bool() const { return static_cast(first_) && static_cast(rest_); } @@ -150,7 +156,30 @@ struct dim { return dim(x.first_ * y.first_, x.rest_ * y.rest_); } + /** + * A stream operator overload for dim + * + * @param os stream object + * @param x dim object + * + * @return a stream object appended with the dim output + */ + friend std::ostream &operator<<(std::ostream &os, const dim &x) + { + os << "("; + x.print_to(os); + os << ")"; + return os; + } + private: + void inline print_to(std::ostream &os) const + { + os << first_ << ", "; + rest_.print_to(os); + } + + constexpr GKO_ATTRIBUTES dim(const dimension_type first, dim rest) : first_{first}, rest_{rest} @@ -165,6 +194,7 @@ struct dim { template struct dim<1u, DimensionType> { static constexpr size_type dimensionality = 1u; + friend class dim<2>; using dimension_type = DimensionType; @@ -183,7 +213,7 @@ struct dim<1u, DimensionType> { return GKO_ASSERT(dimension == 0), first_; } - constexpr GKO_ATTRIBUTES operator bool() const + explicit constexpr GKO_ATTRIBUTES operator bool() const { return static_cast(first_); } @@ -198,7 +228,17 @@ struct dim<1u, DimensionType> { return dim(x.first_ * y.first_); } + friend std::ostream &operator<<(std::ostream &os, const dim &x) + { + os << "("; + x.print_to(os); + os << ")"; + return os; + } + private: + void inline print_to(std::ostream &os) const { os << first_; } + dimension_type first_; }; @@ -243,4 +283,4 @@ constexpr GKO_ATTRIBUTES GKO_INLINE dim<2, DimensionType> transpose( } // namespace gko -#endif // GKO_CORE_BASE_DIM_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_DIM_HPP_ diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 78fe81a617e..7556602f13b 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright 
(c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_EXCEPTION_HPP_ -#define GKO_CORE_BASE_EXCEPTION_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_EXCEPTION_HPP_ +#define GKO_PUBLIC_CORE_BASE_EXCEPTION_HPP_ #include @@ -219,6 +219,29 @@ class CublasError : public Error { }; +/** + * CurandError is thrown when a cuRAND routine throws a non-zero error code. + */ +class CurandError : public Error { +public: + /** + * Initializes a cuRAND error. + * + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The name of the cuRAND routine that failed + * @param error_code The resulting cuRAND error code + */ + CurandError(const std::string &file, int line, const std::string &func, + int64 error_code) + : Error(file, line, func + ": " + get_error(error_code)) + {} + +private: + static std::string get_error(int64 error_code); +}; + + /** * CusparseError is thrown when a cuSPARSE routine throws a non-zero error code. */ @@ -288,6 +311,29 @@ class HipblasError : public Error { }; +/** + * HiprandError is thrown when a hipRAND routine throws a non-zero error code. + */ +class HiprandError : public Error { +public: + /** + * Initializes a hipRAND error. 
+ * + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The name of the hipRAND routine that failed + * @param error_code The resulting hipRAND error code + */ + HiprandError(const std::string &file, int line, const std::string &func, + int64 error_code) + : Error(file, line, func + ": " + get_error(error_code)) + {} + +private: + static std::string get_error(int64 error_code); +}; + + /** * HipsparseError is thrown when a hipSPARSE routine throws a non-zero error * code. @@ -375,6 +421,30 @@ class BadDimension : public Error { }; +/** + * Error that denotes issues between block sizes and matrix dimensions + * + * \tparam IndexType Type of index used by the linear algebra object that is + * incompatible with the required block size. + */ +template +class BlockSizeError : public Error { +public: + /** + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param block_size Size of small dense blocks in a matrix + * @param size The size that is not exactly divided by the block size + */ + BlockSizeError(const std::string &file, const int line, + const int block_size, const IndexType size) + : Error(file, line, + "block size = " + std::to_string(block_size) + + ", size = " + std::to_string(size)) + {} +}; + + /** * ValueMismatch is thrown if two values are not equal.
*/ @@ -488,4 +558,4 @@ class KernelNotFound : public Error { } // namespace gko -#endif // GKO_CORE_BASE_EXCEPTION_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_EXCEPTION_HPP_ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 287b59380f8..7a34a0835a0 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_EXCEPTION_HELPERS_HPP_ -#define GKO_CORE_BASE_EXCEPTION_HELPERS_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_EXCEPTION_HELPERS_HPP_ +#define GKO_PUBLIC_CORE_BASE_EXCEPTION_HELPERS_HPP_ #include @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -300,6 +301,15 @@ inline dim<2> get_size(const dim<2> &size) { return size; } ::gko::CublasError(__FILE__, __LINE__, __func__, _errcode) +/** + * Instantiates a CurandError. + * + * @param errcode The error code returned from the cuRAND routine. + */ +#define GKO_CURAND_ERROR(_errcode) \ + ::gko::CurandError(__FILE__, __LINE__, __func__, _errcode) + + /** * Instantiates a CusparseError. * @@ -337,6 +347,20 @@ inline dim<2> get_size(const dim<2> &size) { return size; } } while (false) +/** + * Asserts that a cuRAND library call completed without errors. 
+ * + * @param _curand_call a library call expression + */ +#define GKO_ASSERT_NO_CURAND_ERRORS(_curand_call) \ + do { \ + auto _errcode = _curand_call; \ + if (_errcode != CURAND_STATUS_SUCCESS) { \ + throw GKO_CURAND_ERROR(_errcode); \ + } \ + } while (false) + + /** * Asserts that a cuSPARSE library call completed without errors. * @@ -369,6 +393,15 @@ inline dim<2> get_size(const dim<2> &size) { return size; } ::gko::HipblasError(__FILE__, __LINE__, __func__, _errcode) +/** + * Instantiates a HiprandError. + * + * @param errcode The error code returned from the HIPRAND routine. + */ +#define GKO_HIPRAND_ERROR(_errcode) \ + ::gko::HiprandError(__FILE__, __LINE__, __func__, _errcode) + + /** * Instantiates a HipsparseError. * @@ -406,6 +439,20 @@ inline dim<2> get_size(const dim<2> &size) { return size; } } while (false) +/** + * Asserts that a HIPRAND library call completed without errors. + * + * @param _hiprand_call a library call expression + */ +#define GKO_ASSERT_NO_HIPRAND_ERRORS(_hiprand_call) \ + do { \ + auto _errcode = _hiprand_call; \ + if (_errcode != HIPRAND_STATUS_SUCCESS) { \ + throw GKO_HIPRAND_ERROR(_errcode); \ + } \ + } while (false) + + /** * Asserts that a HIPSPARSE library call completed without errors. * @@ -520,7 +567,27 @@ inline T ensure_allocated_impl(T ptr, const std::string &file, int line, "semi-colon warnings") +/** + * Ensures that a given size, typically of a linear algebraic object, + * is divisible by a given block size. 
+ * + * @param _size A size of a vector or matrix + * @param _block_size Size of small dense blocks that make up + * the vector or matrix + * + * @throw BlockSizeError if _block_size does not divide _size + */ +#define GKO_ASSERT_BLOCK_SIZE_CONFORMANT(_size, _block_size) \ + if (_size % _block_size != 0) { \ + throw BlockSizeError(__FILE__, __LINE__, _block_size, \ + _size); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace gko -#endif // GKO_CORE_BASE_EXCEPTION_HELPERS_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_EXCEPTION_HELPERS_HPP_ diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 1df29abc59c..59876d620cc 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,22 +30,86 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_EXECUTOR_HPP_ -#define GKO_CORE_BASE_EXECUTOR_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ +#define GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ +#include +#include #include #include #include +#include #include #include +#include +#include +#include #include #include #include +namespace gko { + + +/** + * Specify the mode of allocation for CUDA/HIP GPUs. + * + * `device` allocates memory on the device and Unified Memory model is not used. + * + * `unified_global` allocates memory on the device, but is accessible by the + * host through the Unified memory model. 
+ * + * `unified_host` allocates memory on the + * host and it is not available on devices which do not have concurrent accesses + * switched on, but this access can be explicitly switched on, when necessary. + */ +enum class allocation_mode { device, unified_global, unified_host }; + + +#ifdef NDEBUG + +// When in release, prefer device allocations +constexpr allocation_mode default_cuda_alloc_mode = allocation_mode::device; + +constexpr allocation_mode default_hip_alloc_mode = allocation_mode::device; + +#else + +// When in debug, always use UM allocations. +constexpr allocation_mode default_cuda_alloc_mode = + allocation_mode::unified_global; + +#if (GINKGO_HIP_PLATFORM_HCC == 1) + +// HIP on AMD GPUs does not support UM, so always prefer device allocations. +constexpr allocation_mode default_hip_alloc_mode = allocation_mode::device; + +#else + +// HIP on NVIDIA GPUs supports UM, so prefer UM allocations. +constexpr allocation_mode default_hip_alloc_mode = + allocation_mode::unified_global; + +#endif + +#endif + + +} // namespace gko + +inline namespace cl { +namespace sycl { + +class queue; + +} // namespace sycl +} // namespace cl + + struct cublasContext; struct cusparseContext; @@ -114,6 +178,9 @@ class ExecutorBase; * void run(const gko::HipExecutor *exec) const override * { os_ << "HIP(" << exec->get_device_id() << ")"; } * + * void run(const gko::DpcppExecutor *exec) const override + * { os_ << "DPC++(" << exec->get_device_id() << ")"; } + * * // This is optional, if not overloaded, defaults to OmpExecutor overload * void run(const gko::ReferenceExecutor *) const override * { os_ << "Reference CPU"; } @@ -142,6 +209,7 @@ class ExecutorBase; * std::cout << *omp << std::endl * << *gko::CudaExecutor::create(0, omp) << std::endl * << *gko::HipExecutor::create(0, omp) << std::endl + * << *gko::DpcppExecutor::create(0, omp) << std::endl * << *gko::ReferenceExecutor::create() << std::endl; * ``` * @@ -151,15 +219,16 @@ class ExecutorBase; * OMP * CUDA(0) * HIP(0) + *
DPC++(0) * Reference CPU * ``` * * One might feel that this code is too complicated for such a simple task. * Luckily, there is an overload of the Executor::run() method, which is * designed to facilitate writing simple operations like this one. The method - * takes three closures as input: one which is run for OMP, one for - * CUDA executors, and the last one for HIP executors. Using this method, there - * is no need to implement an Operation subclass: + * takes four closures as input: one which is run for OMP, one for CUDA + * executors, one for HIP executors, and the last one for DPC++ executors. Using + * this method, there is no need to implement an Operation subclass: * * ``` * std::ostream& operator<<(std::ostream &os, const gko::Executor &exec) @@ -174,6 +243,10 @@ class ExecutorBase; * << static_cast(exec) * .get_device_id() * << ")"; }); + * [&]() { os << "DPC++(" // DPC++ closure + * << static_cast(exec) + * .get_device_id() + * << ")"; }); * return os; * } * ``` @@ -250,7 +323,7 @@ private: \ * kernel when the operation is executed. * * The kernels used to bind the operation are searched in `kernels::DEV_TYPE` - * namespace, where `DEV_TYPE` is replaced by `omp`, `cuda`, `hip` and + * namespace, where `DEV_TYPE` is replaced by `omp`, `cuda`, `hip`, `dpcpp` and * `reference`.
* * @param _name operation name @@ -278,6 +351,11 @@ private: \ * // hip code * } * } + * namespace dpcpp { + * void my_kernel(int x) { + * // dpcpp code + * } + * } * namespace reference { * void my_kernel(int x) { * // reference code @@ -290,8 +368,9 @@ private: \ * int main() { * // create executors * auto omp = OmpExecutor::create(); - * auto cuda = CudaExecutor::create(omp, 0); - * auto hip = HipExecutor::create(omp, 0); + * auto cuda = CudaExecutor::create(0, omp); + * auto hip = HipExecutor::create(0, omp); + * auto dpcpp = DpcppExecutor::create(0, omp); * auto ref = ReferenceExecutor::create(); * * // create the operation @@ -300,53 +379,57 @@ private: \ * omp->run(op); // run omp kernel * cuda->run(op); // run cuda kernel * hip->run(op); // run hip kernel + * dpcpp->run(op); // run DPC++ kernel * ref->run(op); // run reference kernel * } * ``` * * @ingroup Executor */ -#define GKO_REGISTER_OPERATION(_name, _kernel) \ - template \ - class _name##_operation : public Operation { \ - using counts = \ - ::gko::syn::as_list<::gko::syn::range<0, sizeof...(Args)>>; \ - \ - public: \ - explicit _name##_operation(Args &&... args) \ - : data(std::forward(args)...) \ - {} \ - \ - const char *get_name() const noexcept override \ - { \ - static auto name = [this] { \ - std::ostringstream oss; \ - oss << #_kernel << '#' << sizeof...(Args); \ - return oss.str(); \ - }(); \ - return name.c_str(); \ - } \ - \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(OmpExecutor, omp, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(CudaExecutor, cuda, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(HipExecutor, hip, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(ReferenceExecutor, reference, \ - _kernel); \ - \ - private: \ - mutable std::tuple data; \ - }; \ - \ - template \ - static _name##_operation make_##_name(Args &&... 
args) \ - { \ - return _name##_operation(std::forward(args)...); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ +#define GKO_REGISTER_OPERATION(_name, _kernel) \ + template \ + class _name##_operation : public Operation { \ + using counts = \ + ::gko::syn::as_list<::gko::syn::range<0, sizeof...(Args)>>; \ + \ + public: \ + explicit _name##_operation(Args &&... args) \ + : data(std::forward(args)...) \ + {} \ + \ + const char *get_name() const noexcept override \ + { \ + static auto name = [this] { \ + std::ostringstream oss; \ + oss << #_kernel << '#' << sizeof...(Args); \ + return oss.str(); \ + }(); \ + return name.c_str(); \ + } \ + \ + GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(OmpExecutor, omp, _kernel); \ + GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(CudaExecutor, cuda, _kernel); \ + GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(HipExecutor, hip, _kernel); \ + GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(DpcppExecutor, dpcpp, _kernel); \ + GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(ReferenceExecutor, reference, \ + _kernel); \ + \ + private: \ + mutable std::tuple data; \ + }; \ + \ + template \ + static _name##_operation make_##_name(Args &&... args) \ + { \ + return _name##_operation(std::forward(args)...); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ "semi-colon warnings") +#define GKO_DECLARE_EXECUTOR_FRIEND(_type, ...) friend class _type + /** * The first step in using the Ginkgo library consists of creating an * executor. 
Executors are used to specify the location for the data of linear @@ -359,6 +442,8 @@ private: \ * operations executed on the NVIDIA GPU accelerator; * + HipExecutor specifies that the data should be stored and the * operations executed on either an NVIDIA or AMD GPU accelerator; + * + DpcppExecutor specifies that the data should be stored and the + * operations executed on an hardware supporting DPC++; * + ReferenceExecutor executes a non-optimized reference implementation, * which can be used to debug the library. * @@ -436,6 +521,9 @@ class Executor : public log::EnableLogging { template friend class detail::ExecutorBase; + GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_EXECUTOR_FRIEND); + friend class ReferenceExecutor; + public: virtual ~Executor() = default; @@ -464,12 +552,13 @@ class Executor : public log::EnableLogging { * @param op_cuda functor to run in case of a CudaExecutor * @param op_hip functor to run in case of a HipExecutor */ - template + template void run(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, - const ClosureHip &op_hip) const + const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const { - LambdaOperation op(op_omp, op_cuda, - op_hip); + LambdaOperation op( + op_omp, op_cuda, op_hip, op_dpcpp); this->run(op); } @@ -530,7 +619,25 @@ class Executor : public log::EnableLogging { this->template log( src_exec, this, reinterpret_cast(src_ptr), reinterpret_cast(dest_ptr), num_elems * sizeof(T)); - this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, dest_ptr); + try { + this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, + dest_ptr); + } catch (NotSupported &) { +#if (GKO_VERBOSE_LEVEL >= 1) && !defined(NDEBUG) + // Unoptimized copy. Try to go through the masters. + // output to log when verbose >= 1 and debug build + std::clog << "Not direct copy. Try to copy data from the masters." 
+ << std::endl; +#endif + auto src_master = src_exec->get_master().get(); + if (num_elems > 0 && src_master != src_exec) { + auto *master_ptr = src_exec->get_master()->alloc(num_elems); + src_master->copy_from(src_exec, num_elems, src_ptr, + master_ptr); + this->copy_from(src_master, num_elems, master_ptr, dest_ptr); + src_master->free(master_ptr); + } + } this->template log( src_exec, this, reinterpret_cast(src_ptr), reinterpret_cast(dest_ptr), num_elems * sizeof(T)); @@ -586,7 +693,140 @@ class Executor : public log::EnableLogging { */ virtual void synchronize() const = 0; + /** + * Verifies whether the executors share the same memory. + * + * @param other the other Executor to compare against + * + * @return whether the executors this and other share the same memory. + */ + bool memory_accessible(const std::shared_ptr &other) const + { + return this->verify_memory_from(other.get()); + } + protected: + /** + * A struct that abstracts the executor info for different executors + * classes. + */ + struct exec_info { + /** + * The id of the device. + */ + int device_id = -1; + + /** + * The type of the device, relevant only for the dpcpp executor. + */ + std::string device_type; + + /** + * The numa node of the executor. + */ + int numa_node = -1; + + /** + * The number of computing units in the executor. + * + * @note In CPU executors this is equivalent to the number of cores. + * In CUDA and HIP executors this is the number of Streaming + * Multiprocessors. In DPCPP, this is the number of computing + * units. + */ + int num_computing_units = -1; + + /** + * The number of processing units per computing unit in the executor. + * + * @note In CPU executors this is equivalent to the number of SIMD units + * per core. + * In CUDA and HIP executors this is the number of warps + * per SM. + * In DPCPP, this is currently undefined. + */ + int num_pu_per_cu = -1; + + /** + * The sizes of the subgroup for the executor. + * + * @note In CPU executors this is invalid. 
+ * In CUDA and HIP executors this is invalid. + * In DPCPP, this is the subgroup sizes for the device associated + * with the dpcpp executor. + */ + std::vector subgroup_sizes{}; + + /** + * The maximum subgroup size for the executor. + * + * @note In CPU executors this is invalid. + * In CUDA and HIP executors this is the warp size. + * In DPCPP, this is the maximum subgroup size for the device + * associated with the dpcpp executor. + */ + int max_subgroup_size = -1; + + /** + * The sizes of the work items for the executor. + * + * @note In CPU executors this is invalid. + * In CUDA and HIP executors this is the maximum number of threads + * in each dimension of a block (x, y, z). + * In DPCPP, this is the maximum number of workitems, in each + * direction of the workgroup for the device associated with the + * dpcpp executor. + */ + std::vector max_workitem_sizes{}; + + /** + * The sizes of the work items for the executor. + * + * @note In CPU executors this is invalid. + * In CUDA and HIP executors this is the maximum number of threads + * in block. + * In DPCPP, this is the maximum number of workitems that are + * permitted in a workgroup. + */ + int max_workgroup_size; + + /** + * The major version for CUDA/HIP device. + */ + int major = -1; + + /** + * The minor version for CUDA/HIP device. + */ + int minor = -1; + + /** + * The PCI bus id of the device. + * + * @note Only relevant for I/O devices (GPUs). + */ + std::string pci_bus_id = std::string(13, 'x'); + + /** + * The host processing units closest to the device. + * + * @note Currently only relevant for CUDA, HIP and DPCPP executors. + * [Definition from hwloc + * documentation:](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00350.php) + * "The smallest processing element that can be represented by a + * hwloc object. 
It may be a single-core processor, a core of a + * multicore processor, or a single thread in a SMT processor" + */ + std::vector closest_pu_ids{}; + }; + + /** + * Gets the exec info struct + * + * @return the exec_info struct + */ + const exec_info &get_exec_info() const { return this->exec_info_; } + /** * Allocates raw memory in this Executor. * @@ -637,6 +877,50 @@ class Executor : public log::EnableLogging { #undef GKO_ENABLE_RAW_COPY_TO + /** + * Verify the memory from another Executor. + * + * @param src_exec Executor from which to verify the memory. + * + * @return whether this executor and src_exec share the same memory. + */ + virtual bool verify_memory_from(const Executor *src_exec) const = 0; + +/** + * @internal + * Declares a verify_memory_to() overload for a specified Executor subclass. + * + * This is the second stage of the double dispatch emulation required to + * implement verify_memory_from(). + * + * @param _exec_type the Executor subclass + */ +#define GKO_ENABLE_VERIFY_MEMORY_TO(_exec_type, ...) \ + virtual bool verify_memory_to(const _exec_type *dest_exec) const = 0 + + GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO); + + GKO_ENABLE_VERIFY_MEMORY_TO(ReferenceExecutor, ref); + +#undef GKO_ENABLE_VERIFY_MEMORY_TO + + /** + * Populates the executor specific info from the global machine topology + * object. + * + * @param mach_topo the machine topology object. 
+ */ + virtual void populate_exec_info(const MachineTopology *mach_topo) = 0; + + /** + * Gets the modifiable exec info object + * + * @return the pointer to the exec_info object + */ + exec_info &get_exec_info() { return this->exec_info_; } + + exec_info exec_info_; + private: /** * The LambdaOperation class wraps three functor objects into an @@ -650,8 +934,10 @@ class Executor : public log::EnableLogging { * @tparam ClosureOmp the type of the first functor * @tparam ClosureCuda the type of the second functor * @tparam ClosureHip the type of the third functor + * @tparam ClosureDpcpp the type of the fourth functor */ - template + template class LambdaOperation : public Operation { public: /** @@ -661,10 +947,15 @@ class Executor : public log::EnableLogging { * and ReferenceExecutor * @param op_cuda a functor object which will be called by CudaExecutor * @param op_hip a functor object which will be called by HipExecutor + * @param op_dpcpp a functor object which will be called by + * DpcppExecutor */ LambdaOperation(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, - const ClosureHip &op_hip) - : op_omp_(op_omp), op_cuda_(op_cuda), op_hip_(op_hip) + const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) + : op_omp_(op_omp), + op_cuda_(op_cuda), + op_hip_(op_hip), + op_dpcpp_(op_dpcpp) {} void run(std::shared_ptr) const override @@ -682,10 +973,16 @@ class Executor : public log::EnableLogging { op_hip_(); } + void run(std::shared_ptr) const override + { + op_dpcpp_(); + } + private: ClosureOmp op_omp_; ClosureCuda op_cuda_; ClosureHip op_hip_; + ClosureDpcpp op_dpcpp_; }; }; @@ -755,6 +1052,9 @@ namespace detail { template class ExecutorBase : public Executor { + GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_EXECUTOR_FRIEND); + friend class ReferenceExecutor; + public: void run(const Operation &op) const override { @@ -770,6 +1070,11 @@ class ExecutorBase : public Executor { src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); } + virtual bool 
verify_memory_from(const Executor *src_exec) const override + { + return src_exec->verify_memory_to(self()); + } + private: ConcreteExecutor *self() noexcept { @@ -782,6 +1087,8 @@ class ExecutorBase : public Executor { } }; +#undef GKO_DECLARE_EXECUTOR_FRIEND + /** * Controls whether the DeviceReset function should be called thanks to a @@ -828,6 +1135,16 @@ class EnableDeviceReset { const void *src_ptr, void *dest_ptr) const override +#define GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(dest_, bool_) \ + virtual bool verify_memory_to(const dest_ *other) const override \ + { \ + return bool_; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + /** * This is the Executor subclass which represents the OpenMP device * (typically CPU). @@ -854,14 +1171,39 @@ class OmpExecutor : public detail::ExecutorBase, void synchronize() const override; + int get_num_cores() const + { + return this->get_exec_info().num_computing_units; + } + + int get_num_threads_per_core() const + { + return this->get_exec_info().num_pu_per_cu; + } + protected: - OmpExecutor() = default; + OmpExecutor() + { + this->OmpExecutor::populate_exec_info(MachineTopology::get_instance()); + } + + void populate_exec_info(const MachineTopology *mach_topo) override; void *raw_alloc(size_type size) const override; void raw_free(void *ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, true); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + bool verify_memory_to(const DpcppExecutor *dest_exec) const override; }; @@ -895,7 +1237,33 @@ class ReferenceExecutor : public OmpExecutor { } protected: - ReferenceExecutor() = default; + ReferenceExecutor() + { + this->ReferenceExecutor::populate_exec_info( + 
MachineTopology::get_instance()); + } + + void populate_exec_info(const MachineTopology *) override + { + this->get_exec_info().device_id = -1; + this->get_exec_info().num_computing_units = 1; + this->get_exec_info().num_pu_per_cu = 1; + } + + bool verify_memory_from(const Executor *src_exec) const override + { + return src_exec->verify_memory_to(this); + } + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, true); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); }; @@ -924,12 +1292,15 @@ class CudaExecutor : public detail::ExecutorBase, * @param device_id the CUDA device id of this device * @param master an executor on the host that is used to invoke the device * kernels + * @param device_reset whether to reset the device after the object exits + * the scope. + * @param alloc_mode the allocation mode that the executor should operate + * on. See @allocation_mode for more details */ static std::shared_ptr create( int device_id, std::shared_ptr master, - bool device_reset = false); - - ~CudaExecutor() { decrease_num_execs(this->device_id_); } + bool device_reset = false, + allocation_mode alloc_mode = default_cuda_alloc_mode); std::shared_ptr get_master() noexcept override; @@ -942,7 +1313,10 @@ class CudaExecutor : public detail::ExecutorBase, /** * Get the CUDA device id of the device associated to this executor. */ - int get_device_id() const noexcept { return device_id_; } + int get_device_id() const noexcept + { + return this->get_exec_info().device_id; + } /** * Get the number of devices present on the system. @@ -952,35 +1326,51 @@ class CudaExecutor : public detail::ExecutorBase, /** * Get the number of warps per SM of this executor. 
*/ - int get_num_warps_per_sm() const noexcept { return num_warps_per_sm_; } + int get_num_warps_per_sm() const noexcept + { + return this->get_exec_info().num_pu_per_cu; + } /** * Get the number of multiprocessor of this executor. */ - int get_num_multiprocessor() const noexcept { return num_multiprocessor_; } + int get_num_multiprocessor() const noexcept + { + return this->get_exec_info().num_computing_units; + } /** * Get the number of warps of this executor. */ int get_num_warps() const noexcept { - return num_multiprocessor_ * num_warps_per_sm_; + return this->get_exec_info().num_computing_units * + this->get_exec_info().num_pu_per_cu; } /** * Get the warp size of this executor. */ - int get_warp_size() const noexcept { return warp_size_; } + int get_warp_size() const noexcept + { + return this->get_exec_info().max_subgroup_size; + } /** * Get the major verion of compute capability. */ - int get_major_version() const noexcept { return major_; } + int get_major_version() const noexcept + { + return this->get_exec_info().major; + } /** * Get the minor verion of compute capability. 
*/ - int get_minor_version() const noexcept { return minor_; } + int get_minor_version() const noexcept + { + return this->get_exec_info().minor; + } /** * Get the cublas handle for this executor @@ -999,26 +1389,50 @@ class CudaExecutor : public detail::ExecutorBase, return cusparse_handle_.get(); } + /** + * Get the closest PUs + * + * @return the array of PUs closest to this device + */ + std::vector get_closest_pus() const + { + return this->get_exec_info().closest_pu_ids; + } + + /** + * Get the closest NUMA node + * + * @return the closest NUMA node closest to this device + */ + int get_closest_numa() const { return this->get_exec_info().numa_node; } + protected: void set_gpu_property(); void init_handles(); CudaExecutor(int device_id, std::shared_ptr master, - bool device_reset = false) + bool device_reset = false, + allocation_mode alloc_mode = default_cuda_alloc_mode) : EnableDeviceReset{device_reset}, - device_id_(device_id), - master_(master), - num_warps_per_sm_(0), - num_multiprocessor_(0), - major_(0), - minor_(0), - warp_size_(0) + alloc_mode_{alloc_mode}, + master_(master) { - assert(device_id < max_devices && device_id >= 0); + this->get_exec_info().device_id = device_id; + this->get_exec_info().num_computing_units = 0; + this->get_exec_info().num_pu_per_cu = 0; + this->CudaExecutor::populate_exec_info(MachineTopology::get_instance()); + if (this->get_exec_info().closest_pu_ids.size()) { + MachineTopology::get_instance()->bind_to_pus( + this->get_closest_pus()); + } + // it only gets attribute from device, so it should not be affected by + // DeviceReset. this->set_gpu_property(); + // increase the number of executor before any operations may be affected + // by DeviceReset. 
+ increase_num_execs(this->get_exec_info().device_id); this->init_handles(); - increase_num_execs(device_id); } void *raw_alloc(size_type size) const override; @@ -1027,41 +1441,33 @@ class CudaExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); - static void increase_num_execs(unsigned device_id) - { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]++; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); - static void decrease_num_execs(unsigned device_id) - { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]--; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - static unsigned get_num_execs(unsigned device_id) - { - std::lock_guard guard(mutex[device_id]); - return num_execs[device_id]; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + bool verify_memory_to(const HipExecutor *dest_exec) const override; + + bool verify_memory_to(const CudaExecutor *dest_exec) const override; + + static void increase_num_execs(unsigned device_id); + + static void decrease_num_execs(unsigned device_id); + + static unsigned get_num_execs(unsigned device_id); + + void populate_exec_info(const MachineTopology *mach_topo) override; private: - int device_id_; std::shared_ptr master_; - int num_warps_per_sm_; - int num_multiprocessor_; - int major_; - int minor_; - int warp_size_; template using handle_manager = std::unique_ptr>; handle_manager cublas_handle_; handle_manager cusparse_handle_; - static constexpr int max_devices = 64; - static unsigned num_execs[max_devices]; - static std::mutex mutex[max_devices]; + allocation_mode alloc_mode_; }; @@ -1090,12 +1496,15 @@ class HipExecutor : public detail::ExecutorBase, * @param device_id the HIP device id of this device * @param master an executor on the host that is used to invoke the device * kernels + * @param device_reset whether to reset the device after the object exits + * the scope. 
+ * @param alloc_mode the allocation mode that the executor should operate + * on. See @allocation_mode for more details */ - static std::shared_ptr create(int device_id, - std::shared_ptr master, - bool device_reset = false); - - ~HipExecutor() { decrease_num_execs(this->device_id_); } + static std::shared_ptr create( + int device_id, std::shared_ptr master, + bool device_reset = false, + allocation_mode alloc_mode = default_hip_alloc_mode); std::shared_ptr get_master() noexcept override; @@ -1108,7 +1517,10 @@ class HipExecutor : public detail::ExecutorBase, /** * Get the HIP device id of the device associated to this executor. */ - int get_device_id() const noexcept { return device_id_; } + int get_device_id() const noexcept + { + return this->get_exec_info().device_id; + } /** * Get the number of devices present on the system. @@ -1118,35 +1530,51 @@ class HipExecutor : public detail::ExecutorBase, /** * Get the number of warps per SM of this executor. */ - int get_num_warps_per_sm() const noexcept { return num_warps_per_sm_; } + int get_num_warps_per_sm() const noexcept + { + return this->get_exec_info().num_pu_per_cu; + } /** * Get the number of multiprocessor of this executor. */ - int get_num_multiprocessor() const noexcept { return num_multiprocessor_; } + int get_num_multiprocessor() const noexcept + { + return this->get_exec_info().num_computing_units; + } /** * Get the major verion of compute capability. */ - int get_major_version() const noexcept { return major_; } + int get_major_version() const noexcept + { + return this->get_exec_info().major; + } /** * Get the minor verion of compute capability. */ - int get_minor_version() const noexcept { return minor_; } + int get_minor_version() const noexcept + { + return this->get_exec_info().minor; + } /** * Get the number of warps of this executor. 
*/ int get_num_warps() const noexcept { - return num_multiprocessor_ * num_warps_per_sm_; + return this->get_exec_info().num_computing_units * + this->get_exec_info().num_pu_per_cu; } /** * Get the warp size of this executor. */ - int get_warp_size() const noexcept { return warp_size_; } + int get_warp_size() const noexcept + { + return this->get_exec_info().max_subgroup_size; + } /** * Get the hipblas handle for this executor @@ -1165,26 +1593,50 @@ class HipExecutor : public detail::ExecutorBase, return hipsparse_handle_.get(); } + /** + * Get the closest NUMA node + * + * @return the closest NUMA node closest to this device + */ + int get_closest_numa() const { return this->get_exec_info().numa_node; } + + /** + * Get the closest PUs + * + * @return the array of PUs closest to this device + */ + std::vector get_closest_pus() const + { + return this->get_exec_info().closest_pu_ids; + } + protected: void set_gpu_property(); void init_handles(); HipExecutor(int device_id, std::shared_ptr master, - bool device_reset = false) + bool device_reset = false, + allocation_mode alloc_mode = default_hip_alloc_mode) : EnableDeviceReset{device_reset}, - device_id_(device_id), - master_(master), - num_multiprocessor_(0), - num_warps_per_sm_(0), - major_(0), - minor_(0), - warp_size_(0) + alloc_mode_(alloc_mode), + master_(master) { - assert(device_id < max_devices); + this->get_exec_info().device_id = device_id; + this->get_exec_info().num_computing_units = 0; + this->get_exec_info().num_pu_per_cu = 0; + this->HipExecutor::populate_exec_info(MachineTopology::get_instance()); + if (this->get_exec_info().closest_pu_ids.size()) { + MachineTopology::get_instance()->bind_to_pus( + this->get_closest_pus()); + } + // it only gets attribute from device, so it should not be affected by + // DeviceReset. this->set_gpu_property(); + // increase the number of executor before any operations may be affected + // by DeviceReset. 
+ increase_num_execs(this->get_exec_info().device_id); this->init_handles(); - increase_num_execs(device_id); } void *raw_alloc(size_type size) const override; @@ -1193,41 +1645,33 @@ class HipExecutor : public detail::ExecutorBase, GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); - static void increase_num_execs(int device_id) - { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]++; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); - static void decrease_num_execs(int device_id) - { - std::lock_guard guard(mutex[device_id]); - num_execs[device_id]--; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - static int get_num_execs(int device_id) - { - std::lock_guard guard(mutex[device_id]); - return num_execs[device_id]; - } + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + + bool verify_memory_to(const CudaExecutor *dest_exec) const override; + + bool verify_memory_to(const HipExecutor *dest_exec) const override; + + static void increase_num_execs(int device_id); + + static void decrease_num_execs(int device_id); + + static int get_num_execs(int device_id); + + void populate_exec_info(const MachineTopology *mach_topo) override; private: - int device_id_; std::shared_ptr master_; - int num_multiprocessor_; - int num_warps_per_sm_; - int major_; - int minor_; - int warp_size_; template using handle_manager = std::unique_ptr>; handle_manager hipblas_handle_; handle_manager hipsparse_handle_; - static constexpr int max_devices = 64; - static int num_execs[max_devices]; - static std::mutex mutex[max_devices]; + allocation_mode alloc_mode_; }; @@ -1238,10 +1682,171 @@ using DefaultExecutor = HipExecutor; } // namespace kernels +/** + * This is the Executor subclass which represents a DPC++ enhanced device. 
+ * + * @ingroup exec_dpcpp + * @ingroup Executor + */ +class DpcppExecutor : public detail::ExecutorBase, + public std::enable_shared_from_this { + friend class detail::ExecutorBase; + +public: + /** + * Creates a new DpcppExecutor. + * + * @param device_id the DPCPP device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels + * @param device_type a string representing the type of device to consider + * (accelerator, cpu, gpu or all). + */ + static std::shared_ptr create( + int device_id, std::shared_ptr master, + std::string device_type = "all"); + + std::shared_ptr get_master() noexcept override; + + std::shared_ptr get_master() const noexcept override; + + void synchronize() const override; + + void run(const Operation &op) const override; + + /** + * Get the DPCPP device id of the device associated to this executor. + * + * @return the DPCPP device id of the device associated to this executor + */ + int get_device_id() const noexcept + { + return this->get_exec_info().device_id; + } + + ::cl::sycl::queue *get_queue() const { return queue_.get(); } + + /** + * Get the number of devices present on the system. + * + * @param device_type a string representing the device type + * + * @return the number of devices present on the system + */ + static int get_num_devices(std::string device_type); + + /** + * Get the available subgroup sizes for this device. + * + * @return the available subgroup sizes for this device + */ + const std::vector &get_subgroup_sizes() const noexcept + { + return this->get_exec_info().subgroup_sizes; + } + + /** + * Get the number of Computing Units of this executor. + * + * @return the number of Computing Units of this executor + */ + int get_num_computing_units() const noexcept + { + return this->get_exec_info().num_computing_units; + } + + /** + * Get the maximum work item sizes. 
+ * + * @return the maximum work item sizes + */ + const std::vector &get_max_workitem_sizes() const noexcept + { + return this->get_exec_info().max_workitem_sizes; + } + + /** + * Get the maximum workgroup size. + * + * @return the maximum workgroup size + */ + int get_max_workgroup_size() const noexcept + { + return this->get_exec_info().max_workgroup_size; + } + + /** + * Get the maximum subgroup size. + * + * @return the maximum subgroup size + */ + int get_max_subgroup_size() const noexcept + { + return this->get_exec_info().max_subgroup_size; + } + + /** + * Get a string representing the device type. + * + * @return a string representing the device type + */ + std::string get_device_type() const noexcept + { + return this->get_exec_info().device_type; + } + +protected: + void set_device_property(); + + DpcppExecutor(int device_id, std::shared_ptr master, + std::string device_type = "all") + : master_(master) + { + std::for_each(device_type.begin(), device_type.end(), + [](char &c) { c = std::tolower(c); }); + this->get_exec_info().device_type = std::string(device_type); + this->get_exec_info().device_id = device_id; + this->set_device_property(); + } + + void populate_exec_info(const MachineTopology *mach_topo) override; + + void *raw_alloc(size_type size) const override; + + void raw_free(void *ptr) const noexcept override; + + GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(HipExecutor, false); + + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); + + bool verify_memory_to(const OmpExecutor *dest_exec) const override; + + bool verify_memory_to(const DpcppExecutor *dest_exec) const override; + +private: + std::shared_ptr master_; + + template + using queue_manager = std::unique_ptr>; + queue_manager<::cl::sycl::queue> queue_; +}; + + +namespace kernels { +namespace dpcpp { +using DefaultExecutor = DpcppExecutor; +} // namespace dpcpp +} // 
namespace kernels + + #undef GKO_OVERRIDE_RAW_COPY_TO } // namespace gko -#endif // GKO_CORE_BASE_EXECUTOR_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index dcf5a1a88cf..50153c78ecb 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_LIN_OP_HPP_ -#define GKO_CORE_BASE_LIN_OP_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_LIN_OP_HPP_ +#define GKO_PUBLIC_CORE_BASE_LIN_OP_HPP_ #include @@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include #include @@ -438,18 +440,19 @@ class Transposable { * Linear operators which support permutation should implement the * Permutable interface. * - * It provides four functionalities, the row permute, the - * column permute, the inverse row permute and the inverse column permute. + * It provides functions to permute the rows and columns of a LinOp, + * independently or symmetrically, and with a regular or inverted permutation. * - * The row permute returns the permutation of the linear operator after - * permuting the rows of the linear operator. For example, if for a matrix A, - * the permuted matrix A' and the permutation array perm, the row i of the - * matrix A is the row perm[i] in the matrix A'. And similarly, for the inverse - * permutation, the row i in the matrix A' is the row perm[i] in the matrix A. 
- * - * The column permute returns the permutation of the linear operator after - * permuting the columns of the linear operator. The definitions of permute and - * inverse permute for the row_permute hold here as well. + * After a regular row permutation with permutation array `perm` the row `i` in + * the output LinOp contains the row `perm[i]` from the input LinOp. + * After an inverse row permutation, the row `perm[i]` in the output LinOp + * contains the row `i` from the input LinOp. + * Equivalently, after a column permutation, the output stores in column `i` + * the column `perm[i]` from the input, and an inverse column permutation + * stores in column `perm[i]` the column `i` from the input. + * A symmetric permutation is functionally equivalent to calling + * `as(A->row_permute(perm))->column_permute(perm)`, but the + * implementation can provide better performance due to kernel fusion. * * Example: Permuting a Csr matrix: * ------------------------------------ @@ -467,12 +470,49 @@ class Permutable { public: virtual ~Permutable() = default; + /** + * Returns a LinOp representing the symmetric row and column permutation of + * the Permutable object. + * In the resulting LinOp, the entry at location `(i,j)` contains the input + * value `(perm[i],perm[j])`. + * + * @param permutation_indices the array of indices containing the + * permutation order. + * + * @return a pointer to the new permuted object + */ + virtual std::unique_ptr permute( + const Array *permutation_indices) const + { + return as(this->row_permute(permutation_indices)) + ->column_permute(permutation_indices); + }; + + /** + * Returns a LinOp representing the symmetric inverse row and column + * permutation of the Permutable object. + * In the resulting LinOp, the entry at location `(perm[i],perm[j])` + * contains the input value `(i,j)`. + * + * @param permutation_indices the array of indices containing the + * permutation order. 
+ * + * @return a pointer to the new permuted object + */ + virtual std::unique_ptr inverse_permute( + const Array *permutation_indices) const + { + return as(this->inverse_row_permute(permutation_indices)) + ->inverse_column_permute(permutation_indices); + }; + /** * Returns a LinOp representing the row permutation of the Permutable * object. + * In the resulting LinOp, the row `i` contains the input row `perm[i]`. * - * @param permutation_indices the array of indices contaning the - * permutation order. + * @param permutation_indices the array of indices containing the + * permutation order. * * @return a pointer to the new permuted object */ @@ -482,9 +522,11 @@ class Permutable { /** * Returns a LinOp representing the column permutation of the Permutable * object. + * In the resulting LinOp, the column `i` contains the input column + * `perm[i]`. * - * @param permutation_indices the array of indices contaning the - * permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new column permuted object */ @@ -494,26 +536,29 @@ class Permutable { /** * Returns a LinOp representing the row permutation of the inverse permuted * object. + * In the resulting LinOp, the row `perm[i]` contains the input row `i`. * - * @param inverse_permutation_indices the array of indices contaning the - * inverse permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const = 0; + const Array *permutation_indices) const = 0; /** * Returns a LinOp representing the row permutation of the inverse permuted * object. + * In the resulting LinOp, the column `perm[i]` contains the input column + * `i`. 
* - * @param inverse_permutation_indices the array of indices contaning the - * inverse permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const = 0; + const Array *permutation_indices) const = 0; }; @@ -537,6 +582,16 @@ class ReadableFromMatrixData { * @param data the matrix_data structure */ virtual void read(const matrix_data &data) = 0; + + /** + * Reads a matrix from a matrix_assembly_data structure. + * + * @param data the matrix_assembly_data structure + */ + void read(const matrix_assembly_data &data) + { + this->read(data.get_ordered_data()); + } }; @@ -599,6 +654,29 @@ class Preconditionable { }; +/** + * The diagonal of a LinOp can be extracted. It will be implemented by + * DiagonalExtractable, so the class does not need to implement it. + * extract_diagonal_linop returns a linop which extracts the elements whose col + * and row index are the same and stores the result in a min(nrows, ncols) x 1 + * dense matrix. + * + * @ingroup diagonal + * @ingroup LinOp + */ +class DiagonalLinOpExtractable { +public: + virtual ~DiagonalLinOpExtractable() = default; + + /** + * Extracts the diagonal entries of the matrix into a vector. + * + * @return linop the linop of diagonal format + */ + virtual std::unique_ptr extract_diagonal_linop() const = 0; +}; + + /** * The diagonal of a LinOp implementing this interface can be extracted. 
* extract_diagonal extracts the elements whose col and row index are the @@ -607,12 +685,14 @@ class Preconditionable { * @ingroup LinOp */ template -class DiagonalExtractable { +class DiagonalExtractable : public DiagonalLinOpExtractable { public: using value_type = ValueType; virtual ~DiagonalExtractable() = default; + std::unique_ptr extract_diagonal_linop() const override; + /** * Extracts the diagonal entries of the matrix into a vector. * @@ -622,6 +702,60 @@ class DiagonalExtractable { const = 0; }; + +/** + * The AbsoluteComputable is an interface that allows to get the component wise + * absolute of a LinOp. Use EnableAbsoluteComputation to + * implement this interface. + */ +class AbsoluteComputable { +public: + /** + * Gets the absolute LinOp + * + * @return a pointer to the new absolute LinOp + */ + virtual std::unique_ptr compute_absolute_linop() const = 0; + + /** + * Compute absolute inplace on each element. + */ + virtual void compute_absolute_inplace() = 0; +}; + + +/** + * The EnableAbsoluteComputation mixin provides the default implementations of + * `compute_absolute_linop` and the absolute interface. `compute_absolute` gets + * a new AbsoluteLinOp. `compute_absolute_inplace` applies absolute + * inplace, so it still keeps the value_type of the class. + * + * @tparam AbsoluteLinOp the absolute LinOp which is being returned + * [CRTP parameter] + * + * @ingroup LinOp + */ +template +class EnableAbsoluteComputation : public AbsoluteComputable { +public: + using absolute_type = AbsoluteLinOp; + + virtual ~EnableAbsoluteComputation() = default; + + std::unique_ptr compute_absolute_linop() const override + { + return this->compute_absolute(); + } + + /** + * Gets the AbsoluteLinOp + * + * @return a pointer to the new absolute object + */ + virtual std::unique_ptr compute_absolute() const = 0; +}; + + /** * The EnableLinOp mixin can be used to provide sensible default implementations * of the majority of the LinOp and PolymorphicObject interface. 
@@ -755,12 +889,12 @@ using EnableDefaultLinOpFactory = * * @ingroup LinOp */ -#define GKO_CREATE_FACTORY_PARAMETERS(_parameters_name, _factory_name) \ -public: \ - class _factory_name; \ - struct _parameters_name##_type \ - : ::gko::enable_parameters_type<_parameters_name##_type, \ - _factory_name> +#define GKO_CREATE_FACTORY_PARAMETERS(_parameters_name, _factory_name) \ +public: \ + class _factory_name; \ + struct _parameters_name##_type \ + : public ::gko::enable_parameters_type<_parameters_name##_type, \ + _factory_name> /** @@ -1008,4 +1142,4 @@ public: \ } // namespace gko -#endif // GKO_CORE_BASE_LIN_OP_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_LIN_OP_HPP_ diff --git a/include/ginkgo/core/base/machine_topology.hpp b/include/ginkgo/core/base/machine_topology.hpp new file mode 100644 index 00000000000..509c7ada786 --- /dev/null +++ b/include/ginkgo/core/base/machine_topology.hpp @@ -0,0 +1,421 @@ +/************************************************************* +Copyright (c) 2017-2021, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_MACHINE_TOPOLOGY_HPP_ +#define GKO_PUBLIC_CORE_BASE_MACHINE_TOPOLOGY_HPP_ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + + +#if GKO_HAVE_HWLOC + +#include + +#else + +struct hwloc_obj_type_t {}; +struct hwloc_obj_t {}; + +#endif + + +struct hwloc_topology; +struct hwloc_bitmap_s; + + +namespace gko { + + +/** + * The machine topology class represents the hierarchical topology of a machine, + * including NUMA nodes, cores and PCI Devices. Various information about the + * machine is gathered with the help of the Hardware Locality library (hwloc). + * + * This class also provides functionalities to bind objects in the topology to + * the execution objects. Binding can enhance performance by allowing data to be + * closer to the executing object. + * + * See the hwloc documentation + * (https://www.open-mpi.org/projects/hwloc/doc/) for more detailed + * information on topology detection and binding interfaces. + * + * @note A global object of MachineTopology type is created in a thread-safe + * manner and only destroyed at the end of the program. This means that + * any subsequent queries will be from the same global object and hence + * use an extra atomic read. 
+ */ +class MachineTopology { + template + using hwloc_manager = std::unique_ptr>; + + /** + * This struct holds the attributes for a normal non-IO object. + */ + struct normal_obj_info { + /** + * The hwloc object. + */ + hwloc_obj_t obj; + + /** + * The logical_id assigned by HWLOC (assigned according to physical + * proximity). + * + * @note Use this rather than os_id for all purposes other than binding. + * [Reference](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00364.php#faq_indexes) + */ + size_type logical_id; + + /** + * The os_id assigned by the OS (assigned arbitrarily by the OS) + */ + size_type os_id; + + /** + * The global persistent id assigned to the object by hwloc. + */ + size_type gp_id; + + /** + * The numa number of the object. + */ + int numa; + + /** + * The memory size of the object. + */ + size_type memory_size; + }; + + + /** + * This struct holds the attributes for an IO/Misc object. + * + * Mainly used for PCI devices. The identifier important for PCI devices is + * the PCI Bus ID, stored here as a string. PCI devices themselves usually + * contain hard disks, network components as well as other objects that are + * not important for our case. + * + * In many cases, hwloc is able to identify the OS devices that belong to a + * certain PCI Bus ID and here they are stored in the io children vector. A + * list of their names is also stored for easy access and comparison. + * + * NOTE(review): no io children vector or name list is declared among the + * members of this struct; confirm whether the paragraph above is out of + * date. + * + * @note IO children can have names such as ibX for Infiniband cards, cudaX + * for NVIDIA cards with CUDA and rsmiX for AMD cards. + */ + struct io_obj_info { + /** + * The hwloc object. + */ + hwloc_obj_t obj; + + /** + * The logical_id assigned by HWLOC (assigned according to proximity). + * + * @note Use this rather than os_id for all purposes other than binding. 
+ * [Reference](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00364.php#faq_indexes) + */ + size_type logical_id; + + /** + * The os_id assigned by the OS (assigned arbitrarily by the OS) + */ + size_type os_id; + + /** + * The global persistent id assigned to the object by hwloc. + */ + size_type gp_id; + + /** + * The closest numa. + */ + int closest_numa; + + /** + * The non-io parent object. + */ + hwloc_obj_t non_io_ancestor; + + /** + * The ancestor local id. + */ + int ancestor_local_id; + + /** + * The ancestor type. + */ + std::string ancestor_type; + + /** + * The array of CPU ids closest to the object. + */ + std::vector closest_pu_ids; + + /** + * The PCI Bus ID + */ + std::string pci_bus_id; + }; + +public: + /** + * Returns an instance of the MachineTopology object. + * + * The instance is a function-local static, so every call returns a + * pointer to the same object. + * + * @return the MachineTopology instance + */ + static MachineTopology *get_instance() + { + static MachineTopology instance; + return &instance; + } + + /** + * Bind the calling process to the CPU cores associated with + * the ids. + * + * @param ids The ids of cores to be bound. + * @param singlify The ids of PUs are singlified to prevent possibly + * expensive migrations by the OS. This means that the + * binding is performed for only one of the ids in the + * set of ids passed in. + * See hwloc doc for + * [singlify](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00175.php#gaa611a77c092e679246afdf9a60d5db8b) + */ + void bind_to_cores(const std::vector &ids, + const bool singlify = true) const + { + hwloc_binding_helper(this->cores_, ids, singlify); + } + + /** + * Bind the calling process to a single core. + * + * Forwards to bind_to_cores() with a one-element id list and the default + * `singlify` value. + * + * @param id The id of the core to be bound to the calling process. + */ + void bind_to_core(const int &id) const + { + MachineTopology::get_instance()->bind_to_cores(std::vector{id}); + } + + /** + * Bind the calling process to PUs associated with + * the ids. + * + * @param ids The ids of PUs to be bound. 
+ * @param singlify The ids of PUs are singlified to prevent possibly + * expensive migrations by the OS. This means that the + * binding is performed for only one of the ids in the + * set of ids passed in. + * See hwloc doc for + * [singlify](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00175.php#gaa611a77c092e679246afdf9a60d5db8b) + */ + void bind_to_pus(const std::vector &ids, + const bool singlify = true) const + { + hwloc_binding_helper(this->pus_, ids, singlify); + } + + /** + * Bind the calling process to a single processing unit (PU). + * + * Forwards to bind_to_pus() with a one-element id list and the default + * `singlify` value. + * + * @param id The id of the PU to be bound to the calling process. + */ + void bind_to_pu(const int &id) const + { + MachineTopology::get_instance()->bind_to_pus(std::vector{id}); + } + + /** + * Get the object of type PU associated with the id. + * + * The id is bounds-checked with GKO_ENSURE_IN_BOUNDS against the number + * of stored PUs. + * + * @param id The id of the PU + * @return a pointer to the PU object struct. + */ + const normal_obj_info *get_pu(size_type id) const + { + GKO_ENSURE_IN_BOUNDS(id, this->pus_.size()); + return &this->pus_[id]; + } + + /** + * Get the object of type core associated with the id. + * + * The id is bounds-checked with GKO_ENSURE_IN_BOUNDS against the number + * of stored cores. + * + * @param id The id of the core + * @return a pointer to the core object struct. + */ + const normal_obj_info *get_core(size_type id) const + { + GKO_ENSURE_IN_BOUNDS(id, this->cores_.size()); + return &this->cores_[id]; + } + + /** + * Get the object of type pci device associated with the id. + * + * The id is bounds-checked with GKO_ENSURE_IN_BOUNDS against the number + * of stored PCI devices. + * + * @param id The id of the pci device + * @return a pointer to the PCI object struct. + */ + const io_obj_info *get_pci_device(size_type id) const + { + GKO_ENSURE_IN_BOUNDS(id, this->pci_devices_.size()); + return &this->pci_devices_[id]; + } + + /** + * Get the object of type pci device associated with the PCI bus id. + * + * @param pci_bus_id The PCI bus id of the pci device + * @return a pointer to the PCI object struct. + */ + const io_obj_info *get_pci_device(const std::string &pci_bus_id) const; + + /** + * Get the number of PU objects stored in this Topology tree. + * + * @return the number of PUs. 
+ */ + size_type get_num_pus() const { return this->pus_.size(); } + + /** + * Get the number of core objects stored in this Topology tree. + * + * @return the number of cores. + */ + size_type get_num_cores() const { return this->cores_.size(); } + + /** + * Get the number of PCI device objects stored in this Topology tree. + * + * @return the number of PCI devices. + */ + size_type get_num_pci_devices() const { return this->pci_devices_.size(); } + + /** + * Get the number of NUMA objects stored in this Topology tree. + * + * @return the number of NUMA objects. + */ + size_type get_num_numas() const { return this->num_numas_; } + + /** + * @internal + * + * A helper function that binds the calling process with the ids of the + * `obj` objects. It is the common implementation behind bind_to_cores() + * and bind_to_pus(). + * + * NOTE(review): this and the following @internal helpers are declared in + * the public section of the class; confirm that this is intended. + */ + void hwloc_binding_helper( + const std::vector &obj, + const std::vector &ids, const bool singlify = true) const; + + /** + * @internal + * + * Load the objects of a normal HWLOC type (Packages, cores, numa-nodes). + * + * @note The objects should be sorted by logical index since hwloc uses + * logical index with these functions + */ + void load_objects(hwloc_obj_type_t type, + std::vector &objects) const; + + /** + * @internal + * + * Load the objects of io type (PCI devices and OS devices). + * + * @note The objects should be sorted by logical index since hwloc uses + * logical index with these functions + */ + void load_objects(hwloc_obj_type_t type, + std::vector &vector) const; + + /** + * + * @internal + * + * Get object id from the os index + */ + int get_obj_id_by_os_index(const std::vector &objects, + size_type os_index) const; + + /** + * + * @internal + * + * Get object id from the hwloc global persistent index (gp_index) + */ + int get_obj_id_by_gp_index(const std::vector &objects, + size_type gp_index) const; + +private: + /** + * Do not allow the MachineTopology object to be copied/moved. There should + * be only one global object per execution. The default constructor is + * private; the single instance is obtained through get_instance(). 
+ */ + MachineTopology(); + MachineTopology(MachineTopology &) = delete; + MachineTopology(MachineTopology &&) = delete; + MachineTopology &operator=(MachineTopology &) = delete; + MachineTopology &operator=(MachineTopology &&) = delete; + ~MachineTopology() = default; + + std::vector pus_; + std::vector cores_; + std::vector packages_; + std::vector numa_nodes_; + std::vector pci_devices_; + size_type num_numas_; + + hwloc_manager topo_; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_MACHINE_TOPOLOGY_HPP_ diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp index 9f9dabb8f75..81f7349daa8 100644 --- a/include/ginkgo/core/base/math.hpp +++ b/include/ginkgo/core/base/math.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2020, the Ginkgo authors +Copyright (c) 2017-2021, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_BASE_MATH_HPP_ -#define GKO_CORE_BASE_MATH_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_MATH_HPP_ +#define GKO_PUBLIC_CORE_BASE_MATH_HPP_ #include @@ -41,6 +41,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#ifdef SYCL_LANGUAGE_VERSION +#include +#endif + + #include #include #include @@ -93,6 +98,20 @@ using std::sqrt; } // namespace kernels +namespace kernels { +namespace dpcpp { + + +using std::abs; + + +using std::sqrt; + + +} // namespace dpcpp +} // namespace kernels + + namespace test { @@ -133,6 +152,27 @@ struct remove_complex_impl> { }; +/** + * Use the complex type if it is not complex. 
+ * + * @tparam T the type being made complex + */ +template +struct to_complex_impl { + using type = std::complex; +}; + +/** + * Use the same type if it is complex type. + * + * @tparam T the type being made complex + */ +template +struct to_complex_impl> { + using type = std::complex; +}; + + template struct is_complex_impl : public std::integral_constant {}; @@ -141,6 +181,96 @@ struct is_complex_impl> : public std::integral_constant {}; +template +struct is_complex_or_scalar_impl : std::is_scalar {}; + +template +struct is_complex_or_scalar_impl> : std::is_scalar {}; + + +/** + * template_converter is converting the template parameters of a class by + * converter. + * + * @tparam converter which convert one type to another type + * @tparam T type + */ +template