andreinechaev · cosminc98 · Jun 3, 2024 · May 19, 2024 · Jun 2, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -51,3 +51,10 @@ repos:
     - id: bandit
       args: ["-c", "pyproject.toml"]
       additional_dependencies: ["bandit[toml]"]
+
+  # remove notebook cell output
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.7.1
+    hooks:
+      - id: nbstripout
+        files: ".ipynb"
diff --git a/notebooks/compiling-with-external-libraries.ipynb b/notebooks/compiling-with-external-libraries.ipynb
@@ -0,0 +1,383 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "4_qHS_1yjcrK"
+   },
+   "source": [
+    "# Compiling CUDA with OpenCV\n",
+    "In this notebook we will show how to compile CUDA C++ code with the OpenCV library, which we will use to load an image and save it back to disk after applying a blur kernel.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "2XyG-R3nk1p6"
+   },
+   "source": [
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "J2iU1zyrK4FF"
+   },
+   "source": [
+    "Install and load the nvcc4jupyter extension, which will allow us to run CUDA C++ code using cell magics. See [the documentation](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html) for details."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "K1pfzEEsjlFD",
+    "outputId": "f14a4888-9215-4280-e723-1a3e1477135d"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install nvcc4jupyter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "w-4m2tFtlt_M",
+    "outputId": "9118eb6e-63ee-426f-a618-fb35d9750b81"
+   },
+   "outputs": [],
+   "source": [
+    "%load_ext nvcc4jupyter"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make sure OpenCV is installed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "!apt update && apt install -y libopencv-dev"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hBZR7kZkk4UV"
+   },
+   "source": [
+    "## Code"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JppQ-78qLYjP"
+   },
+   "source": [
+    "### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "vYyG7Gd0vJJ0"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import subprocess\n",
+    "from pathlib import Path\n",
+    "from IPython.display import Image"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Zhs1emaLLemz"
+   },
+   "source": [
+    "### Download an image"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "1vyu_yKStyKG"
+   },
+   "source": [
+    "Get a random image of IMG_SIZE x IMG_SIZE pixels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 417
+    },
+    "id": "T3yN-g8dvZ4P",
+    "outputId": "d5b8602c-6a1a-4de0-9fbf-8b72ef9f6071"
+   },
+   "outputs": [],
+   "source": [
+    "IMG_SIZE = 400\n",
+    "os.environ[\"IMG_SIZE\"] = str(IMG_SIZE)\n",
+    "IMG_FNAME = \"image.jpg\"\n",
+    "os.environ[\"IMG_FNAME\"] = IMG_FNAME\n",
+    "\n",
+    "!wget -O $IMG_FNAME https://picsum.photos/$IMG_SIZE.jpg &> /dev/null\n",
+    "Image(filename=IMG_FNAME)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "BeOimvKtNluI"
+   },
+   "source": [
+    "### Compiler arguments\n",
+    "These are options that you need to pass to the compiler in order to link with the OpenCV library."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make sure the file containing the necessary compilation flags for the opencv library\n",
+    "# exists in a directory in the PKG_CONFIG_PATH environment variable\n",
+    "for path in Path('/usr/lib').rglob('opencv4.pc'):\n",
+    "    os.environ[\"PKG_CONFIG_PATH\"] = os.path.dirname(str(path))\n",
+    "    break\n",
+    "    \n",
+    "# get the compilation flags required to compile our CUDA C++ code with opencv\n",
+    "COMPILER_ARGS = subprocess.check_output([\"pkg-config\", \"--cflags\", \"--libs\", \"opencv4\"]).decode().strip()\n",
+    "COMPILER_ARGS = f\"-I/usr/include/opencv4 {COMPILER_ARGS}\"\n",
+    "print(COMPILER_ARGS)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "SAQ1x3_JNvhT"
+   },
+   "source": [
+    "### Blurring the image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "hhkNS3HJlFlC",
+    "outputId": "bb0b3fad-f132-4a26-e247-32bf5e29c1a4"
+   },
+   "outputs": [],
+   "source": [
+    "%%cuda --compiler-args \"$COMPILER_ARGS\"\n",
+    "\n",
+    "#include <opencv2/core.hpp>\n",
+    "#include <opencv2/imgcodecs.hpp>\n",
+    "\n",
+    "#include <iostream>\n",
+    "\n",
+    "#define BLUR_SIZE 5\n",
+    "#define R 0\n",
+    "#define G 1\n",
+    "#define B 2\n",
+    "\n",
+    "// error checking macro\n",
+    "#define cudaCheckErrors(msg) \\\n",
+    "    do { \\\n",
+    "        cudaError_t __err = cudaGetLastError(); \\\n",
+    "        if (__err != cudaSuccess) { \\\n",
+    "            fprintf(stderr, \"Fatal error: %s (%s at %s:%d)\\n\", \\\n",
+    "                msg, cudaGetErrorString(__err), \\\n",
+    "                __FILE__, __LINE__); \\\n",
+    "            fprintf(stderr, \"*** FAILED - ABORTING\\n\"); \\\n",
+    "            exit(1); \\\n",
+    "        } \\\n",
+    "    } while (0)\n",
+    "\n",
+    "// kernel taken from https://stackoverflow.com/a/65973288\n",
+    "__global__ void blurKernel(\n",
+    "    unsigned char* in,\n",
+    "    unsigned char* out,\n",
+    "    int width,\n",
+    "    int height,\n",
+    "    int num_channel,\n",
+    "    int channel\n",
+    ") {\n",
+    "\n",
+    "    int col = blockIdx.x * blockDim.x + threadIdx.x;\n",
+    "    int row = blockIdx.y * blockDim.y + threadIdx.y;\n",
+    "\n",
+    "    if(col < width && row < height) {\n",
+    "        int pixVal = 0;\n",
+    "        int pixels = 0;\n",
+    "        for(int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {\n",
+    "            for(int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {\n",
+    "                int curRow = row + blurRow;\n",
+    "                int curCol = col + blurCol;\n",
+    "                if(curRow > -1 && curRow < height && curCol > -1 && curCol < width) {\n",
+    "                    pixVal += in[curRow * width * num_channel + curCol * num_channel + channel];\n",
+    "                    pixels++;\n",
+    "                }\n",
+    "            }\n",
+    "        }\n",
+    "        out[row * width * num_channel + col * num_channel + channel] = (unsigned char)(pixVal/pixels);\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "int main()\n",
+    "{\n",
+    "    std::string image_path = cv::samples::findFile(\"image.jpg\");\n",
+    "    cv::Mat img = imread(image_path, cv::IMREAD_COLOR);\n",
+    "    if(img.empty())\n",
+    "    {\n",
+    "        std::cerr << \"Could not read the image: \" << image_path << std::endl;\n",
+    "        return 1;\n",
+    "    }\n",
+    "\n",
+    "    // image shape\n",
+    "    int width = img.cols;\n",
+    "    int height = img.rows;\n",
+    "    int n_channels = 3; // hard-coded RGB processing\n",
+    "\n",
+    "    // we will read and write directly into the data array of the OpenCV Matrix\n",
+    "    unsigned char *host_image_input = img.data;\n",
+    "    unsigned char *host_image_output = img.data;\n",
+    "\n",
+    "    // allocate memory for device arrays\n",
+    "    unsigned char* dev_image_input = NULL;\n",
+    "    unsigned char* dev_image_output = NULL;\n",
+    "    cudaMalloc(\n",
+    "        (void**)&dev_image_input,\n",
+    "        sizeof(unsigned char) * height * width * n_channels\n",
+    "    );\n",
+    "    cudaMalloc(\n",
+    "        (void**)&dev_image_output,\n",
+    "        sizeof(unsigned char) * height * width * n_channels\n",
+    "    );\n",
+    "    cudaCheckErrors(\"cudaMalloc failure\");\n",
+    "\n",
+    "    // transfer data from host to device for processing on GPU\n",
+    "    cudaMemcpy(\n",
+    "        dev_image_input,\n",
+    "        host_image_input,\n",
+    "        sizeof(unsigned char) * height * width * n_channels,\n",
+    "        cudaMemcpyHostToDevice\n",
+    "    );\n",
+    "    cudaCheckErrors(\"cudaMemcpy H2D failure\");\n",
+    "\n",
+    "    // run a blur kernel on each channel\n",
+    "    dim3 blockSize(16, 16, 1);\n",
+    "    dim3 gridSize(width/blockSize.x, height/blockSize.y, 1);\n",
+    "    blurKernel<<<gridSize, blockSize>>>(dev_image_input, dev_image_output, width, height, n_channels, R);\n",
+    "    blurKernel<<<gridSize, blockSize>>>(dev_image_input, dev_image_output, width, height, n_channels, G);\n",
+    "    blurKernel<<<gridSize, blockSize>>>(dev_image_input, dev_image_output, width, height, n_channels, B);\n",
+    "\n",
+    "    cudaDeviceSynchronize();\n",
+    "    cudaCheckErrors(\"kernel failure\");\n",
+    "\n",
+    "    // copy results back to host\n",
+    "    cudaMemcpy(\n",
+    "        host_image_output,\n",
+    "        dev_image_output,\n",
+    "        sizeof(unsigned char) * height * width * n_channels,\n",
+    "        cudaMemcpyDeviceToHost\n",
+    "    );\n",
+    "    cudaCheckErrors(\"cudaMemcpy D2H failure\");\n",
+    "\n",
+    "    cudaFree(dev_image_input);\n",
+    "    cudaFree(dev_image_output);\n",
+    "\n",
+    "    // save the blurred image to disk\n",
+    "    cv::imwrite(\"image_blurred.jpg\", img);\n",
+    "    return 0;\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 417
+    },
+    "id": "Bn9X_cm4rALH",
+    "outputId": "ead0c3b5-013b-45a3-f91a-7866d33b2404"
+   },
+   "outputs": [],
+   "source": [
+    "Image(filename=\"image_blurred.jpg\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kaggle": {
+   "accelerator": "nvidiaTeslaT4",
+   "dataSources": [],
+   "dockerImageVersionId": 30699,
+   "isGpuEnabled": true,
+   "isInternetEnabled": true,
+   "language": "python",
+   "sourceType": "notebook"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}