### CNN.c is a C library for creating CUDA-optimized Convolutional Neural Networks.

### Using this library
We want to make using this library as easy as possible.

First we'll start with a few prerequisites:

1. make
2. gcc
3.

In a C program,


In [10]:
!mkdir src
!mkdir src/core
!mkdir src/layers
!mkdir src/math
!mkdir src/optimizers
!mkdir src/cuda
!mkdir src/utils
!touch src/core/cnn.c
!touch src/core/network.c
!touch src/core/training.c
!touch src/layers/convolutional.c
!touch src/layers/pooling.c
!touch src/layers/fully_connected.c
!touch src/layers/activation.c

mkdir: cannot create directory ‘src’: File exists
mkdir: cannot create directory ‘src/core’: File exists
mkdir: cannot create directory ‘src/layers’: File exists
mkdir: cannot create directory ‘src/math’: File exists
mkdir: cannot create directory ‘src/optimizers’: File exists
mkdir: cannot create directory ‘src/cuda’: File exists
mkdir: cannot create directory ‘src/utils’: File exists


In [11]:
!mkdir include
!mkdir include/core
!mkdir include/layers
!mkdir include/math
!mkdir include/optimizers
!mkdir include/cuda
!mkdir include/utils
!touch include/cnn.h
!touch include/network.h
!touch include/training.h
!touch layers/convolutional.h
!touch layers/pooling.h
!touch layers/fully_connected.h

touch: cannot touch 'layers/convolutional.h': No such file or directory
touch: cannot touch 'layers/pooling.h': No such file or directory
touch: cannot touch 'layers/fully_connected.h': No such file or directory


In [12]:
!mkdir tests
!touch tests/test_cnn.c

mkdir: cannot create directory ‘tests’: File exists


In [7]:
!ls src

core  cuda  layers  math  optimizers  utils


In [3]:
%%writefile cuda_kernel.cu
#include <stdio.h>

__global__ void hello_cuda() {
    printf("Hello from CUDA!\n");
}

int main() {
    hello_cuda<<<1, 1>>>();
    cudaDeviceSynchronize();
    return 0;
}

Writing cuda_kernel.cu


In [4]:
!nvcc cuda_kernel.cu -o cuda_kernel
!./cuda_kernel

Hello from CUDA!


In [None]:
%%writefile vector_add.cu
#include <stdio.h>

__global__ void vectorAdd(float *a, float *b, float *c, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}

int main() {
    int n = 1000000;
    size_t size = n * sizeof(float);

    float *h_a = (float *)malloc(size);
    float *h_b = (float *)malloc(size);
    float *h_c = (float *)malloc(size);

    for (int i = 0; i < n; i++) {
        h_a[i] = rand() / (float)RAND_MAX;
        h_b[i] = rand() / (float)RAND_MAX;
    }

    float *d_a, *d_b, *d_c;
    cudaMalloc(&d_a, size);
    cudaMalloc(&d_b, size);
    cudaMalloc(&d_c, size);

    cudaMemcpy(d_a, h_a, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, h_b, size, cudaMemcpyHostToDevice);

    int blockSize = 256;
    int numBlocks = (n + blockSize - 1) / blockSize;
    vectorAdd<<<numBlocks, blockSize>>>(d_a, d_b, d_c, n);

    cudaMemcpy(h_c, d_c, size, cudaMemcpyDeviceToHost);

    for (int i = 0; i < 10; i++) {
        printf("%f + %f = %f\n", h_a[i], h_b[i], h_c[i]);
    }

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}

Writing vector_add.cu


In [None]:
!nvcc vector_add.cu -o vector_add
!./vector_add

0.840188 + 0.394383 = 1.234571
0.783099 + 0.798440 = 1.581539
0.911647 + 0.197551 = 1.109199
0.335223 + 0.768230 = 1.103452
0.277775 + 0.553970 = 0.831745
0.477397 + 0.628871 = 1.106268
0.364784 + 0.513401 = 0.878185
0.952230 + 0.916195 = 1.868425
0.635712 + 0.717297 = 1.353009
0.141603 + 0.606969 = 0.748571
