## Compile Static

In [1]:
%%writefile matrix_multiply.c
#include <stdio.h>

/*
 * Integer matrix product of two n x n matrices: C = A * B.
 *
 *   n  number of rows and columns of every matrix
 *   A  left operand
 *   B  right operand
 *   C  destination; every element is overwritten
 *
 * Each destination element is zeroed and then accumulated in place.
 */
void matrix_multiply(int n, int A[][n], int B[][n], int C[][n]) {
    for (int row = 0; row < n; ++row) {
        const int *lhs_row = A[row];
        int *out_row = C[row];

        for (int col = 0; col < n; ++col) {
            int *cell = &out_row[col];

            /* Reset the destination, then add one product per inner index. */
            *cell = 0;
            for (int inner = 0; inner < n; ++inner) {
                *cell += lhs_row[inner] * B[inner][col];
            }
        }
    }
}

Writing matrix_multiply.c


In [2]:
%%writefile matrix_vector_multiply.c
#include <stdio.h>

/*
 * Integer matrix-vector product: C = A * B.
 *
 *   n  number of rows and columns of A, and length of B and C
 *   A  n x n matrix
 *   B  input vector
 *   C  output vector; every element is overwritten
 *
 * Each output element is zeroed and then accumulated in place.
 */
void matrix_vector_multiply(int n, int A[][n], int B[], int C[]) {
    for (int row = 0; row < n; ++row) {
        const int *coeffs = A[row];
        int *dest = &C[row];

        /* Reset the destination, then add one product per column. */
        *dest = 0;
        for (int col = 0; col < n; ++col) {
            *dest += coeffs[col] * B[col];
        }
    }
}

Writing matrix_vector_multiply.c


In [3]:
%%writefile main.c
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

void matrix_multiply(int n, int A[][n], int B[][n], int C[][n]);
void matrix_vector_multiply(int n, int A[][n], int B[], int C[]);

/*
 * Benchmark driver.
 *
 * Usage: <program> N
 *
 * Fills two N x N matrices and one N-element vector with pseudo-random digits
 * (0..9), then runs matrix_multiply and matrix_vector_multiply on them once.
 * The results are not printed; the program exists to be timed.
 *
 * Returns 0 on success and EXIT_FAILURE when N is missing, is not a positive
 * decimal integer, or the buffers cannot be allocated.
 */
int main(int argc, char *argv[]) {

    /* argv[1] is only valid when at least one argument was supplied. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s N\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "great_program");
        return EXIT_FAILURE;
    }

    /* strtol, unlike atoi, reports non-numeric input and trailing junk. */
    char *end = NULL;
    long requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || requested <= 0 || requested > INT_MAX) {
        fprintf(stderr, "N must be a positive integer, got '%s'\n", argv[1]);
        return EXIT_FAILURE;
    }

    int N = (int)requested;
    size_t dim = (size_t)N;

    /* Reject sizes whose byte count N * N * sizeof(int) would wrap around. */
    if (dim > (size_t)-1 / sizeof(int) / dim) {
        fprintf(stderr, "N = %d is too large\n", N);
        return EXIT_FAILURE;
    }
    size_t vector_bytes = dim * sizeof(int);
    size_t matrix_bytes = dim * vector_bytes;

    // Allocate memory for matrices and vector
    int (*A)[N] = malloc(matrix_bytes);
    int (*B)[N] = malloc(matrix_bytes);
    int (*C)[N] = malloc(matrix_bytes);
    int *D = malloc(vector_bytes);
    int *E = malloc(vector_bytes);

    if (A == NULL || B == NULL || C == NULL || D == NULL || E == NULL) {
        fprintf(stderr, "out of memory for N = %d\n", N);
        /* free(NULL) is a no-op, so releasing all five pointers is safe. */
        free(A);
        free(B);
        free(C);
        free(D);
        free(E);
        return EXIT_FAILURE;
    }

    // Generate random values for matrices and vector
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = rand() % 10; // Generate random numbers between 0 and 9
            B[i][j] = rand() % 10;
        }
        D[i] = rand() % 10;
    }

    // Perform matrix multiplication
    matrix_multiply(N, A, B, C);

    // Perform matrix-vector multiplication
    matrix_vector_multiply(N, A, D, E);

    // Free allocated memory
    free(A);
    free(B);
    free(C);
    free(D);
    free(E);

    return 0;
}

Writing main.c


In [4]:
# Link the three C sources into one statically linked executable named great_program.
# No -O flag is passed here, unlike the -O3 build further down.
!gcc main.c matrix_multiply.c matrix_vector_multiply.c -static -o great_program

In [5]:
%%timeit
!./great_program 512

615 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Compile Opt/Debug

In [6]:
# Build the same sources as a position-independent shared object with debug info (-g)
# and no optimization flag.
!gcc main.c matrix_multiply.c matrix_vector_multiply.c -fPIC -shared -g -o great_program_debug.so

In [7]:
# Build the same sources as a position-independent shared object optimized with -O3.
!gcc main.c matrix_multiply.c matrix_vector_multiply.c -fPIC -shared -O3 -o great_program_opt.so

In [8]:
%%timeit
# NOTE(review): great_program_debug.so was linked with -shared, i.e. as a shared library,
# not as an executable. Running it directly presumably fails before any multiplication
# happens, so this timing may only measure process start-up — TODO confirm by checking
# the exit status, and time a real executable built with the same flags instead.
!./great_program_debug.so 512

108 ms ± 56.6 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%%timeit
# NOTE(review): great_program_opt.so was linked with -shared, i.e. as a shared library,
# not as an executable. Running it directly presumably fails before any multiplication
# happens, so this timing may only measure process start-up — TODO confirm by checking
# the exit status, and time a real executable built with the same flags instead.
!./great_program_opt.so 512

108 ms ± 122 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## ctypes

In [10]:
import ctypes

In [11]:
# Load the shared object so that its exported C functions can be called from Python.
great_program_py = ctypes.CDLL('./great_program_opt.so')

In [12]:
# Describe the C signature `int main(int argc, char *argv[])` to ctypes so that the
# arguments and the return value are converted with the right types.
great_program_py.main.argtypes = [ctypes.c_int, ctypes.POINTER(ctypes.c_char_p)]
great_program_py.main.restype = ctypes.c_int

In [13]:
# Argument vector handed to the C main(): program name followed by the matrix size.
argc = 2
# C guarantees that argv[argc] is a null pointer, so reserve one slot more than argc.
argv = (ctypes.c_char_p * (argc + 1))()
argv[0] = b'./great_program_opt.so'
argv[1] = b'512'
# Explicit NULL terminator (ctypes arrays start zeroed; this states the intent).
argv[argc] = None

In [14]:
%%timeit
great_program_py.main(argc, argv)

171 ms ± 1.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Cython

In [15]:
!pip install cython



In [24]:
%load_ext cython

In [25]:
%%writefile great_program_cython.pyx
import numpy as np
cimport numpy as cnp
from libc.stdlib cimport malloc, free


cdef void matrix_multiply(double[:, :] A, double[:, :] B, double[:, :] C, int N):
    """Write the product of the leading N x N parts of A and B into C.

    A, B -- input memoryviews, read at indices 0..N-1 along both axes
    C    -- output memoryview; its leading N x N elements are overwritten
    N    -- number of rows and columns to process
    """
    cdef int row, col, inner
    for row in range(N):
        for col in range(N):
            # Zero the destination element, then accumulate into it in place.
            C[row, col] = 0.0
            for inner in range(N):
                C[row, col] = C[row, col] + A[row, inner] * B[inner, col]

cdef void matrix_vector_multiply(double[:, :] A, double[:] x, double[:] y, int N):
    """Write the product of the leading N x N part of A and the first N entries of x into y.

    A -- input matrix memoryview, read at indices 0..N-1 along both axes
    x -- input vector memoryview, read at indices 0..N-1
    y -- output vector memoryview; its first N elements are overwritten
    N -- number of rows and columns to process
    """
    cdef int row, col
    for row in range(N):
        # Zero the destination element, then accumulate into it in place.
        y[row] = 0.0
        for col in range(N):
            y[row] = y[row] + A[row, col] * x[col]

def main(int N):
    """Run one matrix-matrix and one matrix-vector product on random inputs of size N.

    Two N x N matrices and one length-N vector are filled by np.random.rand, the
    outputs start as zeros, and both kernels are executed once. The computed
    values are discarded; the function always returns 0.
    """
    # Random inputs, drawn in the order: left matrix, right matrix, vector.
    cdef cnp.ndarray[double, ndim=2] lhs = np.random.rand(N, N)
    cdef cnp.ndarray[double, ndim=2] rhs = np.random.rand(N, N)
    cdef cnp.ndarray[double, ndim=1] vec = np.random.rand(N)

    # Output buffers.
    cdef cnp.ndarray[double, ndim=2] product = np.zeros((N, N))
    cdef cnp.ndarray[double, ndim=1] result = np.zeros(N)

    matrix_multiply(lhs, rhs, product, N)
    matrix_vector_multiply(lhs, vec, result, N)

    return 0

Overwriting great_program_cython.pyx


In [26]:
%%writefile setup.py
from setuptools import setup
from Cython.Build import cythonize
import numpy

# Build configuration for the Cython extension; run with
# `python setup.py build_ext --inplace`.
setup(
    ext_modules=cythonize(
        "great_program_cython.pyx",
        # Select Python 3 semantics explicitly. Without a language_level the build
        # emits a parser warning and older Cython releases fall back to Python 2 rules.
        compiler_directives={"language_level": "3"},
    ),
    # NumPy's C headers are required because the .pyx file uses `cimport numpy`.
    include_dirs=[numpy.get_include()],
)

Overwriting setup.py


In [27]:
!python setup.py build_ext --inplace

Compiling great_program_cython.pyx because it changed.
[1/1] Cythonizing great_program_cython.pyx
  tree = Parsing.p_module(s, pxd, full_module_name)
running build_ext
building 'great_program_cython' extension
/home/edd-ign/anaconda3/bin/x86_64-conda-linux-gnu-cc -DNDEBUG -fwrapv -O2 -Wall -fPIC -O2 -isystem /home/edd-ign/anaconda3/include -fPIC -O2 -isystem /home/edd-ign/anaconda3/include -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/edd-ign/anaconda3/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/edd-ign/anaconda3/include -fPIC -I/home/edd-ign/anaconda3/lib/python3.11/site-packages/numpy/core/include -I/home/edd-ign/anaconda3/include/python3.11 -c great_program_cython.c -o build/temp.linux-x86_64-cpython-311/great_program_cython.o
In file included from [01m[K/home/edd-ign/anaconda3/lib/python3.11/site-packages/numpy/core/include/numpy/ndarraytypes.h:1929[m[K,
                 from [01m

In [28]:
!ls -a

.
..
.ipynb_checkpoints
build
great_program
great_program_cython.c
great_program_cython.cpython-311-x86_64-linux-gnu.so
great_program_cython.pyx
great_program_debug.so
great_program_opt.so
main.c
matrix_multiplication.ipynb
matrix_multiply.c
matrix_vector_multiply.c
setup.py


In [29]:
import great_program_cython

In [31]:
%%timeit
great_program_cython.main(512)

294 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
