In [1]:
%load_ext Cython
import numpy as np
from numpy import asfortranarray as Fort
from scipy.linalg.blas import dgemm as DG

# The following Cython function is low-level BLAS for Array Multiplication

It takes zero-initialised NumPy arrays from Python, fills the two input arrays with random values, and writes their matrix product into the output array. This serves as a proof of concept: the result can then be compared against NumPy's dot product to confirm that the Cython code produces the correct output. 

The arrays must be Fortran-contiguous (the Cython signatures declare the inputs as column-major memoryviews). The result of the multiplication is likewise written out in column-major order so that the output layout matches the inputs. 

In [2]:
%%cython 

#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
#cython: cdivision=True

#Import module: the matrix multiplication module
from scipy.linalg.cython_blas cimport dgemm

from libc.stdlib cimport rand, RAND_MAX, calloc, malloc, free

####################################################################

#Random number generator used to fill the test arrays.
#Returns a pseudo-random double in the closed interval [MIN, MAX], drawn
#from the C library's rand(). The sequence depends on the C library seed
#(srand), which this file never sets.
cdef double rand_value(unsigned int MIN, unsigned int MAX) nogil:
    cdef double unit, low, span

    #generate a random number between 0 and 1 (inclusive)
    unit = rand() / <double>RAND_MAX

    #convert to double BEFORE subtracting: the bounds are unsigned, so
    #MAX - MIN in integer arithmetic would wrap around if MAX < MIN
    low = <double>MIN
    span = <double>MAX - low

    #scale by the width of the interval (not by MAX itself) so the result
    #stays inside [MIN, MAX] instead of spilling up to MIN + MAX
    return low + span * unit

#Fill a C buffer with random values: writes rand_value(MIN, MAX) into each
#of the first `iter` slots of `arr`. The caller owns the buffer and must
#guarantee it holds at least `iter` doubles.
cdef void FILL_ARRAY(double* arr, size_t iter, 
                     unsigned int MIN, unsigned int MAX) nogil:
    cdef size_t pos = 0

    while pos < iter:
        arr[pos] = rand_value(MIN, MAX)
        pos += 1
        
#Output arrays: copy a flat buffer into a 2-D Fortran-contiguous view.
#The buffer is read as column-major, i.e. element (r, c) sits at offset
#r + c*rows, so the outer loop walks the columns and the inner loop walks
#down the rows of each column.
cdef void OUTPUT(double[::1,:] arr_out, double* arr, int rows, int col) nogil:
    cdef Py_ssize_t c_idx, r_idx, base

    for c_idx in range(col):
        #offset of the first element of this column inside the flat buffer
        base = c_idx * rows
        for r_idx in range(rows):
            arr_out[r_idx, c_idx] = arr[base + r_idx]

cpdef void myfunc(double[::1,:] in1, double[::1,:] in2, double[::1,:] out,
                  char* TransA, char* TransB) nogil:
    """Fill in1/in2 with random values and store op(in1)*op(in2) in out.

    Temporary C buffers are allocated for both operands and the product,
    the operands are filled by FILL_ARRAY (MIN=1, MAX=5), BLAS dgemm
    computes the product, and all three buffers are copied back into the
    supplied Fortran-contiguous memoryviews.

    Parameters
    ----------
    in1, in2 : column-major views that are OVERWRITTEN with the random
        operands used for the product.
    out : column-major view receiving the product; it must have at least
        as many rows/columns as op(in1)*op(in2).
    TransA, TransB : 'T' to use the transpose of the operand, 'N' to use
        it as stored. Only the first character is inspected; anything
        other than the three explicit 'T'/'N' combinations is sized as
        'N','N'.

    Raises
    ------
    ValueError  if the inner dimensions disagree or out is too small.
    MemoryError if a temporary buffer cannot be allocated.

    NOTE(review): the function is declared ``void`` without an exception
    clause, so depending on the Cython version these exceptions may be
    reported as "ignored" instead of propagating - confirm for the Cython
    release in use.
    """
    cdef: 
        double* a = NULL
        double* b = NULL
        double* c = NULL
        char* Trans='T'
        char* No_Trans='N'
        int m, n, k, lda, ldb, ldc, col_c
        int col_a, col_b, inner_b
        size_t size_a, size_b, size_c
        unsigned int MIN=1, MAX=5
        double alpha, beta
    
    #dimensions of input arrays as stored (column-major: rows = leading dim)
    lda = in1.shape[0]
    col_a = in1.shape[1]
    ldb = in2.shape[0]
    col_b = in2.shape[1]
    
    #dimensions of arrays post operation: op(A) is m x k, op(B) is inner_b x n
    if TransA[0]==Trans[0] and TransB[0]==No_Trans[0]:
        m = col_a; k = lda
        inner_b = ldb; n = col_b
    elif TransB[0]==Trans[0] and TransA[0]==No_Trans[0]:
        #A is used as stored, so the shared dimension is A's column count
        m = lda; k = col_a
        inner_b = col_b; n = ldb
    elif TransA[0]==Trans[0] and TransB[0]==Trans[0]:
        m = col_a; k = lda
        inner_b = col_b; n = ldb
    else: 
        m = lda; k = col_a
        inner_b = ldb; n = col_b
    
    #bounds checking is disabled for this cell, so reject shapes that would
    #make dgemm or OUTPUT touch memory outside the supplied arrays
    if k != inner_b:
        with gil:
            raise ValueError("myfunc: inner dimensions of op(in1) and op(in2) differ")
    if out.shape[0] < m or out.shape[1] < n:
        with gil:
            raise ValueError("myfunc: out is too small for the product")
    
    #dimensions of array c
    ldc = m; col_c = n
    
    #element counts computed in size_t so the products cannot overflow int
    size_a = <size_t>lda * <size_t>col_a
    size_b = <size_t>ldb * <size_t>col_b
    size_c = <size_t>ldc * <size_t>col_c
    
    a = <double*> malloc(size_a * sizeof(double))
    b = <double*> malloc(size_b * sizeof(double))    
    c = <double*> calloc(size_c, sizeof(double))
    
    #ANY failed allocation is an error (a zero-sized request may legally
    #return NULL); release whatever did succeed before raising
    if ((a == NULL and size_a != 0) or (b == NULL and size_b != 0)
            or (c == NULL and size_c != 0)):
        free(a)
        free(b)
        free(c)
        with gil:
            raise MemoryError()
    
    try:
        #fill in arrays
        FILL_ARRAY(a,size_a,MIN,MAX)
        FILL_ARRAY(b,size_b,MIN,MAX)

        #scalars associated with C = alpha*op(A)*op(B) + beta*C
        alpha = 1.0
        beta = 0.0
    
        #BLAS requires leading dimensions >= 1; when any extent is zero the
        #product is empty or all zeros, which calloc has already provided
        if m > 0 and n > 0 and k > 0:
            dgemm(TransA, TransB, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc)
        
        OUTPUT(in1,a,lda,col_a)
        OUTPUT(in2,b,ldb,col_b)
        OUTPUT(out,c,ldc,col_c)
        
    finally:
        free(a)
        free(b)
        free(c)

In [36]:
# Problem sizes: test_a is (m x k), test_b is (k x n), test_c is (m x n).
m, n, k = 4, 4, 3

# Zero-initialised float64 buffers laid out column-major (Fortran order).
test_a = np.zeros(shape=(m, k), dtype=np.float64, order='F')
test_b = np.zeros(shape=(k, n), dtype=np.float64, order='F')
test_c = np.zeros(shape=(m, n), dtype=np.float64, order='F')

The following takes memoryviews of the NumPy arrays and operates on them directly (no use of malloc). 

In [7]:
%%cython

#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
#cython: cdivision=True

#Import module: the matrix multiplication module
from scipy.linalg.cython_blas cimport dgemm

####################################################################

cpdef void MM(double[::1,:] a, double[::1,:] b, 
              double[::1,:] out, char* TransA, char* TransB) nogil:
    """Store op(a)*op(b) in out using BLAS dgemm, working on the views directly.

    Parameters
    ----------
    a, b : Fortran-contiguous (column-major) operands; they are only read.
    out : Fortran-contiguous view receiving the product in its top-left
        m x n corner. It may be larger than the product; elements outside
        that corner are left untouched.
    TransA, TransB : 'T' to use the transpose of the operand, 'N' to use
        it as stored. Only the first character is inspected; anything
        other than the three explicit 'T'/'N' combinations is sized as
        'N','N'.

    Raises
    ------
    ValueError if the inner dimensions disagree or out is too small.

    NOTE(review): the function is declared ``void`` without an exception
    clause, so depending on the Cython version the exception may be
    reported as "ignored" instead of propagating - confirm for the Cython
    release in use.
    """
    cdef:
        char* Trans='T'
        char* No_Trans='N'
        int m, n, k, lda, ldb, ldc
        int col_a, col_b, inner_b
        Py_ssize_t i, j
        double alpha, beta
    
    #dimensions of input arrays as stored (column-major: rows = leading dim)
    lda = a.shape[0]
    col_a = a.shape[1]
    ldb = b.shape[0]
    col_b = b.shape[1]
    
    #dimensions of arrays post operation (after transposing, or not):
    #op(a) is m x k and op(b) is inner_b x n
    if TransA[0]==Trans[0] and TransB[0]==No_Trans[0]:
        m = col_a; k = lda
        inner_b = ldb; n = col_b
    elif TransB[0]==Trans[0] and TransA[0]==No_Trans[0]:
        m = lda; k = col_a
        inner_b = col_b; n = ldb
    elif TransA[0]==Trans[0] and TransB[0]==Trans[0]:
        m = col_a; k = lda
        inner_b = col_b; n = ldb
    else: 
        m = lda; k = col_a
        inner_b = ldb; n = col_b
    
    #leading dimension of c is the row count of the array actually passed
    #in, which only coincides with m when out is exactly m x n
    ldc = out.shape[0]
    
    #bounds checking is disabled for this cell, so reject shapes that would
    #make dgemm read or write outside the supplied arrays
    if k != inner_b:
        with gil:
            raise ValueError("MM: inner dimensions of op(a) and op(b) differ")
    if ldc < m or out.shape[1] < n:
        with gil:
            raise ValueError("MM: out is too small for the product")
    
    #empty product: nothing to write
    if m == 0 or n == 0:
        return
    
    #no shared dimension: the product is all zeros. Handled here because an
    #operand with zero rows would give BLAS an illegal leading dimension of 0
    if k == 0:
        for j in range(n):
            for i in range(m):
                out[i, j] = 0.0
        return
    
    #scalars associated with C = alpha*op(A)*op(B) + beta*C
    alpha = 1.0
    beta = 0.0
    
    #Fortran BLAS function for calculating the multiplication of arrays
    dgemm(TransA, TransB, &m, &n, &k, &alpha, &a[0,0], &lda, &b[0,0], &ldb, &beta, &out[0,0], &ldc)


In [31]:
# Problem sizes: test_a is (m x k), test_b is (k x n), test_c is (k x k).
m, n, k = 3, 3, 6

# Random 0/1 matrices converted to Fortran-ordered float64 arrays.
test_a = Fort(np.random.randint(0, 2, size=(m, k)), dtype=np.float64)
test_b = Fort(np.random.randint(0, 2, size=(k, n)), dtype=np.float64)

# Zeroed (k x k) Fortran-ordered buffer.
test_c = np.zeros(shape=(k, k), dtype=np.float64, order='F')

In [None]:
%testit MM(test_a,test_b,test_c,"T","T")

# Cython code for the inverse of a square array

This code takes in a square array and rewrites the array as the inverse of itself. 

In [15]:
%%cython

#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
#cython: cdivision=True

#Import module: LAPACK routines for LU factorization (dgetrf) and for inversion from the LU factors (dgetri)
from scipy.linalg.cython_lapack cimport dgetrf, dgetri

from libc.stdlib cimport malloc, free

####################################################################

cpdef void INV_M(double[::1,:] a) nogil:
    """Replace the square matrix ``a`` with its inverse, in place.

    The matrix is LU-factorized with LAPACK dgetrf and the inverse is then
    formed from the factors with dgetri. Both routines overwrite ``a``.

    Parameters
    ----------
    a : Fortran-contiguous (column-major) square array; overwritten.

    Raises
    ------
    ValueError  if ``a`` is not square, or if LAPACK reports a non-zero
                INFO (e.g. the matrix is singular). In the singular case
                ``a`` has already been overwritten by the LU factors.
    MemoryError if the pivot or workspace buffer cannot be allocated.

    NOTE(review): the function is declared ``void`` without an exception
    clause, so depending on the Cython version these exceptions may be
    reported as "ignored" instead of propagating - confirm for the Cython
    release in use.
    """
    cdef:
        int* pivot = NULL
        double* work = NULL
        int m, n, lda, INFO, Lwork
    
    #Dimensions of arrays
    m = a.shape[0]
    n = a.shape[1]
    
    #an inverse only exists for square matrices
    if m != n:
        with gil:
            raise ValueError("INV_M: array must be square")
    
    #the inverse of an empty matrix is the empty matrix itself
    if n == 0:
        return
    
    lda = m
    #workspace of n*n doubles; dgetri needs at least n
    Lwork = n * n

    #manually allocate the pivot indices and the dgetri workspace
    pivot = <int*> malloc(<size_t>n * sizeof(int))
    work = <double*> malloc(<size_t>Lwork * sizeof(double))
    
    #EITHER allocation failing is an error; free the one that succeeded
    if pivot == NULL or work == NULL:
        free(pivot)
        free(work)
        with gil:
            raise MemoryError()
    
    try:
        #First, Conduct the LU factorization of the array
        dgetrf(&m, &n, &a[0,0], &lda, pivot, &INFO)
        #Now use the LU factorization and the pivot information to inverse,
        #but only if the factorization succeeded (INFO > 0 means singular)
        if INFO == 0:
            dgetri(&n, &a[0,0], &lda, pivot, work, &Lwork, &INFO)
        
    finally:
        free(pivot)
        free(work)
    
    #surface LAPACK failures instead of silently leaving garbage in a
    if INFO != 0:
        with gil:
            raise ValueError("INV_M: inversion failed (LAPACK INFO=%d); "
                             "the matrix is singular or an argument was invalid" % INFO)

In [34]:
# INV_M works in place: test_c is overwritten with its own inverse.
INV_M(test_c)

In [35]:
# Show test_c as it stands after the in-place inversion in the previous cell.
print (test_c)

[[ 0.01240349 -0.00179716 -0.00196759 -0.00256198 -0.00231155 -0.00145656]
 [-0.00179716  0.0116299  -0.00175464 -0.00177647 -0.00244694 -0.00296923]
 [-0.00196759 -0.00175464  0.01137267 -0.00176311 -0.00244331 -0.00092717]
 [-0.00256198 -0.00177647 -0.00176311  0.01126294 -0.00042601 -0.00251885]
 [-0.00231155 -0.00244694 -0.00244331 -0.00042601  0.0119643  -0.00184702]
 [-0.00145656 -0.00296923 -0.00092717 -0.00251885 -0.00184702  0.01117136]]
