In [1]:
# This notebook shows how to practice 
# different optimization techniques 
# with the heat equation solver
#
# Prepared: Octavian Andrei, 2020 

## References

1. The code for this exercise is located [here](https://github.com/csc-training/hpc-python/tree/master/cython/heat-equation).

2. [Cython user guide for numpy](https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html)

# Optimising heat equation with Cython

### Creating a Cython extension

Write a `setup.py` for creating a Cython version of [heat.py](https://github.com/csc-training/hpc-python/blob/master/cython/heat-equation/heat.py)
module, and use it from the main program [heat_main.py](https://github.com/csc-training/hpc-python/blob/master/cython/heat-equation/heat_main.py).
How much does simple Cythonization (i.e. reducing the interpreter
overhead) improve the performance?

### Optimising

Based on the profile in the performance measurement
[exercise](https://github.com/csc-training/hpc-python/blob/master/performance/cprofile), optimise the most
time-consuming part of the algorithm. If you did not finish the profiling
exercise, you can look at the example profile [here](https://github.com/csc-training/hpc-python/blob/master/cython/heat-equation/profile.md).

Utilize all the tricks you have learned so far (type declarations,
fast array indexing, compiler directives, C functions, ...).

Investigate how the different optimizations affect the performance. You
can use the application's own timers and/or **timeit**. The annotated HTML report
produced by `cython -a …` can be useful when tuning performance.

When finished with the optimisation, compare the performance to the
Python/NumPy model solution (in
[numpy/heat-equation](https://github.com/csc-training/hpc-python/blob/master/numpy/heat-equation)), which uses array
operations. You can also experiment with the larger input data provided in
[bottle_medium.dat](https://github.com/csc-training/hpc-python/blob/master/cython/heat-equation/bottle_medium.dat) and [bottle_large.dat](https://github.com/csc-training/hpc-python/blob/master/cython/heat-equation/bottle_large.dat).



In [2]:
from __future__ import print_function
%load_ext cython
import Cython
print(Cython.__version__)

0.29.15


In [3]:
import numpy as np
import matplotlib
import time
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Set the colormap
plt.rcParams['image.cmap'] = 'BrBG'

In [4]:
# dictionary to store the execution times
timeit_dict = dict()      # bottle.dat
timeit_middle = dict()    # bottle_medium.dat
timeit_large = dict()     # bottle_large.dat

In [5]:
# original code
# %load '../cython/heat-equation/heat.py'
# %load '../cython/heat-equation/heat_main.py'

def evolve(u, u_previous, a, dt, dx2, dy2):
    """Explicit time evolution (pure-Python reference implementation).

       Advances the field by one time step using the four nearest
       neighbours of every interior point.  Both arrays are modified in
       place and nothing is returned.

       u:            new temperature field (interior points are overwritten)
       u_previous:   previous field (overwritten with u at the end)
       a:            diffusion constant
       dt:           time step
       dx2:          grid spacing squared, i.e. dx^2
       dy2:            -- "" --          , i.e. dy^2"""

    n, m = u.shape

    # Only interior points are updated; the first/last row and column of u
    # are never written, so they keep the values that were passed in.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) / dx2 + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) / dy2 )
    # Copy element-wise (rather than rebinding the name) so the caller's
    # u_previous array holds the new state for the next step.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)


def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")
    print("Input file: {}".format(input_file))

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))
    
main()

Heat equation solver
Input file: bottle.dat
Simulation finished in 39.48003315925598 s


In [6]:
timeit_result = %timeit -n1 -r 3 -o main()

Heat equation solver
Input file: bottle.dat
Simulation finished in 38.31700301170349 s
Heat equation solver
Input file: bottle.dat
Simulation finished in 39.43882393836975 s
Heat equation solver
Input file: bottle.dat
Simulation finished in 38.06397533416748 s
38.9 s ± 570 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [7]:
original_time = timeit_result.average
timeit_dict['Original python'] = original_time
original_time

38.869277024999995

In [8]:
# it takes ~7X longer than the small
# bottle_medium.dat
timeit_result = %timeit -n1 -r 1 -o main(input_file='bottle_medium.dat')
run_time = timeit_result.average
print(run_time)
timeit_middle['Original python'] = run_time

Heat equation solver
Input file: bottle_medium.dat
Simulation finished in 263.2609541416168 s
4min 23s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
263.738006195


    Heat equation solver
    Input file: bottle_medium.dat
    Simulation finished in 251.13387298583984 s
    4min 11s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

In [9]:
# it takes ~27X longer than the small
# bottle_large.dat
timeit_result = %timeit -n1 -r 1 -o main(input_file='bottle_large.dat')
run_time = timeit_result.average
print(run_time)
timeit_large['Original python'] = run_time

Heat equation solver
Input file: bottle_large.dat
Simulation finished in 1065.3524651527405 s
17min 46s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
1066.699140252


    Heat equation solver
    Input file: bottle_large.dat
    Simulation finished in 1114.454592704773 s
    18min 35s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Numpy version

In [10]:
def evolve(u, u_previous, a, dt, dx2, dy2):
    """Explicit time evolution (NumPy array-operation version).

       Updates all interior points at once with slice arithmetic instead of
       explicit Python loops.  Both arrays are modified in place and nothing
       is returned.

       u:            new temperature field
       u_previous:   previous field
       a:            diffusion constant
       dt:           time step
       dx2:          grid spacing squared, i.e. dx^2
       dy2:            -- "" --          , i.e. dy^2"""

    # [1:-1, 1:-1] is the interior; the shifted slices [2:], [:-2] pick the
    # neighbours one row/column away, so boundary values are never written.
    u[1:-1, 1:-1] = u_previous[1:-1, 1:-1] + a * dt * ( \
            (u_previous[2:, 1:-1] - 2*u_previous[1:-1, 1:-1] + \
             u_previous[:-2, 1:-1]) / dx2 + \
            (u_previous[1:-1, 2:] - 2*u_previous[1:-1, 1:-1] + \
                 u_previous[1:-1, :-2]) / dy2 )
    # Copy element-wise so the caller's u_previous array holds the new state.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)

def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))

main()

Heat equation solver
Simulation finished in 0.19930601119995117 s


In [11]:
timeit_result = %timeit -n1 -r 3 -o main()

Heat equation solver
Simulation finished in 0.18418383598327637 s
Heat equation solver
Simulation finished in 0.21879816055297852 s
Heat equation solver
Simulation finished in 0.1908581256866455 s
438 ms ± 26.5 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [12]:
np_time = timeit_result.average
timeit_dict['Numpy'] = np_time
print(np_time)

0.4381252816666195


In [13]:
# Original vs. numpy. any improvement?
print(original_time/np_time)

88.71726570341266


In [14]:
# bottle_medium.dat
timeit_result = %timeit -n1 -r 1 -o main(input_file='bottle_medium.dat')
run_time = timeit_result.average
timeit_middle['Numpy'] = run_time
print(timeit_middle)

Heat equation solver
Simulation finished in 1.8918349742889404 s
2.35 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
{'Original python': 263.738006195, 'Numpy': 2.351041131999864}


In [15]:
# bottle_large.dat
timeit_result = %timeit -n1 -r 1 -o main(input_file='bottle_large.dat')
run_time = timeit_result.average
timeit_large['Numpy'] = run_time
print(timeit_large)

Heat equation solver
Simulation finished in 8.23552393913269 s
9.45 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
{'Original python': 1066.699140252, 'Numpy': 9.445352796999941}


# Helper functions

In [16]:
def compare_time(current, reference, name):
    """Print how the current run time compares with a reference run time.

    current:    run time being reported, in seconds
    reference:  run time of the version compared against, in the same units
    name:       label of the reference version, used in the message

    Prints one sentence to stdout and returns nothing.
    """
    if current == reference:
        # Equal timings are neither faster nor slower, so say so instead of
        # reporting a "1.0 times slower" result (this also avoids 0/0).
        print("We are as fast as the", name, "version.")
        return

    ratio = reference/current
    if ratio > 1:
        word = "faster"
    else:
        # Invert so the printed factor is always >= 1
        ratio = 1 / ratio
        word = "slower"

    print("We are", "{0:.1f}".format(ratio), "times", word, "than the", name, "version.")

def print_report(main_function, **kwargs):
    timeit_result = %timeit -n1 -r 3 -o main_function(**kwargs)
    run_time = timeit_result.average
    timeit_dict['Optimized'] = run_time
    print('\nCurrent run_time: {:.10f} s'.format(run_time))
    compare_time(run_time, original_time, "original Python")
    compare_time(run_time, np_time, "NumPy")

# Python version compiled with Cython

In [17]:
%%cython -a
import time
import numpy as np
import matplotlib.pyplot as plt

def evolve(u, u_previous, a, dt, dx2, dy2):
    """Explicit time evolution (untyped source compiled with Cython).

       Same statements as the pure-Python version; no type information is
       given, so every variable is still a generic Python object.  Both
       arrays are modified in place and nothing is returned.

       u:            new temperature field (interior points are overwritten)
       u_previous:   previous field (overwritten with u at the end)
       a:            diffusion constant
       dt:           time step
       dx2:          grid spacing squared, i.e. dx^2
       dy2:            -- "" --          , i.e. dy^2"""

    n, m = u.shape

    # Only interior points are updated; boundary rows/columns are not written.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) / dx2 + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) / dy2 )
    # Copy element-wise so the caller's u_previous array holds the new state.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)


def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))

In [18]:
print_report(main)

Heat equation solver
Simulation finished in 33.07450795173645 s
Heat equation solver
Simulation finished in 33.14570498466492 s
Heat equation solver
Simulation finished in 32.741637229919434 s
33.2 s ± 186 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 33.2188343063 s
We are 1.2 times faster than the original Python version.
We are 75.8 times slower than the NumPy version.


In [19]:
timeit_dict['Compiled with cython'] = timeit_dict['Optimized']
print(timeit_dict)

{'Original python': 38.869277024999995, 'Numpy': 0.4381252816666195, 'Optimized': 33.21883430633344, 'Compiled with cython': 33.21883430633344}


# Adding types:

In [20]:
# int: n, m
# double: a, dt, dx2, dy2

In [21]:
%%cython
import time
import numpy as np
import matplotlib.pyplot as plt

def evolve(u, u_previous, double a, double dt, double dx2, double dy2):
    """Explicit time evolution.
       u:            new temperature field
       u_previous:   previous field
       a:            diffusion constant
       dt:           time step. """
    # dx2, dy2: grid spacing squared in x and y (iterate() passes dx**2, dy**2).
    # In this step only the scalar arguments and the array extents are
    # C-typed; u and u_previous are untyped arguments and the loop indices
    # i and j are not declared.

    cdef int n = u.shape[0]
    cdef int m = u.shape[1]

    # Only interior points are updated; boundary rows/columns are not written.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) / dx2 + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) / dy2 )
    # Copy element-wise so the caller's u_previous array holds the new state.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)


def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))

In [22]:
print_report(main)

Heat equation solver
Simulation finished in 35.175283908843994 s
Heat equation solver
Simulation finished in 34.132588148117065 s
Heat equation solver
Simulation finished in 35.01782298088074 s
35 s ± 474 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 35.0158728800 s
We are 1.1 times faster than the original Python version.
We are 79.9 times slower than the NumPy version.


In [23]:
timeit_dict['Optimized add types'] = timeit_dict['Optimized']
print(timeit_dict)

{'Original python': 38.869277024999995, 'Numpy': 0.4381252816666195, 'Optimized': 35.01587287999996, 'Compiled with cython': 33.21883430633344, 'Optimized add types': 35.01587287999996}


# C-function, array indexing

In [24]:
# double[:,:] u, u_previous
# c-function

In [25]:
%%cython
import time
import numpy as np
import matplotlib.pyplot as plt

# C-level (cdef) function: it is called from iterate() in this same cell.
# The arrays are received as 2-D typed memoryviews of doubles.
cdef evolve(double[:,:] u, double[:,:] u_previous, 
            double a, double dt, double dx2, double dy2):
    """Explicit time evolution.
       u:            new temperature field
       u_previous:   previous field
       a:            diffusion constant
       dt:           time step. """
    # dx2, dy2: grid spacing squared in x and y (iterate() passes dx**2, dy**2).

    cdef int n = u.shape[0]
    cdef int m = u.shape[1]
    
    # NOTE(review): i and j are not declared with cdef in this step, so the
    # loop indices are presumably still Python objects -- confirm with the
    # annotated output (%%cython -a).
    # Only interior points are updated; boundary rows/columns are not written.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) / dx2 + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) / dy2 )
    # Copy the contents of u into the buffer behind u_previous so the caller
    # sees the new state in its own array.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)


def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))

In [26]:
print_report(main)

Heat equation solver
Simulation finished in 0.7551350593566895 s
Heat equation solver
Simulation finished in 0.7363908290863037 s
Heat equation solver
Simulation finished in 0.7344369888305664 s
989 ms ± 17.9 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 0.9886551460 s
We are 39.3 times faster than the original Python version.
We are 2.3 times slower than the NumPy version.


In [27]:
timeit_dict['Optimized array indexing'] = timeit_dict['Optimized']
print(timeit_dict)

{'Original python': 38.869277024999995, 'Numpy': 0.4381252816666195, 'Optimized': 0.988655146000004, 'Compiled with cython': 33.21883430633344, 'Optimized add types': 35.01587287999996, 'Optimized array indexing': 0.988655146000004}


# Declaring the arrays as contiguous + decorators

In [28]:
%%cython
import time
import numpy as np
import matplotlib.pyplot as plt
import cython

# Bounds checking and negative-index wrap-around are switched off for this
# function, so every index used below must already be valid and non-negative.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef evolve(double[:,::1] u, double[:,::1] u_previous, 
            double a, double dt, double dx2, double dy2):
    """Explicit time evolution.
       u:            new temperature field
       u_previous:   previous field
       a:            diffusion constant
       dt:           time step. """
    # dx2, dy2: grid spacing squared in x and y (iterate() passes dx**2, dy**2).
    # double[:,::1] declares 2-D memoryviews whose last dimension has unit
    # stride (C-contiguous layout).

    cdef Py_ssize_t n = u.shape[0]
    cdef Py_ssize_t m = u.shape[1]
    
    # Loop indices are C integers of the same type as the extents.
    cdef Py_ssize_t i,j
    
    # i runs over 1..n-2 and j over 1..m-2, so i-1, i+1, j-1 and j+1 stay
    # inside the arrays, which is what the disabled checks rely on.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) / dx2 + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) / dy2 )
    # Copy the contents of u into the buffer behind u_previous so the caller
    # sees the new state in its own array.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)


def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))

In [29]:
print_report(main)

Heat equation solver
Simulation finished in 0.04358386993408203 s
Heat equation solver
Simulation finished in 0.04497098922729492 s
Heat equation solver
Simulation finished in 0.0453639030456543 s
295 ms ± 10.4 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 0.2954735793 s
We are 131.5 times faster than the original Python version.
We are 1.5 times faster than the NumPy version.


In [30]:
timeit_dict['Optimized contiguous'] = timeit_dict['Optimized']
print(timeit_dict)

{'Original python': 38.869277024999995, 'Numpy': 0.4381252816666195, 'Optimized': 0.29547357933324747, 'Compiled with cython': 33.21883430633344, 'Optimized add types': 35.01587287999996, 'Optimized array indexing': 0.988655146000004, 'Optimized contiguous': 0.29547357933324747}


In [31]:
# medium file
print_report(main, input_file='bottle_medium.dat')
timeit_middle['Optimized contiguous'] = timeit_dict['Optimized']

Heat equation solver
Simulation finished in 0.2978031635284424 s
Heat equation solver
Simulation finished in 0.32636594772338867 s
Heat equation solver
Simulation finished in 0.3412461280822754 s
780 ms ± 32.3 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 0.7799825003 s
We are 49.8 times faster than the original Python version.
We are 1.8 times slower than the NumPy version.


In [32]:
# large file
print_report(main, input_file='bottle_large.dat')
timeit_large['Optimized contiguous'] = timeit_dict['Optimized']

Heat equation solver
Simulation finished in 1.2217137813568115 s
Heat equation solver
Simulation finished in 1.2256028652191162 s
Heat equation solver
Simulation finished in 1.2471942901611328 s
2.48 s ± 31.1 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 2.4811165210 s
We are 15.7 times faster than the original Python version.
We are 5.7 times slower than the NumPy version.


# Instructor solution

In [33]:
%%cython
import time
import numpy as np
import matplotlib.pyplot as plt
import cython
cimport numpy as cnp



# Bounds checking and negative-index wrap-around are switched off, and
# cdivision(True) selects C semantics for the divisions in this function.
# NOTE(review): profile(True) presumably adds profiling hooks to this hot
# function -- confirm it is wanted while benchmarking.
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.profile(True)
cdef evolve(cnp.ndarray[cnp.double_t, ndim=2] u, 
            cnp.ndarray[cnp.double_t, ndim=2] u_previous,
            double a, double dt, double dx2, double dy2):
    """Explicit time evolution.
       u:            new temperature field
       u_previous:   previous field
       a:            diffusion constant
       dt:           time step. """
    # dx2, dy2: grid spacing squared in x and y (iterate() passes dx**2, dy**2).
    # The arrays are declared with the NumPy buffer syntax (2-D, double)
    # instead of typed memoryviews.

    cdef int n = u.shape[0]
    cdef int m = u.shape[1]

    cdef int i,j

    # Multiplication is more efficient than division
    # (the reciprocals are computed once, outside the loops)
    cdef double dx2inv = 1. / dx2
    cdef double dy2inv = 1. / dy2

    # i runs over 1..n-2 and j over 1..m-2, so the neighbour indices stay
    # inside the arrays, which is what the disabled checks rely on.
    for i in range(1, n-1):
        for j in range(1, m-1):
            u[i, j] = u_previous[i, j] + a * dt * ( \
             (u_previous[i+1, j] - 2*u_previous[i, j] + \
              u_previous[i-1, j]) * dx2inv + \
             (u_previous[i, j+1] - 2*u_previous[i, j] + \
                 u_previous[i, j-1]) * dy2inv )
    # Copy element-wise so the caller's u_previous array holds the new state.
    u_previous[:] = u[:]

def iterate(field, field0, a, dx, dy, timesteps, image_interval):
    """Advance the simulation by a fixed number of time steps.

    field, field0:   new and previous temperature fields handed to evolve()
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of steps to take
    image_interval:  an image is written whenever the step number is a
                     multiple of this value
    """
    dx2, dy2 = dx**2, dy**2

    # Largest time step for which the explicit scheme is stable
    stability_bound = 2*a*(dx2+dy2)
    dt = dx2*dy2 / stability_bound

    for step in range(1, timesteps+1):
        evolve(field, field0, a, dt, dx2, dy2)
        if step % image_interval != 0:
            continue
        write_field(field, step)

def init_fields(filename):
    """Load the initial temperature field and make a working copy.

    Returns (field, field0): the array read from filename with np.loadtxt
    and an independent copy of it that serves as the previous-step field.
    """
    initial = np.loadtxt(filename)
    return initial, initial.copy()

def write_field(field, step):
    """Save an image of the field as heat_<step>.png.

    field:  array drawn with imshow
    step:   step number, zero-padded to three digits in the file name
    """
    axes = plt.gca()
    axes.clear()  # reuse the current axes rather than stacking images
    plt.imshow(field)
    plt.axis('off')
    filename = 'heat_{0:03d}.png'.format(step)
    plt.savefig(filename)

def main(input_file='bottle.dat', a=0.5, dx=0.1, dy=0.1,
         timesteps=200, image_interval=4000):
    """Run the heat-equation simulation and report the elapsed time.

    input_file:      text file holding the initial temperature field
    a:               diffusion constant
    dx, dy:          grid spacings
    timesteps:       number of time steps to run
    image_interval:  passed to iterate(); an image is written every
                     image_interval steps

    Images of the initial and the final field are always written.  The
    timed region is the call to iterate(); reading the input file and
    writing the initial/final images are outside it.
    """
    # Initialise the temperature field
    field, field0 = init_fields(input_file)

    print("Heat equation solver")

    # Plot/save initial field
    write_field(field, 0)

    # Time the iteration with a monotonic, high-resolution clock;
    # time.time() follows the wall clock and can jump if it is adjusted.
    t0 = time.perf_counter()
    iterate(field, field0, a, dx, dy, timesteps, image_interval)
    t1 = time.perf_counter()

    # Plot/save final field
    write_field(field, timesteps)

    print("Simulation finished in {0} s".format(t1-t0))
    


In [34]:
print_report(main)
timeit_dict['Instructor solution'] = timeit_dict['Optimized']

Heat equation solver
Simulation finished in 0.07579302787780762 s
Heat equation solver
Simulation finished in 0.04141497611999512 s
Heat equation solver
Simulation finished in 0.06228780746459961 s
301 ms ± 26.1 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 0.3009282877 s
We are 129.2 times faster than the original Python version.
We are 1.5 times faster than the NumPy version.


In [35]:
print_report(main, input_file='bottle_medium.dat')
timeit_middle['Instructor solution'] = timeit_dict['Optimized']

Heat equation solver
Simulation finished in 0.40575504302978516 s
Heat equation solver
Simulation finished in 0.44788384437561035 s
Heat equation solver
Simulation finished in 0.4167819023132324 s
894 ms ± 13.9 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 0.8936017167 s
We are 43.5 times faster than the original Python version.
We are 2.0 times slower than the NumPy version.


In [36]:
print_report(main, input_file='bottle_large.dat')
timeit_large['Instructor solution'] = timeit_dict['Optimized']

Heat equation solver
Simulation finished in 1.424349069595337 s
Heat equation solver
Simulation finished in 1.4150662422180176 s
Heat equation solver
Simulation finished in 1.524371862411499 s
2.66 s ± 39.3 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)

Current run_time: 2.6602774967 s
We are 14.6 times faster than the original Python version.
We are 6.1 times slower than the NumPy version.


# Display all the execution times

In [37]:
def display_results(x: dict):
    """Print every recorded run time together with its speed-up factor.

    x maps a version label to its run time in seconds.  The speed-up is
    computed relative to the 'Original python' entry when it is present,
    otherwise relative to the first entry.  One line is printed per entry;
    an empty dictionary prints nothing.  Returns nothing.
    """
    if not x:
        return

    # Look the baseline up by key so the result does not depend on the
    # order in which the timings were inserted.
    baseline = x.get('Original python', next(iter(x.values())))
    for k, v in x.items():
        msg = '{:<25s}: {:.10f}'.format(k, v)
        if k != 'Original python':
            msg += ' ({:.1f}X)'.format(baseline/v)
        print(msg)

In [38]:
#del timeit_dict['Optimized']
display_results(timeit_dict)

Original python          : 38.8692770250
Numpy                    : 0.4381252817 (88.7X)
Optimized                : 2.6602774967 (14.6X)
Compiled with cython     : 33.2188343063 (1.2X)
Optimized add types      : 35.0158728800 (1.1X)
Optimized array indexing : 0.9886551460 (39.3X)
Optimized contiguous     : 0.2954735793 (131.5X)
Instructor solution      : 0.3009282877 (129.2X)


In [39]:
display_results(timeit_middle)

Original python          : 263.7380061950
Numpy                    : 2.3510411320 (112.2X)
Optimized contiguous     : 0.7799825003 (338.1X)
Instructor solution      : 0.8936017167 (295.1X)


In [40]:
display_results(timeit_large)

Original python          : 1066.6991402520
Numpy                    : 9.4453527970 (112.9X)
Optimized contiguous     : 2.4811165210 (429.9X)
Instructor solution      : 2.6602774967 (401.0X)
