# Generate Mandelbrot set

---
**Requirements:**

- [Get started](./Get_started.ipynb)
- [Data management](./Data_management.ipynb)
- [Multi GPU](./MultiGPU.ipynb)

---

## Introduction

The Mandelbrot set is the set of complex numbers c for which the function
\begin{equation}
    f_c(z) = z^2+c
\end{equation}
does not diverge when iterated from z = 0. [Wikipedia](https://en.wikipedia.org/wiki/Mandelbrot_set)

<img align="center" alt="Mandelbrot set" src="../../pictures/mandelbrot.jpg" style="float:none" width="500"/>

Created by <a href="//commons.wikimedia.org/wiki/User:Wolfgangbeyer" title="User:Wolfgangbeyer">Wolfgang Beyer</a> with the program Ultra Fractal 3. - <span class="int-own-work" lang="en">Own work</span>, <a href="http://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons Attribution-Share Alike 3.0">CC BY-SA 3.0</a>, <a href="https://commons.wikimedia.org/w/index.php?curid=321973">Link</a>

In this hands-on you will generate a picture with the Mandelbrot set using a Multi-GPU version of the code.
We use MPI (the Message Passing Interface) to split the work between the GPUs.

## What to do

Add the directives to use several GPUs. Here we do __not__ need the GPUs to communicate.
Be careful to allocate memory only for the part of the picture handled by each GPU, not for the complete picture.

You can have a look at the file [init_openacc.h](../../examples/init_openacc.h). It gives the details to associate a rank with a GPU.

The default coordinates show the well known representation of the set.
If you want to play around have a look at [this webpage](http://paulbourke.net/fractals/mandelbrot/) giving interesting areas of the set on which you can "zoom".

There is a bug with MPI in the notebooks: you need to save the file before running the next cell.
It is a good way to practice manual building!
Please add the correct extension for the language you are running.

Example stored in: `../../examples/C/mandelbrot_mpi_exercise.c`

In [None]:
%%idrrun -a -opts "-cpp -DMULTIGPU" -cliopts "2000 1000"
// you can use ` --option "-DMULTIGPU" `  to print the device info after filling the openacc initialisation
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef _OPENACC
  #include <openacc.h>
#endif
#include <complex.h>
#include <mpi.h>
// add openacc initialisation
/*
 * Write one part of the picture into the shared file "mandel.gray".
 *
 * picture:      bytes to write, one per pixel.
 * start:        offset in the file at which this part begins.
 * num_elements: number of bytes to write.
 *
 * The file is opened on MPI_COMM_WORLD; MPI_File_open is a collective call,
 * so every rank of the communicator has to call this function.
 * Any MPI-IO failure (open, write or close) aborts the whole job.
 */
void output(unsigned char* picture, unsigned int start, unsigned int num_elements)
{
   MPI_File     fh;
   MPI_Offset   woffset=start;

   // The access modes are bit flags, hence the bitwise OR.
   if (MPI_File_open(MPI_COMM_WORLD,"mandel.gray",MPI_MODE_WRONLY|MPI_MODE_CREATE,MPI_INFO_NULL,&fh) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in creating output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }

   // MPI-IO reports errors through return codes by default: check them,
   // otherwise a failed write would leave a silently incomplete picture.
   if (MPI_File_write_at(fh,woffset,picture,num_elements,MPI_UNSIGNED_CHAR,MPI_STATUS_IGNORE) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in writing output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }

   if (MPI_File_close(&fh) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in closing output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }
}

/*
 * Count the iterations of z -> z*z + c, starting from z = 0, that are
 * performed while z stays inside the escape radius |z| <= 2.
 *
 * c: point of the complex plane to test.
 * Returns the number of iterations done, capped at 255. The cap is reached
 * by the points that never left the radius.
 */
#pragma acc routine seq
unsigned char mandelbrot_iterations(const float complex c)
{
    const unsigned char max_iter = 255;
    unsigned char n = 0;
    float complex z = 0.0f + 0.0f * I;
    while (n < max_iter)
    {
        // Test |z|^2 <= 4 on the real and imaginary parts. The integer
        // abs() must not be used here: it would drop the imaginary part
        // and truncate the real part.
        const float re = crealf(z);
        const float im = cimagf(z);
        if (re * re + im * im > 4.0f)
            break;
        z = z*z + c;
        ++n;
    }
    return n;
}
/*
 * Generate the Mandelbrot picture, one band of rows per MPI rank.
 *
 * Command line: <width> <height> in pixels (4000 x 4000 when omitted).
 * Each rank computes its own band and writes it at its own offset in the
 * output file through output().
 */
int main(int argc, char** argv)
{
    #ifdef _OPENACC
    // add initialisation openacc
    #endif   
    MPI_Init(&argc, &argv);

    // Dimension of the picture in pixels.
    int arg_width = 4000;
    int arg_height = 4000;
    if (argc >= 3)
    {
        arg_width = atoi(argv[1]);
        arg_height = atoi(argv[2]);
    }
    if (arg_width <= 0 || arg_height <= 0)
    {
        fprintf(stderr, "ERROR: width and height must be positive integers\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    const unsigned int width = (unsigned int) arg_width;
    const unsigned int height = (unsigned int) arg_height;
    float step_w = 1./width;
    float step_h = 1./height;

    const float min_re = -2.;
    const float max_re = 1.;
    const float min_im = -1.;
    const float max_im = 1.;

    struct timespec end, start;
    clock_gettime(CLOCK_MONOTONIC_RAW, &start);

    int rank;
    int nb_procs;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nb_procs);

    // Every rank gets height / nb_procs rows and the first
    // (height % nb_procs) ranks take one extra row each.
    const unsigned int my_rank = (unsigned int) rank;
    unsigned int local_height = height / nb_procs;
    const unsigned int rest_eucli = height % nb_procs;

    // Rows owned by the ranks before this one: the base share for each of
    // them, plus one extra row for each of them that is below rest_eucli.
    const unsigned int first = my_rank * local_height
                             + (my_rank < rest_eucli ? my_rank : rest_eucli);
    if (my_rank < rest_eucli)
        ++local_height;
    const unsigned int last = first + local_height;

    unsigned int num_elements = width*local_height;
    if (rank == 0) printf("Using MPI\n");
    #if defined(_OPENACC) && defined(MULTIGPU)
    printf("I am rank %2d and my range is [%5u, %5u[ ie %10u elements. I use GPU %d over %d devices.\n", rank, first, last, num_elements,info.current_device, info.total_devices);
    #else
    printf("I am rank %2d and my range is [%5u, %5u[ ie %10u elements.\n", rank, first, last, num_elements);
    #endif

    // Only the band of this rank is allocated, not the complete picture.
    unsigned char* restrict picture = malloc(num_elements * sizeof *picture);
    if (picture == NULL && num_elements > 0)
    {
        fprintf(stderr, "ERROR in allocating the picture\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

#pragma acc data copyout(picture[0:num_elements])
{
#pragma acc parallel loop
    for (unsigned int i=0; i<local_height; ++i)
        for (unsigned int j=0; j<width; ++j)
        {
            // (i+first) is the row index in the complete picture.
            float complex c;
            c = min_re + j*step_w * (max_re - min_re) +
                I * (min_im +  ((i+first) * step_h) * (max_im - min_im));
            picture[width*i+j] = (unsigned char)255 - mandelbrot_iterations(c);
        }
}
    // One byte per pixel: the band starts first*width bytes into the file.
    output(picture, first*width, num_elements); 
    free(picture);
    MPI_Finalize();

    // Measure time
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
    unsigned long int delta_us = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000;
    printf("The time to generate the mandelbrot picture was %lu us\n", delta_us);
    return EXIT_SUCCESS;
}


In [None]:
from idrcomp import show_gray
# Presumably displays the raw grayscale file written by the C cell above (TODO confirm in idrcomp).
# 2000 and 1000 are the width and height passed to that cell through -cliopts.
show_gray("mandel.gray", 2000, 1000)

## Solution

Example stored in: `../../examples/C/mandelbrot_mpi_solution.c`

In [None]:
%%idrrun -a -opts "-cpp -DMULTIGPU" -cliopts "2000 1000"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef _OPENACC
  #include <openacc.h>
#endif
#include <complex.h>
#include <mpi.h>
#include "../../examples/C/init_openacc.h"
/*
 * Write one part of the picture into the shared file "mandel.gray".
 *
 * picture:      bytes to write, one per pixel.
 * start:        offset in the file at which this part begins.
 * num_elements: number of bytes to write.
 *
 * The file is opened on MPI_COMM_WORLD; MPI_File_open is a collective call,
 * so every rank of the communicator has to call this function.
 * Any MPI-IO failure (open, write or close) aborts the whole job.
 */
void output(unsigned char* picture, unsigned int start, unsigned int num_elements)
{
   MPI_File     fh;
   MPI_Offset   woffset=start;

   // The access modes are bit flags, hence the bitwise OR.
   if (MPI_File_open(MPI_COMM_WORLD,"mandel.gray",MPI_MODE_WRONLY|MPI_MODE_CREATE,MPI_INFO_NULL,&fh) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in creating output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }

   // MPI-IO reports errors through return codes by default: check them,
   // otherwise a failed write would leave a silently incomplete picture.
   if (MPI_File_write_at(fh,woffset,picture,num_elements,MPI_UNSIGNED_CHAR,MPI_STATUS_IGNORE) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in writing output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }

   if (MPI_File_close(&fh) != MPI_SUCCESS)
   {
        fprintf(stderr,"ERROR in closing output file\n");
        MPI_Abort(MPI_COMM_WORLD,1);
   }
}

/*
 * Count the iterations of z -> z*z + c, starting from z = 0, that are
 * performed while z stays inside the escape radius |z| <= 2.
 *
 * c: point of the complex plane to test.
 * Returns the number of iterations done, capped at 255. The cap is reached
 * by the points that never left the radius.
 */
#pragma acc routine seq
unsigned char mandelbrot_iterations(const float complex c)
{
    const unsigned char max_iter = 255;
    unsigned char n = 0;
    float complex z = 0.0f + 0.0f * I;
    while (n < max_iter)
    {
        // Test |z|^2 <= 4 on the real and imaginary parts. The integer
        // abs() must not be used here: it would drop the imaginary part
        // and truncate the real part.
        const float re = crealf(z);
        const float im = cimagf(z);
        if (re * re + im * im > 4.0f)
            break;
        z = z*z + c;
        ++n;
    }
    return n;
}
/*
 * Generate the Mandelbrot picture, one band of rows per MPI rank.
 *
 * Command line: <width> <height> in pixels (4000 x 4000 when omitted).
 * Each rank computes its own band and writes it at its own offset in the
 * output file through output().
 */
int main(int argc, char** argv)
{
    #ifdef _OPENACC
    acc_info info = initialisation_openacc();
    #endif   
    MPI_Init(&argc, &argv);

    // Dimension of the picture in pixels.
    // The values are assigned to these variables directly: declaring new
    // ones inside the if block would shadow them and leave them unset.
    int arg_width = 4000;
    int arg_height = 4000;
    if (argc >= 3)
    {
        arg_width = atoi(argv[1]);
        arg_height = atoi(argv[2]);
    }
    if (arg_width <= 0 || arg_height <= 0)
    {
        fprintf(stderr, "ERROR: width and height must be positive integers\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    const unsigned int width = (unsigned int) arg_width;
    const unsigned int height = (unsigned int) arg_height;

    float step_w = 1./width;
    float step_h = 1./height;

    const float min_re = -2.;
    const float max_re = 1.;
    const float min_im = -1.;
    const float max_im = 1.;

    struct timespec end, start;
    clock_gettime(CLOCK_MONOTONIC_RAW, &start);

    int rank;
    int nb_procs;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nb_procs);

    // Every rank gets height / nb_procs rows and the first
    // (height % nb_procs) ranks take one extra row each.
    const unsigned int my_rank = (unsigned int) rank;
    unsigned int local_height = height / nb_procs;
    const unsigned int rest_eucli = height % nb_procs;

    // Rows owned by the ranks before this one: the base share for each of
    // them, plus one extra row for each of them that is below rest_eucli.
    const unsigned int first = my_rank * local_height
                             + (my_rank < rest_eucli ? my_rank : rest_eucli);
    if (my_rank < rest_eucli)
        ++local_height;
    const unsigned int last = first + local_height;

    unsigned int num_elements = width*local_height;
    if (rank == 0) printf("Using MPI\n");
    #ifdef _OPENACC
    printf("I am rank %2d and my range is [%5u, %5u[ ie %10u elements. I use GPU %d over %d devices.\n", rank, first, last, num_elements,info.current_device, info.total_devices);
    #else
    printf("I am rank %2d and my range is [%5u, %5u[ ie %10u elements.\n", rank, first, last, num_elements);
    #endif

    // Only the band of this rank is allocated, not the complete picture.
    unsigned char* restrict picture = malloc(num_elements * sizeof *picture);
    if (picture == NULL && num_elements > 0)
    {
        fprintf(stderr, "ERROR in allocating the picture\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

#pragma acc data copyout(picture[0:num_elements])
{
#pragma acc parallel loop
    for (unsigned int i=0; i<local_height; ++i)
        for (unsigned int j=0; j<width; ++j)
        {
            // (i+first) is the row index in the complete picture.
            float complex c;
            c = min_re + j*step_w * (max_re - min_re) +
                I * (min_im +  ((i+first) * step_h) * (max_im - min_im));
            picture[width*i+j] = (unsigned char)255 - mandelbrot_iterations(c);
        }
}
    // One byte per pixel: the band starts first*width bytes into the file.
    output(picture, first*width, num_elements); 
    free(picture);
    MPI_Finalize();

    // Measure time
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
    unsigned long int delta_us = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000;
    printf("The time to generate the mandelbrot picture was %lu us\n", delta_us);
    return EXIT_SUCCESS;
}

In [None]:
from idrcomp import show_gray
# Presumably displays the raw grayscale file written by the C cell above (TODO confirm in idrcomp).
# 2000 and 1000 are the width and height passed to that cell through -cliopts.
show_gray("mandel.gray", 2000, 1000)