# Lecture 21 : Introduction to MPI (Message Passing Interface)

# Part 1 : MPI Hello World

In [7]:
%%writefile mpi_world.c
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

/*
 * MPI hello world: every rank prints its own rank number together with the
 * total number of ranks in MPI_COMM_WORLD.
 */
int main(int argc, char** argv) {

    int world_size;
    int my_rank;

    // start MPI, handing it the command line arguments
    MPI_Init(&argc, &argv);

    // ask the default communicator how many ranks exist and which one we are
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    printf("Hello World! from MPI rank %d, number of ranks = %d\n",
           my_rank, world_size);

    // shut MPI down before leaving main
    MPI_Finalize();
}

Overwriting mpi_world.c


## Typing the following on matrix:    
    mpicc -o mpi_world mpi_world.c
    mpiexec -n 8 ./mpi_world

## Produces the output:

    Hello World! from MPI rank 1, number of ranks = 8
    Hello World! from MPI rank 5, number of ranks = 8
    Hello World! from MPI rank 0, number of ranks = 8
    Hello World! from MPI rank 2, number of ranks = 8
    Hello World! from MPI rank 3, number of ranks = 8
    Hello World! from MPI rank 4, number of ranks = 8
    Hello World! from MPI rank 6, number of ranks = 8
    Hello World! from MPI rank 7, number of ranks = 8

# Part 2 : MPI Sum

## In our first version, each MPI rank separately computes the full sum and prints the results.

In [10]:
%%writefile mpi_sum_v1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * mpi_sum_v1: every MPI rank separately computes the full sum 1 + 2 + ... + N
 * and prints it next to the closed form N*(N+1)/2 and the elapsed time.
 *
 * Command line: argv[1] = N (parsed with atoll).
 * Returns 1 if N is missing; otherwise falls off the end of main.
 */
int main(int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // get N from command line
    if (argc < 2) {
        printf ("Command usage : %s %s\n",argv[0],"N");
        // MPI_Init has already been called, so finalize MPI before exiting
        MPI_Finalize();
        return 1;
    }
    long long N = atoll(argv[1]);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // calculate the sum
    long long sum = 0;
    for (long long i = 1; i <= N;i++) {
        sum += i;
    }

    // stop the timer
    end_time = MPI_Wtime();

    // closed form N*(N+1)/2 : exactly one of N and N+1 is even, so halve that
    // factor; (N/2)*(N+1) alone would truncate and be wrong for odd N
    long long expected = (N % 2 == 0) ? (N/2)*(N+1) : N*((N+1)/2);

    // print results
    printf ("rank %d (of %d) sum = %lld, N*(N+1)/2 = %lld, elapsed time = %.4f seconds\n",
            rank,size,sum,expected,end_time-start_time);

    MPI_Finalize();
}

Writing mpi_sum_v1.c


## Typing the following on matrix:    
    mpicc -o mpi_sum_v1 mpi_sum_v1.c
    mpiexec -n 2 ./mpi_sum_v1 1000000

## Produces the output:
    rank 1 (of 2) sum = 500000500000, N*(N+1)/2 = 500000500000, elapsed time = 0.0027 seconds
    rank 0 (of 2) sum = 500000500000, N*(N+1)/2 = 500000500000, elapsed time = 0.0031 seconds

## In our second version we have each MPI rank compute only part of the sum.

In [None]:
%%writefile mpi_sum_v2.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * mpi_sum_v2: each MPI rank computes only part of the sum 1 + 2 + ... + N.
 * Rank r adds the terms 1+r, 1+r+size, 1+r+2*size, ... and prints its own
 * partial sum; the partial sums are NOT combined in this version.
 *
 * Command line: argv[1] = N (parsed with atoll).
 * Returns 1 if N is missing; otherwise falls off the end of main.
 */
int main(int argc, char **argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // get N from command line
    if (argc < 2) {
        printf ("Command usage : %s %s\n",argv[0],"N");
        // MPI_Init has already been called, so finalize MPI before exiting
        MPI_Finalize();
        return 1;
    }
    long long N = atoll(argv[1]);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // calculate this rank's share of the sum (terms are dealt out cyclically)
    long long sum = 0;
    for (long long i = 1+rank; i <= N;i+=size) {
        sum += i;
    }

    // stop the timer
    end_time = MPI_Wtime();

    // closed form N*(N+1)/2 : exactly one of N and N+1 is even, so halve that
    // factor; (N/2)*(N+1) alone would truncate and be wrong for odd N
    long long expected = (N % 2 == 0) ? (N/2)*(N+1) : N*((N+1)/2);

    // print results
    printf ("rank %d (of %d) sum = %lld, N*(N+1)/2 = %lld, elapsed time = %.4f seconds\n",
            rank,size,sum,expected,end_time-start_time);

    MPI_Finalize();

}

## Typing the following on matrix:    
    mpicc -o mpi_sum_v2 mpi_sum_v2.c
    mpiexec -n 2 ./mpi_sum_v2 1000000

## Produces the output:
    rank 0 (of 2) sum = 250000000000, N*(N+1)/2 = 500000500000, elapsed time = 0.0008 seconds
    rank 1 (of 2) sum = 250000500000, N*(N+1)/2 = 500000500000, elapsed time = 0.0013 seconds

## The big difference between MPI and OpenMP/CUDA is that in MPI every variable is private and there is no way to create a shared variable!

## This is because different MPI ranks can execute on completely different computers (perhaps not even in the same city, state, or country).

## Thus, we cannot assume that our MPI ranks will have access to some common memory that can be shared.

## Since ranks cannot communicate using shared memory, they must communicate using **message passing**.

## In our third version, we have each nonzero rank send its partial sum to rank 0.  Rank 0 receives all of the partial sums and computes the final answer!

## Note that in this version only rank 0 has the correct answer.

In [11]:
%%writefile mpi_sum_v3.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * mpi_sum_v3: each MPI rank computes a partial sum of 1 + 2 + ... + N, then
 * every nonzero rank sends its partial sum to rank 0, which adds them up.
 * Every rank prints its own value of sum, so only rank 0 prints the full sum.
 *
 * Command line: argv[1] = N (parsed with atoll).
 * Compile with -DDIAG to have rank 0 print each message it receives.
 * Returns 1 if N is missing; otherwise falls off the end of main.
 */
int main(int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // get N from command line
    if (argc < 2) {
        printf ("Command usage : %s %s\n",argv[0],"N");
        // MPI_Init has already been called, so finalize MPI before exiting
        MPI_Finalize();
        return 1;
    }
    long long N = atoll(argv[1]);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // calculate this rank's share of the sum (terms are dealt out cyclically)
    long long sum = 0;
    for (long long i = 1+rank; i <= N;i+=size) {
        sum += i;
    }

    // all nonzero ranks send their partial sums to rank 0
    if (rank == 0) {
        long long rank_sum;
        MPI_Status status;
        for (int source=1;source<size;source++) {
            // the receive datatype must match the sender's MPI_LONG_LONG
            // (and the signed long long buffer it is stored into)
            MPI_Recv(&rank_sum,1,MPI_LONG_LONG,source,0,MPI_COMM_WORLD,&status);
#ifdef DIAG
            printf ("rank 0 received message %lld from rank %d\n",rank_sum,source);
#endif
            sum += rank_sum;
        }
    } else {
        int dest = 0;
        MPI_Send(&sum,1,MPI_LONG_LONG,dest,0,MPI_COMM_WORLD);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // closed form N*(N+1)/2 : exactly one of N and N+1 is even, so halve that
    // factor; (N/2)*(N+1) alone would truncate and be wrong for odd N
    long long expected = (N % 2 == 0) ? (N/2)*(N+1) : N*((N+1)/2);

    // print results
    printf ("rank %d (of %d) sum = %lld, N*(N+1)/2 = %lld, elapsed time = %.4f seconds\n",
            rank,size,sum,expected,end_time-start_time);

    MPI_Finalize();
}

Overwriting mpi_sum_v3.c


## Typing the following on matrix:    
    mpicc -DDIAG -o mpi_sum_v3 mpi_sum_v3.c
    mpiexec -n 2 ./mpi_sum_v3 1000000

## Produces the output:
    rank 0 received message 250000500000 from rank 1
    rank 0 (of 2) sum = 500000500000, N*(N+1)/2 = 500000500000, elapsed time = 0.0013 seconds
    rank 1 (of 2) sum = 250000500000, N*(N+1)/2 = 500000500000, elapsed time = 0.0013 seconds

## For version 4, we have only rank 0 print the final result.

In [12]:
%%writefile mpi_sum_v4.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * mpi_sum_v4: each MPI rank computes a partial sum of 1 + 2 + ... + N, every
 * nonzero rank sends its partial sum to rank 0, and only rank 0 prints the
 * combined result, the closed form N*(N+1)/2, and the elapsed time.
 *
 * Command line: argv[1] = N (parsed with atoll).
 * Compile with -DDIAG to have rank 0 print each message it receives.
 * Returns 1 if N is missing; otherwise falls off the end of main.
 */
int main(int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // get N from command line
    if (argc < 2) {
        printf ("Command usage : %s %s\n",argv[0],"N");
        // MPI_Init has already been called, so finalize MPI before exiting
        MPI_Finalize();
        return 1;
    }
    long long N = atoll(argv[1]);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // calculate this rank's share of the sum (terms are dealt out cyclically)
    long long sum = 0;
    for (long long i = 1+rank; i <= N;i+=size) {
        sum += i;
    }

    // all nonzero ranks send their partial sums to rank 0
    if (rank == 0) {
        long long rank_sum;
        MPI_Status status;
        for (int source=1;source<size;source++) {
            // the receive datatype must match the sender's MPI_LONG_LONG
            // (and the signed long long buffer it is stored into)
            MPI_Recv(&rank_sum,1,MPI_LONG_LONG,source,0,MPI_COMM_WORLD,&status);
#ifdef DIAG
            printf ("rank 0 received message %lld from rank %d\n",rank_sum,source);
#endif
            sum += rank_sum;
        }
    } else {
        int dest = 0;
        MPI_Send(&sum,1,MPI_LONG_LONG,dest,0,MPI_COMM_WORLD);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // only rank 0 prints results
    if (rank == 0) {
        // closed form N*(N+1)/2 : exactly one of N and N+1 is even, so halve
        // that factor; (N/2)*(N+1) alone would truncate and be wrong for odd N
        long long expected = (N % 2 == 0) ? (N/2)*(N+1) : N*((N+1)/2);
        printf ("sum = %lld, N*(N+1)/2 = %lld, elapsed time = %.4f seconds\n",
                sum,expected,end_time-start_time);
    }

    MPI_Finalize();
}

Writing mpi_sum_v4.c


## Typing the following on matrix:    
    mpicc -o mpi_sum_v4 mpi_sum_v4.c
    mpiexec -n 1 ./mpi_sum_v4 1000000000
    mpiexec -n 2 ./mpi_sum_v4 1000000000
    mpiexec -n 4 ./mpi_sum_v4 1000000000

## Produces the output:
    (base) jasonwil@matrix:~/cmda3634/L21_test$ mpicc -o mpi_sum_v4 mpi_sum_v4.c
    (base) jasonwil@matrix:~/cmda3634/L21_test$ mpiexec -n 1 ./mpi_sum_v4 1000000000
    sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 1.9880 seconds
    (base) jasonwil@matrix:~/cmda3634/L21_test$ mpiexec -n 2 ./mpi_sum_v4 1000000000
    sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.9907 seconds
    (base) jasonwil@matrix:~/cmda3634/L21_test$ mpiexec -n 4 ./mpi_sum_v4 1000000000
    sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.5256 seconds

## In our final version 5 we have rank 0 send each of the other ranks the final answer.  

In [None]:
%%writefile mpi_sum_v5.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/*
 * mpi_sum_v5: each MPI rank computes a partial sum of 1 + 2 + ... + N.
 * Phase 1: every nonzero rank sends its partial sum to rank 0, which adds
 *          them to its own to obtain the full sum.
 * Phase 2: rank 0 sends the full sum back to every nonzero rank.
 * Afterwards every rank prints the full sum.
 *
 * Command line: argv[1] = N (parsed with atoll).
 * Compile with -DDIAG to have rank 0 print each message it receives.
 * Returns 1 if N is missing; otherwise falls off the end of main.
 */
int main(int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // get N from command line
    if (argc < 2) {
        printf ("Command usage : %s %s\n",argv[0],"N");
        // MPI_Init has already been called, so finalize MPI before exiting
        MPI_Finalize();
        return 1;
    }
    long long N = atoll(argv[1]);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // calculate this rank's share of the sum (terms are dealt out cyclically)
    long long sum = 0;
    for (long long i = 1+rank; i <= N;i+=size) {
        sum += i;
    }

    // all nonzero ranks send their partial sums to rank 0
    if (rank == 0) {
        long long rank_sum;
        MPI_Status status;
        for (int source=1;source<size;source++) {
            // the receive datatype must match the sender's MPI_LONG_LONG
            // (and the signed long long buffer it is stored into)
            MPI_Recv(&rank_sum,1,MPI_LONG_LONG,source,0,MPI_COMM_WORLD,&status);
#ifdef DIAG
            printf ("rank 0 received message %lld from rank %d\n",rank_sum,source);
#endif
            sum += rank_sum;
        }
    } else {
        int dest = 0;
        MPI_Send(&sum,1,MPI_LONG_LONG,dest,0,MPI_COMM_WORLD);
    }

    // all nonzero ranks receive the final sum from rank 0
    if (rank == 0) {
        for (int dest = 1;dest < size;dest++) {
            MPI_Send(&sum,1,MPI_LONG_LONG,dest,0,MPI_COMM_WORLD);
        }
    } else {
        int src = 0;
        MPI_Status status;
        // overwrite this rank's partial sum with the full sum from rank 0
        MPI_Recv(&sum,1,MPI_LONG_LONG,src,0,MPI_COMM_WORLD,&status);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // closed form N*(N+1)/2 : exactly one of N and N+1 is even, so halve that
    // factor; (N/2)*(N+1) alone would truncate and be wrong for odd N
    long long expected = (N % 2 == 0) ? (N/2)*(N+1) : N*((N+1)/2);

    // print results
    printf ("rank %d (of %d) sum = %lld, N*(N+1)/2 = %lld, elapsed time = %.4f seconds\n",
            rank,size,sum,expected,end_time-start_time);

    MPI_Finalize();
}

## Typing the following on matrix:    
    mpicc -o mpi_sum_v5 mpi_sum_v5.c
    mpiexec -n 4 ./mpi_sum_v5 1000000000

## Produces the output:
    rank 0 (of 4) sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.5265 seconds
    rank 1 (of 4) sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.5266 seconds
    rank 2 (of 4) sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.5265 seconds
    rank 3 (of 4) sum = 500000000500000000, N*(N+1)/2 = 500000000500000000, elapsed time = 0.5266 seconds