# Lecture 23 : MPI Extreme

# Part 1 : Finding the Exact Extreme Pair

## For version 1 we start by adding the MPI_Init, MPI_Finalize, and some timers.

## Also note that we read the dataset using a given filename rather than using a pipe (pipes do not work when using MPI).

## Here is code for opening a file given a file name and reading the len and dim parameters.

    // open the text file for reading
    // (fopen gives back a null pointer when the file cannot be opened)
    FILE* fptr;
    fptr = fopen(filename,"r");

    // need to check for null
    // (comparing the pointer against 0 is the same as comparing against NULL)
    if (fptr == 0) {
        printf ("Error opening data file %s.\n",filename);
        exit(1);
    }

    // read the number of points and the dimension of each point
    // fscanf returns how many items it converted, so any result other
    // than 2 means the two header integers could not both be read
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        return 1;
    }


## Here is a function that reads a dataset from a file pointer (in vec.c)

    // Fill data with len points of dim coordinates each, parsed as doubles
    // from fptr.  Point i occupies data[i*dim] .. data[i*dim+dim-1].
    // Prints a message and exits with status 1 if any value cannot be read.
    void vec_read_dataset_file (FILE* fptr, double* data, int len, int dim) {
        int row = 0;
        while (row < len) {
            int col = 0;
            while (col < dim) {
                // address of coordinate col of point row
                double* slot = data + (row*dim + col);
                int converted = fscanf(fptr,"%lf",slot);
                if (converted != 1) {
                    printf ("error reading dataset\n");
                    exit(1);
                }
                col++;
            }
            row++;
        }
    }

## Finally note that we keep track of the number of pairs each rank checks.
## We will use this later to investigate load balancing.

## Here is the full version 1.

In [None]:
%%writefile mpi_extreme_v1.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "vec.h"

/*
 * mpi_extreme_v1 : every rank reads the whole dataset and checks every pair.
 *
 * usage: mpi_extreme_v1 filename
 *
 * The file starts with two integers (number of points, dimension) followed
 * by the point coordinates.  Each rank prints its own elapsed time, the
 * largest distance it found and the indices of that pair.  When compiled
 * with -DDIAG each rank also prints how many pairs it checked.
 *
 * Returns 0 on success.  Once MPI_Init has been called every error path
 * goes through MPI_Abort so that the whole job is shut down instead of a
 * single rank exiting without finalizing MPI.
 */
int main (int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // read the filename from the command line
    if (argc < 2) {
        printf ("command usage: %s %s\n",argv[0],"filename");
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    char* filename = argv[1];

    // open the text file for reading
    FILE* fptr = fopen(filename,"r");

    // need to check for null
    if (fptr == NULL) {
        printf ("Error opening data file %s.\n",filename);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // read the number of points and the dimension of each point
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // reject empty or negative sizes: the code below indexes point 0
    // and multiplies len by dim to size the allocation
    if (len < 1 || dim < 1) {
        printf ("error: the number of points and the dimension must be positive\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // allocate the (len x dim) data matrix on the heap using malloc
    // (the product is computed in size_t so it cannot overflow an int)
    double* data = (double*)malloc((size_t)len*(size_t)dim*sizeof(double));
    if (data == NULL) {
        printf ("malloc failed to allocate data matrix\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    vec_read_dataset_file (fptr,data,len,dim);

    // close the data file
    fclose(fptr);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // find the extreme pair
    // extreme starts as the pair (0,0) whose distance is 0, which matches the
    // initial max_dist_sq; this keeps the indices defined even when no pair
    // has a distance greater than 0
    double max_dist_sq = 0;
    int extreme[2] = {0, 0};
#ifdef DIAG
    // len*(len-1)/2 does not fit in an int once len is large
    long long pairs_checked = 0;
#endif
    for (int i=0;i<len-1;i++) {
        for (int j=i+1;j<len;j++) {
            // offsets are computed in size_t to avoid int overflow of i*dim
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
#ifdef DIAG
            pairs_checked += 1;
#endif
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    }

    // stop the timer
    end_time = MPI_Wtime();

    // output the results
#ifdef DIAG
    printf ("rank %d: pairs checked = %lld\n",rank,pairs_checked);
#endif
    printf ("rank %d: elapsed time = %.4f seconds\n",rank,end_time-start_time);
    printf ("rank %d: Extreme Distance = %.2f\n",rank,sqrt(max_dist_sq));
    printf ("rank %d: Extreme Pair = %d %d\n",rank,extreme[0],extreme[1]);

    // free memory allocated for dataset
    free(data);

    MPI_Finalize();
    return 0;
}

## Here is the result of running with 2 ranks on matrix.

    ~/cmda3634_materials/L23$ mpicc -DDIAG -o mpi_extreme_v1 mpi_extreme_v1.c vec.c -lm
    ~/cmda3634_materials/L23$ mpiexec -n 2 ./mpi_extreme_v1 mnist1000.txt
    rank 1: pairs checked = 499500
    rank 1: elapsed time = 1.0683 seconds
    rank 1: Extreme Distance = 3797.52
    rank 1: Extreme Pair = 121 426
    rank 0: pairs checked = 499500
    rank 0: elapsed time = 1.0684 seconds
    rank 0: Extreme Distance = 3797.52
    rank 0: Extreme Pair = 121 426
    ~/cmda3634_materials/L23$

## The big problem with version 1 is that each rank does the full work.

## In version 2 we change the outer loop to distribute the work across ranks using the code:

    for (int i=0+rank;i<len-1;i+=size) {

In [None]:
%%writefile mpi_extreme_v2.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "vec.h"

/*
 * mpi_extreme_v2 : the outer loop over the first point of each pair is
 * distributed cyclically across ranks (rank, rank+size, rank+2*size, ...).
 *
 * usage: mpi_extreme_v2 filename
 *
 * There is no communication in this version, so each rank prints only the
 * best pair among the pairs it checked itself.  When compiled with -DDIAG
 * each rank also prints how many pairs it checked.
 *
 * Returns 0 on success.  Once MPI_Init has been called every error path
 * goes through MPI_Abort so that the whole job is shut down instead of a
 * single rank exiting without finalizing MPI.
 */
int main (int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // read the filename from the command line
    if (argc < 2) {
        printf ("command usage: %s %s\n",argv[0],"filename");
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    char* filename = argv[1];

    // open the text file for reading
    FILE* fptr = fopen(filename,"r");

    // need to check for null
    if (fptr == NULL) {
        printf ("Error opening data file %s.\n",filename);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // read the number of points and the dimension of each point
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // reject empty or negative sizes: the code below indexes point 0
    // and multiplies len by dim to size the allocation
    if (len < 1 || dim < 1) {
        printf ("error: the number of points and the dimension must be positive\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // allocate the (len x dim) data matrix on the heap using malloc
    // (the product is computed in size_t so it cannot overflow an int)
    double* data = (double*)malloc((size_t)len*(size_t)dim*sizeof(double));
    if (data == NULL) {
        printf ("malloc failed to allocate data matrix\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    vec_read_dataset_file (fptr,data,len,dim);

    // close the data file
    fclose(fptr);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // find the extreme pair among the rows assigned to this rank
    // extreme starts as the pair (0,0) whose distance is 0, which matches the
    // initial max_dist_sq; a rank with rank >= len-1 never enters the loop
    // and would otherwise print uninitialized indices
    double max_dist_sq = 0;
    int extreme[2] = {0, 0};
#ifdef DIAG
    // a long long cannot overflow for any pair count an int len allows
    long long pairs_checked = 0;
#endif
    for (int i=0+rank;i<len-1;i+=size) {
        for (int j=i+1;j<len;j++) {
            // offsets are computed in size_t to avoid int overflow of i*dim
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
#ifdef DIAG
            pairs_checked += 1;
#endif
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    }

    // stop the timer
    end_time = MPI_Wtime();

    // output the results
#ifdef DIAG
    printf ("rank %d: pairs checked = %lld\n",rank,pairs_checked);
#endif
    printf ("rank %d: elapsed time = %.4f seconds\n",rank,end_time-start_time);
    printf ("rank %d: Extreme Distance = %.2f\n",rank,sqrt(max_dist_sq));
    printf ("rank %d: Extreme Pair = %d %d\n",rank,extreme[0],extreme[1]);

    // free memory allocated for dataset
    free(data);

    MPI_Finalize();
    return 0;
}

## Here are the results of running on matrix with 2 ranks:

    ~/cmda3634_materials/L23$ mpicc -DDIAG -o mpi_extreme_v2 mpi_extreme_v2.c vec.c -lm
    ~/cmda3634_materials/L23$ mpiexec -n 2 ./mpi_extreme_v2 mnist1000.txt
    rank 0: pairs checked = 250000
    rank 0: elapsed time = 0.5393 seconds
    rank 0: Extreme Distance = 3707.97
    rank 0: Extreme Pair = 286 984
    rank 1: pairs checked = 249500
    rank 1: elapsed time = 0.5380 seconds
    rank 1: Extreme Distance = 3797.52
    rank 1: Extreme Pair = 121 426
    ~/cmda3634_materials/L23$

## What do you observe?

## Why is the load fairly well balanced despite the fact that the double for loop is triangular?


## In version 3 we add a round of communication where each nonzero rank sends its extreme pair to rank 0.  Here is the new code:

    // all nonzero ranks send their extreme pair to rank 0
    // each message is 2 ints (the two point indices) sent with tag 0
    if (rank == 0) {
        MPI_Status status;
        int rank_extreme[2];
        // receive from ranks 1, 2, ..., size-1 in that order
        for (int source=1;source<size;source++) {
            MPI_Recv(rank_extreme,2,MPI_INT,source,0,MPI_COMM_WORLD,&status);
            int i = rank_extreme[0];
            int j = rank_extreme[1];
            // only the indices are sent, so rank 0 recomputes the distance
            // of the received pair from its own copy of the data
            double dist_sq = vec_dist_sq(data+i*dim,data+j*dim,dim);
            // keep the received pair if it beats the best pair seen so far
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    } else {
        // destination is rank 0, tag 0 matches the receive above
        MPI_Send(extreme,2,MPI_INT,0,0,MPI_COMM_WORLD);
    }


In [1]:
%%writefile mpi_extreme_v3.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "vec.h"

/*
 * mpi_extreme_v3 : cyclic distribution of the outer loop across ranks,
 * followed by one round of communication in which every nonzero rank sends
 * its best pair to rank 0.
 *
 * usage: mpi_extreme_v3 filename
 *
 * Every rank prints its results; after the communication round rank 0 holds
 * the best pair over all ranks.  When compiled with -DDIAG each rank also
 * prints how many pairs it checked.
 *
 * Returns 0 on success.  Once MPI_Init has been called every error path
 * goes through MPI_Abort: a rank that simply returned would leave rank 0
 * blocked forever in MPI_Recv.
 */
int main (int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // read the filename from the command line
    if (argc < 2) {
        printf ("command usage: %s %s\n",argv[0],"filename");
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    char* filename = argv[1];

    // open the text file for reading
    FILE* fptr = fopen(filename,"r");

    // need to check for null
    if (fptr == NULL) {
        printf ("Error opening data file %s.\n",filename);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // read the number of points and the dimension of each point
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // reject empty or negative sizes: the code below indexes point 0
    // and multiplies len by dim to size the allocation
    if (len < 1 || dim < 1) {
        printf ("error: the number of points and the dimension must be positive\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // allocate the (len x dim) data matrix on the heap using malloc
    // (the product is computed in size_t so it cannot overflow an int)
    double* data = (double*)malloc((size_t)len*(size_t)dim*sizeof(double));
    if (data == NULL) {
        printf ("malloc failed to allocate data matrix\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    vec_read_dataset_file (fptr,data,len,dim);

    // close the data file
    fclose(fptr);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // find the extreme pair among the rows assigned to this rank
    // extreme starts as the pair (0,0) whose distance is 0, which matches the
    // initial max_dist_sq; a rank that finds nothing (for example when
    // rank >= len-1) then sends valid indices to rank 0 instead of garbage
    double max_dist_sq = 0;
    int extreme[2] = {0, 0};
#ifdef DIAG
    // a long long cannot overflow for any pair count an int len allows
    long long pairs_checked = 0;
#endif
    for (int i=0+rank;i<len-1;i+=size) {
        for (int j=i+1;j<len;j++) {
            // offsets are computed in size_t to avoid int overflow of i*dim
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
#ifdef DIAG
            pairs_checked += 1;
#endif
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    }

    // all nonzero ranks send their extreme pair to rank 0
    if (rank == 0) {
        MPI_Status status;
        int rank_extreme[2];
        for (int source=1;source<size;source++) {
            MPI_Recv(rank_extreme,2,MPI_INT,source,0,MPI_COMM_WORLD,&status);
            int i = rank_extreme[0];
            int j = rank_extreme[1];
            // only indices are sent, so rank 0 recomputes the distance
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    } else {
        MPI_Send(extreme,2,MPI_INT,0,0,MPI_COMM_WORLD);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // output the results
#ifdef DIAG
    printf ("rank %d: pairs checked = %lld\n",rank,pairs_checked);
#endif
    printf ("rank %d: elapsed time = %.4f seconds\n",rank,end_time-start_time);
    printf ("rank %d: Extreme Distance = %.2f\n",rank,sqrt(max_dist_sq));
    printf ("rank %d: Extreme Pair = %d %d\n",rank,extreme[0],extreme[1]);

    // free memory allocated for dataset
    free(data);

    MPI_Finalize();
    return 0;
}

Writing mpi_extreme_v3.c


## Here is the output when running on matrix with 4 ranks.

    ~/cmda3634_materials/L23$ mpicc -DDIAG -o mpi_extreme_v3 mpi_extreme_v3.c vec.c -lm
    ~/cmda3634_materials/L23$ mpiexec -n 4 ./mpi_extreme_v3 mnist1000.txt
    rank 2: pairs checked = 124750
    rank 2: elapsed time = 0.2864 seconds
    rank 2: Extreme Distance = 3707.97
    rank 2: Extreme Pair = 286 984
    rank 1: pairs checked = 125000
    rank 1: elapsed time = 0.3654 seconds
    rank 1: Extreme Distance = 3797.52
    rank 1: Extreme Pair = 121 426
    rank 0: pairs checked = 125250
    rank 0: elapsed time = 0.4413 seconds
    rank 0: Extreme Distance = 3797.52
    rank 0: Extreme Pair = 121 426
    rank 3: pairs checked = 124500
    rank 3: elapsed time = 0.3935 seconds
    rank 3: Extreme Distance = 3673.16
    rank 3: Extreme Pair = 867 872
    ~/cmda3634_materials/L23$

## Note that the load is still well balanced.  
## Also note that both rank 1 and rank 0 have the correct answer.
## Which rank found the extreme pair initially?

## For our final version 4 we clean up the output by having only rank 0 print the results.  

In [2]:
%%writefile mpi_extreme_v4.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "vec.h"

/*
 * mpi_extreme_v4 : cyclic distribution of the outer loop across ranks,
 * one round of communication to rank 0, and only rank 0 prints the
 * elapsed time and the extreme pair.
 *
 * usage: mpi_extreme_v4 filename
 *
 * When compiled with -DDIAG every rank still prints how many pairs it
 * checked.
 *
 * Returns 0 on success.  Once MPI_Init has been called every error path
 * goes through MPI_Abort: a rank that simply returned would leave rank 0
 * blocked forever in MPI_Recv.
 */
int main (int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // read the filename from the command line
    if (argc < 2) {
        printf ("command usage: %s %s\n",argv[0],"filename");
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    char* filename = argv[1];

    // open the text file for reading
    FILE* fptr = fopen(filename,"r");

    // need to check for null
    if (fptr == NULL) {
        printf ("Error opening data file %s.\n",filename);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // read the number of points and the dimension of each point
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // reject empty or negative sizes: the code below indexes point 0
    // and multiplies len by dim to size the allocation
    if (len < 1 || dim < 1) {
        printf ("error: the number of points and the dimension must be positive\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // allocate the (len x dim) data matrix on the heap using malloc
    // (the product is computed in size_t so it cannot overflow an int)
    double* data = (double*)malloc((size_t)len*(size_t)dim*sizeof(double));
    if (data == NULL) {
        printf ("malloc failed to allocate data matrix\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    vec_read_dataset_file (fptr,data,len,dim);

    // close the data file
    fclose(fptr);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // find the extreme pair among the rows assigned to this rank
    // extreme starts as the pair (0,0) whose distance is 0, which matches the
    // initial max_dist_sq; a rank that finds nothing (for example when
    // rank >= len-1) then sends valid indices to rank 0 instead of garbage
    double max_dist_sq = 0;
    int extreme[2] = {0, 0};
#ifdef DIAG
    // a long long cannot overflow for any pair count an int len allows
    long long pairs_checked = 0;
#endif
    for (int i=0+rank;i<len-1;i+=size) {
        for (int j=i+1;j<len;j++) {
            // offsets are computed in size_t to avoid int overflow of i*dim
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
#ifdef DIAG
            pairs_checked += 1;
#endif
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    }

    // all nonzero ranks send their extreme pair to rank 0
    if (rank == 0) {
        MPI_Status status;
        int rank_extreme[2];
        for (int source=1;source<size;source++) {
            MPI_Recv(rank_extreme,2,MPI_INT,source,0,MPI_COMM_WORLD,&status);
            int i = rank_extreme[0];
            int j = rank_extreme[1];
            // only indices are sent, so rank 0 recomputes the distance
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    } else {
        MPI_Send(extreme,2,MPI_INT,0,0,MPI_COMM_WORLD);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // output the results
#ifdef DIAG
    printf ("rank %d: pairs checked = %lld\n",rank,pairs_checked);
#endif
    // only rank 0 holds the best pair over all ranks
    if (rank == 0) {
        printf ("rank %d: elapsed time = %.4f seconds\n",rank,end_time-start_time);
        printf ("rank %d: Extreme Distance = %.2f\n",rank,sqrt(max_dist_sq));
        printf ("rank %d: Extreme Pair = %d %d\n",rank,extreme[0],extreme[1]);
    }

    // free memory allocated for dataset
    free(data);

    MPI_Finalize();
    return 0;
}

Writing mpi_extreme_v4.c


## Here is the output when running with 1, 2, and 4 ranks on matrix (without the DIAG flag):

    ~/cmda3634_materials/L23$ mpicc -o mpi_extreme_v4 mpi_extreme_v4.c vec.c -lm
    ~/cmda3634_materials/L23$ mpiexec -n 1 ./mpi_extreme_v4 mnist1000.txt
    rank 0: elapsed time = 1.0708 seconds
    rank 0: Extreme Distance = 3797.52
    rank 0: Extreme Pair = 121 426
    ~/cmda3634_materials/L23$ mpiexec -n 2 ./mpi_extreme_v4 mnist1000.txt
    rank 0: elapsed time = 0.5385 seconds
    rank 0: Extreme Distance = 3797.52
    rank 0: Extreme Pair = 121 426
    ~/cmda3634_materials/L23$ mpiexec -n 4 ./mpi_extreme_v4 mnist1000.txt
    rank 0: elapsed time = 0.2854 seconds
    rank 0: Extreme Distance = 3797.52
    rank 0: Extreme Pair = 121 426
    ~/cmda3634_materials/L23$

# Part 2 : Using Randomization to Approximate the Extreme Pair

## It is relatively straightforward to change our final version of mpi_extreme into a version that approximates the extreme pair using randomization.

## To approximate the extreme pair we have each rank check **num_pairs** random pairs.  Then, as in the exact case, each nonzero rank sends its approximate extreme pair to rank 0.

## One thing to note is that we need to add **rank to the random seed** so that each rank uses a different pseudorandom sequence!

In [None]:
%%writefile mpi_extreme_approx.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "vec.h"

/*
 * mpi_extreme_approx : approximates the extreme pair by having every rank
 * check num_pairs randomly chosen pairs, then sending each nonzero rank's
 * best pair to rank 0, which prints the overall result.
 *
 * usage: mpi_extreme_approx filename num_pairs seed
 *
 * Each rank seeds its generator with seed+rank so the ranks draw different
 * pseudorandom sequences.
 *
 * Returns 0 on success.  Once MPI_Init has been called every error path
 * goes through MPI_Abort: a rank that simply returned would leave rank 0
 * blocked forever in MPI_Recv.
 */
int main (int argc, char** argv) {

    MPI_Init (&argc, &argv);

    // MPI_COMM_WORLD is the default communicator that contains all ranks
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // read the filename, num_pairs, and random seed from the command line
    if (argc < 4) {
        printf ("command usage: %s %s %s %s\n",argv[0],"filename","num_pairs","seed");
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    char* filename = argv[1];
    int num_pairs = atoi(argv[2]);
    int seed = atoi(argv[3]);

    // seed the random number generator using command line seed
    // we add rank to seed so that each rank uses a different pseudorandom sequence
    srandom(seed+rank);

    // open the text file for reading
    FILE* fptr = fopen(filename,"r");

    // need to check for null
    if (fptr == NULL) {
        printf ("Error opening data file %s.\n",filename);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // read the number of points and the dimension of each point
    int len, dim;
    if (fscanf(fptr,"%d %d",&len, &dim) != 2) {
        printf ("error reading the number of points and the dimension\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // reject empty or negative sizes: random() % len below would divide by
    // zero for len == 0, and len*dim sizes the allocation
    if (len < 1 || dim < 1) {
        printf ("error: the number of points and the dimension must be positive\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }

    // allocate the (len x dim) data matrix on the heap using malloc
    // (the product is computed in size_t so it cannot overflow an int)
    double* data = (double*)malloc((size_t)len*(size_t)dim*sizeof(double));
    if (data == NULL) {
        printf ("malloc failed to allocate data matrix\n");
        fclose(fptr);
        MPI_Abort(MPI_COMM_WORLD,1);
        return 1;
    }
    vec_read_dataset_file (fptr,data,len,dim);

    // close the data file
    fclose(fptr);

    // start the timer
    double start_time, end_time;
    start_time = MPI_Wtime();

    // find the approximate extreme pair
    // extreme starts as the pair (0,0) whose distance is 0, which matches the
    // initial max_dist_sq; if num_pairs <= 0 or no sampled pair has a positive
    // distance the rank still sends valid indices to rank 0
    double max_dist_sq = 0;
    int extreme[2] = {0, 0};
    for (int p=0;p<num_pairs;p++) {
        // the two indices are drawn independently, so i may equal j
        int i = random() % len;
        int j = random() % len;
        // offsets are computed in size_t to avoid int overflow of i*dim
        double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
        if (dist_sq > max_dist_sq) {
            max_dist_sq = dist_sq;
            extreme[0] = i;
            extreme[1] = j;
        }
    }

    // all nonzero ranks send their extreme pair to rank 0
    if (rank == 0) {
        MPI_Status status;
        int rank_extreme[2];
        for (int source=1;source<size;source++) {
            MPI_Recv(rank_extreme,2,MPI_INT,source,0,MPI_COMM_WORLD,&status);
            int i = rank_extreme[0];
            int j = rank_extreme[1];
            // only indices are sent, so rank 0 recomputes the distance
            double dist_sq = vec_dist_sq(data+(size_t)i*dim,data+(size_t)j*dim,dim);
            if (dist_sq > max_dist_sq) {
                max_dist_sq = dist_sq;
                extreme[0] = i;
                extreme[1] = j;
            }
        }
    } else {
        MPI_Send(extreme,2,MPI_INT,0,0,MPI_COMM_WORLD);
    }

    // stop the timer
    end_time = MPI_Wtime();

    // output the results (only rank 0 holds the best pair over all ranks)
    if (rank == 0) {
        printf ("rank %d: elapsed time = %.4f seconds\n",rank,end_time-start_time);
        printf ("rank %d: Approximate Extreme Distance = %.2f\n",rank,sqrt(max_dist_sq));
        printf ("rank %d: Approximate Extreme Pair = %d %d\n",rank,extreme[0],extreme[1]);
    }

    // free memory allocated for dataset
    free(data);

    MPI_Finalize();
    return 0;
}

## Here are a few sample runs.  We first use version 4 to find the exact extreme pair for the 10000 image dataset.
## Next we use the randomized version, first with 4 and then with 8 ranks.  
## Note that when we go from 4 to 8 ranks we do not get any speedup.
## Instead, we use the extra ranks to search a larger set of pairs!

    ~/cmda3634_materials/L23$ mpicc -o mpi_extreme_approx mpi_extreme_approx.c vec.c -lm
    ~/cmda3634_materials/L23$ mpiexec -n 8 ./mpi_extreme_v4 mnist10k.txt
    rank 0: elapsed time = 14.9333 seconds
    rank 0: Extreme Distance = 4097.95
    rank 0: Extreme Pair = 5977 6412
    ~/cmda3634_materials/L23$ mpiexec -n 4 ./mpi_extreme_approx mnist10k.txt 1000000 123456789
    rank 0: elapsed time = 2.6473 seconds
    rank 0: Approximate Extreme Distance = 4051.22
    rank 0: Approximate Extreme Pair = 4662 6412
    ~/cmda3634_materials/L23$ mpiexec -n 8 ./mpi_extreme_approx mnist10k.txt 1000000 123456789
    rank 0: elapsed time = 2.7881 seconds
    rank 0: Approximate Extreme Distance = 4097.95
    rank 0: Approximate Extreme Pair = 5977 6412
    ~/cmda3634_materials/L23$