# Assignment No. 5
***Title: Implement an HPC application for the AI/ML domain.***

In [None]:
!pip install mpi4py

Collecting mpi4py
  Downloading mpi4py-4.0.3.tar.gz (466 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 466.3/466.3 kB 21.7 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Installing backend dependencies ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: mpi4py
  Building wheel for mpi4py (pyproject.toml) ... done
  Created wheel for mpi4py: filename=mpi4py-4.0.3-cp311-cp311-linux_x86_64.whl size=4458268 sha256=7b75e58544a563e8609a6b8d08f17d2e3f35fdeebc614b7b196de08866d2c0ff
  Stored in directory: /root/.cache/pip/wheels/5c/56/17/bf6ba37aa971a191a8b9eaa188bf5ec855b8911c1c56fb1f84
Successfully built mpi4py
Installing collected packages: mpi4py
Successfully installed mpi4py-4.0.3

In [None]:
# Imports
import tensorflow as tf
from mpi4py import MPI
import numpy as np

# MPI setup: the world communicator, this process's rank within it, and the
# total number of participating processes.
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Fetch MNIST as (train, test) pairs of image and label arrays.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel values by 1/255, append a trailing channel axis, and store the
# images as float32.
x_train = (x_train / 255.0)[..., tf.newaxis].astype("float32")
x_test = (x_test / 255.0)[..., tf.newaxis].astype("float32")

# Build the model
def create_model():
    """Build the uncompiled CNN classifier used by every process.

    Architecture: one 3x3 convolution with 32 filters (ReLU), a 2x2 max-pool,
    a flatten, and a 10-unit softmax output layer. The model expects inputs
    of shape (28, 28, 1).

    Returns:
        A ``tf.keras.models.Sequential`` instance that has not been compiled.
    """
    model = tf.keras.models.Sequential([
        # Declare the input shape with an explicit Input object rather than
        # the deprecated `input_shape=` keyword on the first layer.
        tf.keras.Input(shape=(28, 28, 1)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    return model

# Training function (distributed)
def train(model, x_train, y_train, rank, size):
    """Run one data-parallel training epoch and return the global train accuracy.

    The training set is split into ``size`` contiguous chunks; this process
    fits ``model`` on chunk number ``rank`` (the last rank also takes the
    remainder samples). After the local fit, the model weights are averaged
    across all processes so every replica continues from the same parameters.

    Uses the module-level ``comm`` communicator for all collective calls, so
    every process in the communicator must call this function.

    Args:
        model: Keras model to train. It is compiled here on first use only.
        x_train: Full array of training inputs (sliced locally).
        y_train: Full array of training labels, aligned with ``x_train``.
        rank: Index of this process in the communicator.
        size: Total number of processes in the communicator.

    Returns:
        Training accuracy over all chunks, weighted by chunk length.
    """
    # Split data across nodes
    n = len(x_train)
    chunk_size = n // size
    start = rank * chunk_size
    end = n if rank == size - 1 else (rank + 1) * chunk_size

    x_train_chunk = x_train[start:end]
    y_train_chunk = y_train[start:end]

    # Compile only once: compiling again on every call would build a fresh
    # Adam optimizer and throw away its accumulated state between epochs.
    if getattr(model, "optimizer", None) is None:
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

    # Train
    model.fit(x_train_chunk, y_train_chunk, epochs=1, batch_size=32, verbose=0)

    # Synchronize replicas: average each weight tensor over all processes.
    # The reduction is done per array because MPI.SUM on a Python list would
    # concatenate the lists instead of adding the arrays element-wise.
    if size > 1:
        averaged_weights = [
            comm.allreduce(weights, op=MPI.SUM) / size
            for weights in model.get_weights()
        ]
        model.set_weights(averaged_weights)

    # Evaluate
    train_loss, train_acc = model.evaluate(x_train_chunk, y_train_chunk, verbose=0)

    # Combine accuracies weighted by local sample count, since the last rank
    # may hold more samples than the others.
    weighted_acc = comm.allreduce(train_acc * len(x_train_chunk), op=MPI.SUM)
    return weighted_acc / n

# Create and train model
model = create_model()

# No random seed is set, so each process builds its own random initial
# weights. Broadcast rank 0's weights so all replicas start from the same
# parameters. With a single process there is nothing to share.
if size > 1:
    model.set_weights(comm.bcast(model.get_weights(), root=0))

epochs = 3

for epoch in range(epochs):
    # Train on local chunk
    train_acc = train(model, x_train, y_train, rank, size)

    # Evaluate on full test set; every process scores its own model copy and
    # the per-process accuracies are averaged.
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    test_acc = comm.allreduce(test_acc, op=MPI.SUM) / size

    # Output results (rank 0 only, so the line is printed once)
    if rank == 0:
        print(f"Epoch {epoch + 1}: Train accuracy = {train_acc:.4f}, Test accuracy = {test_acc:.4f}")


Epoch 1: Train accuracy = 0.9756, Test accuracy = 0.9725
Epoch 2: Train accuracy = 0.9845, Test accuracy = 0.9797
Epoch 3: Train accuracy = 0.9872, Test accuracy = 0.9790
