In [None]:
import os
import time
import numpy as np
import tensorflow as tf

# Connect to the TPU
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)

# Define the strategy for distributing computation
strategy = tf.distribute.TPUStrategy(resolver)

# Function to create and initialize random matrices
def create_random_matrix(shape, dtype):
    if dtype == tf.float32:
        return np.random.rand(*shape).astype(np.float32)
    elif dtype == tf.int32:
        return np.random.randint(0, 100000, size=shape, dtype=np.int32)
    elif dtype == tf.float64:
        return np.random.rand(*shape).astype(np.float64)
    elif dtype == tf.int64:
        return np.random.randint(0, 100000, size=shape, dtype=np.int64)
    else:
        raise ValueError("Unsupported data type")
# Function to perform matrix multiplication without using tf.matmul
def custom_matmul(a, b):
    # Get the TPU device
    tpu_device = resolver.master()

    # Cast the input matrices to tensors
    a_tensor = tf.convert_to_tensor(a)
    b_tensor = tf.convert_to_tensor(b)

    # Define the computation to be executed on TPU
    @tf.function
    def matmul_on_tpu(a, b):
        with tf.device(tpu_device):
            # Transpose matrix b for efficient multiplication
            b_transposed = tf.transpose(b)

            # Reshape matrix a and b for element-wise multiplication
            a_reshaped = tf.reshape(a, (-1, tf.shape(a)[-1], 1))
            b_reshaped = tf.reshape(b_transposed, (-1, 1, tf.shape(b)[-2]))

            # Perform element-wise multiplication
            mul_result = a_reshaped * b_reshaped

            # Reduce along the last dimension
            result = tf.reduce_sum(mul_result, axis=-2)

            return result

    # Execute the computation on TPU
    result = matmul_on_tpu(a_tensor, b_tensor)

    return result

# Function to perform matrix multiplication using for loops
def multiply_matrices(a, b):
    result = custom_matmul(a,b)
    return result

# Data types to test
data_types = [tf.int32, tf.float32, tf.int64, tf.float64]

output_file = "matrix_multiplication_results.txt"

with open(output_file, "w") as file:
    for dtype in data_types:
        # Generate two random matrices with specified data type
        matrix_a = create_random_matrix((4096, 4096), dtype)
        matrix_b = create_random_matrix((4096, 4096), dtype)

        # Wrap the matrix multiplication function in a strategy.scope to run on TPU
        with strategy.scope():
            # Convert the matrices to tensors and distribute them across the TPU
            distributed_matrix_a = tf.convert_to_tensor(matrix_a)
            distributed_matrix_b = tf.convert_to_tensor(matrix_b)

            # Start the timer
            start_time = time.time()

            # Perform matrix multiplication on TPU
            result = multiply_matrices(distributed_matrix_a.numpy(), distributed_matrix_b.numpy())

            # End the timer
            end_time = time.time()

        # Calculate the elapsed time
        elapsed_time = end_time - start_time

        # Print time taken
        print(f"Time taken for {dtype}: {elapsed_time:.4f} seconds")

        # Write result to file
        file.write(f"Data type: {dtype}\n")
        file.write(f"Time taken: {elapsed_time:.4f} seconds\n")
        file.write("First 10x10 resultant matrix:\n")
        np.savetxt(file, result[:10, :10], fmt="%d" if dtype == tf.int32 or dtype == tf.int64 else "%.6f")
        file.write("\n\n")




ValueError: in user code:

    File "<ipython-input-2-19952733bc11>", line 38, in matmul_on_tpu  *
        with tf.device(tpu_device):
    File "/usr/lib/python3.10/contextlib.py", line 135, in __enter__
        return next(self.gen)
    File "/usr/lib/python3.10/contextlib.py", line 135, in __enter__
        return next(self.gen)

    ValueError: Unknown attribute 'grpc' is encountered while parsing the device spec: 'grpc://10.13.130.10:8470'.


In [1]:
import tensorflow as tf
import time

# Define matrix dimensions
dim = 4096
dtype_list = [tf.int32, tf.int64, tf.float32, tf.float64]

for dtype in dtype_list:
    # Create random matrices
    if dtype == tf.int32:
        matrix_a = tf.random.uniform((dim, dim), minval=0, maxval=1000, dtype=dtype)
        matrix_b = tf.random.uniform((dim, dim), minval=0, maxval=1000, dtype=dtype)
    else:
        matrix_a = tf.random.uniform((dim, dim), minval=0, maxval=1000000, dtype=dtype)
        matrix_b = tf.random.uniform((dim, dim), minval=0, maxval=1000000, dtype=dtype)

    # Start timer
    start_time = time.time()

    # Perform matrix multiplication on GPU
    result = tf.matmul(matrix_a, matrix_b)

    # End timer
    end_time = time.time()

    # Print execution time
    print("Data type:", dtype, "Time taken:", end_time - start_time, "seconds")

    # Print the first 10 numbers of the resultant matrix
    # print("First 10 numbers of the resultant matrix:")
    # print(result[:10, :10].numpy())
    print("\n")


Data type: <dtype: 'int32'> Time taken: 12.163366556167603 seconds


Data type: <dtype: 'int64'> Time taken: 32.03870701789856 seconds


Data type: <dtype: 'float32'> Time taken: 0.13390040397644043 seconds


Data type: <dtype: 'float64'> Time taken: 0.019844770431518555 seconds


