#### **Sparse and Compressed Neural Network Representations**

In [12]:
import numpy as np
import sys

def compress_sparse_matrix(sparse_matrix):
    """
    Compress a sparse matrix by storing only the non-zero values and their indices.
    
    Args:
        sparse_matrix (numpy.ndarray): The input sparse matrix.
    
    Returns:
        tuple: A tuple containing:
            - non_zero_values (numpy.ndarray): The non-zero values from the sparse matrix.
            - non_zero_indices (list): A list of tuples, where each tuple represents the row and column indices of a non-zero value.
            - compression_ratio (float): The ratio of the compressed size to the original size of the sparse matrix.
    """
    # Get the non-zero values and their indices from the sparse matrix
    non_zero_values = sparse_matrix[np.nonzero(sparse_matrix)]
    non_zero_indices = list(zip(*np.nonzero(sparse_matrix)))
    
    # Calculate memory sizes
    original_size = sparse_matrix.size * sparse_matrix.itemsize
    compressed_size = non_zero_values.size * non_zero_values.itemsize + sys.getsizeof(non_zero_indices)
    
    # Calculate compression ratio
    compression_ratio = compressed_size / original_size
    
    return non_zero_values, non_zero_indices, compression_ratio

# Generate a large sparse matrix
np.random.seed(0)  # For reproducibility
sparse_matrix = np.random.choice([0, 1], size=(1000, 1000), p=[0.9, 0.1]) * np.random.rand(1000, 1000)

print("Original Sparse Matrix:")
print(sparse_matrix)

non_zero_values, non_zero_indices, compression_ratio = compress_sparse_matrix(sparse_matrix)

print("\nCompressed Representation:")
print("Non-zero Values:", non_zero_values)
# print("Non-zero Indices:", non_zero_indices)
print("Compression Ratio: {:.2f}".format(compression_ratio))

Original Sparse Matrix:
[[0.         0.         0.         ... 0.04901448 0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.49187027]]

Compressed Representation:
Non-zero Values: [0.60934925 0.53159247 0.2195387  ... 0.87796906 0.56985114 0.49187027]
Compression Ratio: 0.20


##### **Quantization**

In [15]:
import numpy as np

def quantize_matrix(matrix, bits):
    """
    Quantizes a matrix to reduce precision using a specified number of bits.

    Args:
        matrix (numpy.ndarray): The input matrix.
        bits (int): The number of bits used for quantization.

    Returns:
        numpy.ndarray: The quantized matrix.
        float: The scale factor used for quantization.
    """
    max_val = np.max(np.abs(matrix))
    scale_factor = (2 ** (bits - 1) - 1) / max_val
    quantized_matrix = np.round(matrix * scale_factor).astype(np.int8)
    return quantized_matrix, scale_factor

def dequantize_matrix(quantized_matrix, scale_factor):
    """
    Dequantizes a quantized matrix to approximate original floating-point values.

    Args:
        quantized_matrix (numpy.ndarray): The quantized matrix.
        scale_factor (float): The scale factor used for quantization.

    Returns:
        numpy.ndarray: The dequantized matrix.
    """
    dequantized_matrix = quantized_matrix / scale_factor
    return dequantized_matrix

def quantized_matrix_multiplication(matrix1, matrix2, bits):
    """
    Performs matrix multiplication with quantization.

    Args:
        matrix1 (numpy.ndarray): The first input matrix.
        matrix2 (numpy.ndarray): The second input matrix.
        bits (int): The number of bits used for quantization.

    Returns:
        numpy.ndarray: The result of matrix multiplication with quantization.
    """
    quantized_matrix1, scale_factor1 = quantize_matrix(matrix1, bits)
    quantized_matrix2, scale_factor2 = quantize_matrix(matrix2, bits)
    
    result_scale_factor = scale_factor1 * scale_factor2
    quantized_result = np.dot(quantized_matrix1, quantized_matrix2)
    
    dequantized_result = dequantize_matrix(quantized_result, result_scale_factor)
    return dequantized_result

# Example usage
matrix1 = np.array([[1.23, 4.56],
                    [7.89, 0.12]])
matrix2 = np.array([[3.45],
                    [6.78]])

# Perform matrix multiplication without quantization
float_result = np.dot(matrix1, matrix2)
print("Floating-point matrix multiplication result:")
print(float_result)

# Perform matrix multiplication with quantization
bits = 8
quantized_result = quantized_matrix_multiplication(matrix1, matrix2, bits)
print(f"\nQuantized matrix multiplication result ({bits} bits):")
print(quantized_result)

# Calculate memory usage
float_size = matrix1.nbytes + matrix2.nbytes + float_result.nbytes
quantized_size = matrix1.nbytes // 4 + matrix2.nbytes // 4 + quantized_result.nbytes // 4
print(f"\nMemory usage:")
print(f"Floating-point: {float_size} bytes")
print(f"Quantized: {quantized_size} bytes")
print(f"Memory reduction: {(1 - quantized_size / float_size) * 100:.2f}%")


Floating-point matrix multiplication result:
[[35.1603]
 [28.0341]]

Quantized matrix multiplication result (8 bits):
[[0.24874853]
 [0.20231547]]

Memory usage:
Floating-point: 64 bytes
Quantized: 16 bytes
Memory reduction: 75.00%
