In [1]:
%%writefile matrixmulti.cu

Writing matrixmulti.cu


In [17]:
%%writefile matrix_multi.h
#ifndef MATRIX_MULTI_H
#define MATRIX_MULTI_H

#include <vector>
#include <string>

// One benchmark measurement: how long a multiply took, the derived throughput,
// and which storage format / sparsity level it was measured for.
//
// Every scalar member carries a default so that a default-constructed
// PerfResult never exposes indeterminate values.
struct PerfResult {
    float time_ms = 0.0f;   // elapsed time in milliseconds
    double gflops = 0.0;    // throughput in GFLOP/s derived from time_ms
    std::string format;     // label of the storage format measured (main.cpp uses "Dense")
    // Sparsity of the operand; main.cpp records 0 for the dense case.
    // NOTE(review): fraction vs. percent is not established in this file — confirm.
    float sparsity = 0.0f;
};

// Sparse matrix held in CSR (compressed sparse row) form.
//
// Nothing in this struct enforces consistency between the arrays and the
// scalar fields; whoever fills it is responsible for keeping them in sync.
// Scalar fields default to 0 so a default-constructed matrix is a well-defined
// empty matrix instead of holding indeterminate sizes.
struct CSRMatrix {
    std::vector<float> values;     // stored entries
    // Per-row offsets into values / col_indices.
    // NOTE(review): standard CSR uses rows + 1 entries — confirm with the builder code.
    std::vector<int> row_ptr;
    std::vector<int> col_indices;  // column index paired with each entry of values
    // NOTE(review): three sizes on one matrix suggests these are GEMM problem
    // dimensions (A: M x K, B: K x N) — confirm against the code that fills them.
    int M = 0, N = 0, K = 0;
    int nnz = 0;                   // non-zero count; presumably values.size() — verify
};

// Sparse matrix held in COO (coordinate) form: three parallel arrays giving
// the value, row and column of each stored entry.
//
// Nothing in this struct enforces that the three arrays have equal length or
// agree with nnz; the code that fills it must keep them in sync.
// Scalar fields default to 0 so a default-constructed matrix is a well-defined
// empty matrix instead of holding indeterminate sizes.
struct COOMatrix {
    std::vector<float> values;     // stored entries
    std::vector<int> row_indices;  // row index paired with each entry of values
    std::vector<int> col_indices;  // column index paired with each entry of values
    // NOTE(review): three sizes on one matrix suggests these are GEMM problem
    // dimensions (A: M x K, B: K x N) — confirm against the code that fills them.
    int M = 0, N = 0, K = 0;
    int nnz = 0;                   // non-zero count; presumably values.size() — verify
};

// Sparse matrix held in Block-CSR form: CSR-style indexing arrays plus a
// block_size field.
//
// NOTE(review): the in-block element layout of `values` and whether row_ptr /
// col_indices count blocks or scalar elements are not visible in this file —
// confirm against the code that builds and consumes this struct.
// Scalar fields default to 0 so a default-constructed matrix is a well-defined
// empty matrix instead of holding indeterminate sizes.
struct BlockCSRMatrix {
    std::vector<float> values;     // stored block data
    std::vector<int> row_ptr;      // row offsets into the block list
    std::vector<int> col_indices;  // column index paired with each stored block
    // NOTE(review): three sizes on one matrix suggests these are GEMM problem
    // dimensions (A: M x K, B: K x N) — confirm against the code that fills them.
    int M = 0, N = 0, K = 0;
    int block_size = 0;            // block edge length; presumably square blocks — verify
};

#endif // MATRIX_MULTI_H


Overwriting matrix_multi.h


In [18]:
%%writefile main.cpp
#include <chrono>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

#include "matrix_multi.h"

// Reference dense matrix product C = A * B on the CPU, used for validation.
//
// Parameters:
//   A - M x K matrix stored as a vector of rows.
//   B - K x N matrix stored as a vector of rows.
// Returns:
//   The M x N product. An empty A yields an empty result.
// Throws:
//   std::invalid_argument if either matrix is ragged (rows of differing
//   length) or if the number of columns of A differs from the number of
//   rows of B.
std::vector<std::vector<float>> denseMultiply(
    const std::vector<std::vector<float>>& A,
    const std::vector<std::vector<float>>& B) {

    const std::size_t M = A.size();
    if (M == 0) {
        // Nothing to multiply; also avoids touching A[0] on an empty matrix.
        return {};
    }

    const std::size_t K = A[0].size();
    if (B.size() != K) {
        throw std::invalid_argument(
            "denseMultiply: columns of A must equal rows of B");
    }
    // B may legitimately be empty when K == 0; the product is then M x 0.
    const std::size_t N = B.empty() ? 0 : B[0].size();

    // Reject ragged inputs up front so the multiply loops never index past
    // the end of a short row.
    for (const auto& row : A) {
        if (row.size() != K) {
            throw std::invalid_argument("denseMultiply: A is not rectangular");
        }
    }
    for (const auto& row : B) {
        if (row.size() != N) {
            throw std::invalid_argument("denseMultiply: B is not rectangular");
        }
    }

    std::vector<std::vector<float>> C(M, std::vector<float>(N, 0.0f));

    // i-k-j loop order: the innermost loop walks one row of B and one row of C
    // left to right, rather than striding down a column of B.
    for (std::size_t i = 0; i < M; ++i) {
        for (std::size_t k = 0; k < K; ++k) {
            const float a = A[i][k];
            for (std::size_t j = 0; j < N; ++j) {
                C[i][j] += a * B[k][j];
            }
        }
    }
    return C;
}

// Writes `mat` to std::cout, one matrix row per output line.
// Each element is followed by a single space (so every line ends with a
// trailing space) and each row is terminated by a newline.
void printMatrix(const std::vector<std::vector<float>>& mat) {
    for (auto rowIt = mat.begin(); rowIt != mat.end(); ++rowIt) {
        for (auto elemIt = rowIt->begin(); elemIt != rowIt->end(); ++elemIt) {
            std::cout << *elemIt << " ";
        }
        std::cout << "\n";
    }
}

// Demo driver: multiplies two small hard-coded dense matrices on the CPU,
// times the single call with std::chrono, and prints the product followed by
// the elapsed time, the derived GFLOP/s figure and the format label.
// Returns 0.
int main() {
    // Example dense matrices: A is 2 x 3, B is 3 x 2.
    const std::vector<std::vector<float>> A = {
        {1, 0, 2},
        {0, 3, 0}
    };

    const std::vector<std::vector<float>> B = {
        {0, 3},
        {1, 0},
        {4, 5}
    };

    // Explicit casts: size() returns an unsigned type wider than int.
    const int M = static_cast<int>(A.size());
    const int K = static_cast<int>(A[0].size());
    const int N = static_cast<int>(B[0].size());

    // Measure time of a single multiply.
    const auto start = std::chrono::high_resolution_clock::now();
    const auto C = denseMultiply(A, B);
    const auto end = std::chrono::high_resolution_clock::now();

    // Calculate performance.
    const float time_ms =
        std::chrono::duration<float, std::milli>(end - start).count();

    // 2*M*N*K floating-point operations (one multiply and one add per inner
    // step). flops / (ms * 1e6) == flops / (s * 1e9) == GFLOP/s.
    // A coarse clock can report zero elapsed time for work this small; report
    // 0 GFLOP/s in that case instead of dividing by zero and printing inf.
    const double flops = 2.0 * M * N * K;
    const double gflops = (time_ms > 0.0f) ? flops / (time_ms * 1e6) : 0.0;

    // Fill PerfResult struct.
    PerfResult result;
    result.time_ms = time_ms;
    result.gflops = gflops;
    result.format = "Dense";
    result.sparsity = 0.0f; // recorded as 0 for the dense format

    // Print results.
    std::cout << "Result Matrix C = A * B:\n";
    printMatrix(C);

    std::cout << "\nPerformance:\n";
    std::cout << "Time: " << result.time_ms << " ms\n";
    std::cout << "GFLOPS: " << result.gflops << "\n";
    std::cout << "Format: " << result.format << "\n";

    return 0;
}


Writing main.cpp


In [19]:
!g++ main.cpp -o matrix_multi
!./matrix_multi


Result Matrix C = A * B:
8 13 
3 0 

Performance:
Time: 0.001378 ms
GFLOPS: 0.0174165
Format: Dense
