In [12]:
%%writefile benchmark.cu

Overwriting benchmark.cu


In [32]:
%%writefile cpu_benchmark.cpp
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <vector>

// Fills a flat size x size matrix with pseudo-random floats drawn from
// std::uniform_real_distribution<float>(-1.0f, 1.0f).
//
// matrix: destination buffer. Its first size*size elements are overwritten.
//         If it holds fewer than size*size elements it is grown to that
//         length first, so the fill never writes out of bounds. Any elements
//         beyond size*size are left untouched.
// size:   matrix dimension (rows == columns).
//
// The generator is seeded from std::random_device on every call, so the
// values differ between calls and between runs.
void initialize_matrix(std::vector<float>& matrix, int size) {
    // Widen before multiplying: size * size overflows int once size > 46340.
    const auto count = static_cast<std::size_t>(static_cast<long long>(size) * size);
    if (matrix.size() < count) {
        matrix.resize(count);
    }

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(-1.0f, 1.0f);
    for (std::size_t i = 0; i < count; ++i) {
        matrix[i] = dis(gen);
    }
}

// Converts a measured run time into GFLOPS for an N x N matrix multiplication.
//
// N:       matrix dimension. The operation count is modeled as 2 * N^3
//          (one multiply and one add per inner-loop step).
// time_ms: elapsed time in milliseconds.
//
// Returns (2 * N^3 / seconds) / 1e9. If time_ms is zero or negative the rate
// is undefined, so 0.0 is returned instead of inf/NaN from a division by zero.
double calculate_gflops(int N, double time_ms) {
    if (time_ms <= 0.0) {
        return 0.0;
    }
    // Plain multiplication in double: exact for any realistic N and avoids
    // a general-purpose std::pow call for a small integer exponent.
    const double n = static_cast<double>(N);
    const double operations = 2.0 * (n * n * n);
    const double time_s = time_ms / 1000.0;
    return (operations / time_s) / 1e9;
}

// Reference triple-loop product C = A * B for square N x N matrices stored
// as flat row-major vectors (element (r, c) lives at index r * N + c).
// Every element of C in the N x N range is overwritten; A and B are only read.
void matmul_naive(const std::vector<float>& A, const std::vector<float>& B, std::vector<float>& C, int N) {
    for (int row = 0; row < N; ++row) {
        // Start of the current row in A and in C.
        const float* a_row = A.data() + row * N;
        float* c_row = C.data() + row * N;

        for (int col = 0; col < N; ++col) {
            // Dot product of row `row` of A with column `col` of B,
            // accumulated in single precision in ascending index order.
            float acc = 0.0f;
            for (int inner = 0; inner < N; ++inner) {
                acc += a_row[inner] * B[inner * N + col];
            }
            c_row[col] = acc;
        }
    }
}

// Benchmark driver: for each matrix size, fills two random N x N inputs,
// times a single matmul_naive call and prints one table row with the
// achieved GFLOPS and the elapsed time in milliseconds.
int main() {
    const std::vector<int> matrix_sizes = {256, 512, 1024};
    std::cout << std::setw(10) << "Size"
              << std::setw(20) << "Naive (GFLOPS)"
              << std::setw(15) << "Time (ms)" << '\n';
    std::cout << std::string(50, '-') << '\n';

    // Fixed notation with two decimals applies to every number printed below.
    std::cout << std::fixed << std::setprecision(2);

    for (const int N : matrix_sizes) {
        const std::size_t elements = static_cast<std::size_t>(N) * static_cast<std::size_t>(N);
        std::vector<float> A(elements), B(elements), C(elements, 0.0f);
        initialize_matrix(A, N);
        initialize_matrix(B, N);

        // steady_clock is monotonic; high_resolution_clock may alias the
        // adjustable system clock, which can distort interval measurements.
        const auto start = std::chrono::steady_clock::now();
        matmul_naive(A, B, C, N);
        const auto end = std::chrono::steady_clock::now();

        // Consume the whole result (outside the timed region) through a
        // volatile so the optimizer cannot discard the multiplication.
        float checksum = 0.0f;
        for (const float value : C) {
            checksum += value;
        }
        volatile float sink = checksum;
        (void)sink;

        // Floating-point milliseconds: no truncation to whole microseconds.
        const double time_ms = std::chrono::duration<double, std::milli>(end - start).count();
        const double gflops = calculate_gflops(N, time_ms);

        // std::endl flushes on purpose so each row appears as soon as its
        // (potentially multi-second) measurement finishes.
        std::cout << std::setw(10) << N
                  << std::setw(20) << gflops
                  << std::setw(15) << time_ms
                  << std::endl;
    }

    return 0;
}


Overwriting cpu_benchmark.cpp


In [33]:
!g++ -O2 -std=c++17 cpu_benchmark.cpp -o cpu_benchmark


In [34]:
!./cpu_benchmark


      Size      Naive (GFLOPS)      Time (ms)
--------------------------------------------------
       256                1.71          19.62
       512                1.52         177.16
      1024                0.67        3226.43
