utils + small fixes + refactor
eliazonta committed Aug 2, 2023
1 parent 98409ce commit bd41ca6
Showing 10 changed files with 165 additions and 8 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/c-cpp.yml
@@ -16,4 +16,6 @@ jobs:
    - name: make clean
      run: make clean
    - name: make sequential
-     run: make sequential
+     run: make sequential
+    # - name: make parallel
+    # run: make parallel
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.vscode
.DS_Store
12 changes: 10 additions & 2 deletions Makefile
@@ -1,4 +1,5 @@
CC := gcc
+NVCC := /usr/local/cuda-12.2/bin/nvcc
BIN_FOLDER := bin
SRC_FOLDER := src
PARALLEL_FOLDER := parallel
@@ -12,14 +13,21 @@ SRC-CUDA := main.cu

all: sequential cuda

+prova:
+	@mkdir -p prova
+	$(NVCC) $(SRC_FOLDER)/$(PARALLEL_FOLDER)/prova.cu
+	@mv a.out prova/prova


sequential:
	@mkdir -p $(BIN_FOLDER)
	$(CC) $(SRC_FOLDER)/$(SRC-SEQ) $(SRC_FOLDER)/parser.c $(SRC_FOLDER)/sequential.c
	@mv a.out $(BIN_FOLDER)/$(NN-SEQ)

parallel:
-	/usr/local/cuda-10.0/bin/nvcc $(SRC_FOLDER)/$(PARALLEL_FOLDER)/$(SRC-CUDA) $(SRC_FOLDER)/parser.c $(SRC_FOLDER)/$(PARALLEL_FOLDER)/parallel.cu
+	@mkdir -p $(BIN_FOLDER)
+	$(NVCC) $(SRC_FOLDER)/$(PARALLEL_FOLDER)/$(SRC-CUDA) $(SRC_FOLDER)/parser.c $(SRC_FOLDER)/$(PARALLEL_FOLDER)/parallel.cu
	@mv a.out $(BIN_FOLDER)/$(NN-CUDA)

clean:
-	rm -rf $(BIN_FOLDER)
+	rm -rf $(BIN_FOLDER) prova
1 change: 1 addition & 0 deletions include/parser.h
@@ -5,6 +5,7 @@
#include <string.h>
#include <time.h>


void read_matrix(int **row_ptr, int **col_ind, float **values, const char *filename, int *num_rows, int *num_cols, int *num_vals);

#endif // PARSER_H
122 changes: 122 additions & 0 deletions include/utils.h
@@ -0,0 +1,122 @@
#pragma once

#include <chrono>
#include <cmath>
#include <mutex>
#include <iostream>

///////////////////////////////////////////////////////////////
// CUDA error check
//////////////////////////////////////////////////////////////
static void cuda_check_status(cudaError_t status)
{
    if (status != cudaSuccess)
    {
        std::cerr << "error : CUDA API call : "
                  << cudaGetErrorString(status) << std::endl;
        exit(1);
    }
}

//////////////////////////////////////////////////////////////
// memory allocation
//////////////////////////////////////////////////////////////
template <typename T>
T* malloc_device(size_t n)
{
    void* p;
    auto status = cudaMalloc(&p, n * sizeof(T));
    cuda_check_status(status);
    return (T*)p;
}

template <typename T>
T* malloc_managed(size_t n, T value = T())
{
    T* p;
    auto status = cudaMallocManaged(&p, n * sizeof(T));
    cuda_check_status(status);
    std::fill(p, p + n, value);
    return p;
}

template <typename T>
T* malloc_pinned(size_t n, T value = T())
{
    T* p = nullptr;
    cudaHostAlloc((void**)&p, n * sizeof(T), 0);
    std::fill(p, p + n, value);
    return p;
}


///////////////////////////////////////////////////////////////////
// CUDA memory copy
//////////////////////////////////////////////////////////////////
template <typename T>
void copy_to_device(T* from, T* to, size_t n)
{
    cuda_check_status(cudaMemcpy(to, from, n * sizeof(T), cudaMemcpyHostToDevice));
}

template <typename T>
void copy_to_host(T* from, T* to, size_t n)
{
    cuda_check_status(cudaMemcpy(to, from, n * sizeof(T), cudaMemcpyDeviceToHost));
}

template <typename T>
void copy_to_device_async(const T* from, T* to, size_t n, cudaStream_t stream = NULL)
{
    auto status = cudaMemcpyAsync(to, from, n * sizeof(T), cudaMemcpyHostToDevice, stream);
    cuda_check_status(status);
}

template <typename T>
void copy_to_host_async(const T* from, T* to, size_t n, cudaStream_t stream = NULL)
{
    auto status = cudaMemcpyAsync(to, from, n * sizeof(T), cudaMemcpyDeviceToHost, stream);
    cuda_check_status(status);
}

///////////////////////////////////////////////////////////////////
// others
//////////////////////////////////////////////////////////////////
static size_t read_arg(int argc, char** argv, size_t index, int default_value)
{
    if (argc > index)
    {
        try {
            auto n = std::stoi(argv[index]);
            if (n < 0)
            {
                return default_value;
            }
            return n;
        } catch (std::exception& e)
        {
            std::cerr << "error [invalid argument, expected a positive integer] | compiler says : "
                      << e.what() << std::endl;
            exit(1);
        }
    }
    return default_value;
}

template <typename T>
T* malloc_host(size_t n, T value = T())
{
    T* p = (T*)malloc(n * sizeof(T));
    std::fill(p, p + n, value);
    return p;
}

// aliases
using clock_type = std::chrono::high_resolution_clock;
using duration_type = std::chrono::duration<double>;

static double get_time()
{
    static auto start_time = clock_type::now();
    return duration_type(clock_type::now() - start_time).count();
}
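For orientation, the helpers above compose roughly as in the following sketch. It is illustrative only and not part of the commit: the scale_kernel, the include path and the launch configuration are assumptions; only the utils.h functions (read_arg, malloc_host, malloc_device, copy_to_device, copy_to_host, cuda_check_status, get_time) come from the header shown above.

// sketch.cu -- hypothetical usage of the include/utils.h helpers
#include "../include/utils.h"

// hypothetical kernel: scale every element of v by alpha
__global__ void scale_kernel(float* v, float alpha, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) v[i] *= alpha;
}

int main(int argc, char** argv)
{
    size_t n = read_arg(argc, argv, 1, 1024);       // problem size from argv[1], default 1024

    float* h = malloc_host<float>(n, 1.0f);         // host buffer initialised to 1.0
    float* d = malloc_device<float>(n);             // uninitialised device buffer

    auto t0 = get_time();
    copy_to_device(h, d, n);                        // H2D copy, checked by cuda_check_status
    int blocks = (int)((n + 255) / 256);            // enough 256-thread blocks to cover n
    scale_kernel<<<blocks, 256>>>(d, 2.0f, (int)n);
    cuda_check_status(cudaGetLastError());          // catch launch errors
    cuda_check_status(cudaDeviceSynchronize());     // wait for the kernel to finish
    copy_to_host(d, h, n);                          // D2H copy back into the host buffer
    std::cout << "h[0] = " << h[0] << ", elapsed " << get_time() - t0 << " s" << std::endl;

    cudaFree(d);
    free(h);
    return 0;
}

Compiled with nvcc (for example the NVCC path defined in the Makefile above), this exercises allocation, transfer, error checking and the wall-clock timer together.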
Binary file added prova/prova
Binary file not shown.
2 changes: 1 addition & 1 deletion src/main.c
@@ -18,7 +18,7 @@ int main(int argc, char **argv) {
    }

    int *row_ptr, *col_ind, num_rows, num_cols, num_vals;
-   float *values, elapsed_time;;
+   float *values, elapsed_time;

    int num_repeat = atoi(argv[1]);
    int print_mode = atoi(argv[2]);
13 changes: 10 additions & 3 deletions src/parallel/main.cu
@@ -3,6 +3,9 @@
#include <string.h>

#include "../../include/parser.h"
+#include "../../include/parallel.h"
+#include "../../include/utils.h"


int main(int argc, const char * argv[]) {
    fprintf(stdout, "============================\n");
@@ -26,8 +29,10 @@ int main(int argc, const char * argv[]) {

    read_matrix(&row_ptr, &col_ind, &values, filename, &num_rows, &num_cols, &num_vals);

-   float *x = (float *) malloc(num_rows * sizeof(float));
-   float *y = (float *) malloc(num_rows * sizeof(float));
+   // float *x = (float *) malloc(num_rows * sizeof(float));
+   float *x = malloc_host<float>(num_rows);
+   // float *y = (float *) malloc(num_rows * sizeof(float));
+   float *y = malloc_host<float>(num_rows);
    for (int i = 0; i < num_rows; ++i) {
        x[i] = 1.0;
        y[i] = 0.0;
@@ -73,10 +78,12 @@ int main(int argc, const char * argv[]) {
    cudaDeviceGetAttribute(&numSMs, cudaDevAttrMultiProcessorCount, 0);

    // Copy from host to device
+   auto s = get_time();
    cudaMemcpy(d_row_ptr, row_ptr, (num_rows + 1) * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_col_ind, col_ind, num_vals * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_values, values, num_vals * sizeof(float), cudaMemcpyHostToDevice);

+   auto time_H2D = get_time() - s;

    // Time the iterations
    float elapsed_time;
    cudaEvent_t start, stop;
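The diff is truncated at the event declarations. For readability, this is roughly how such a timing block is usually completed; it is a hedged sketch, not the repository's code: spmv_kernel, d_x, d_y, the launch configuration and num_repeat are assumptions, while start, stop, elapsed_time, time_H2D, numSMs and the d_* CSR arrays appear in the diff above.

// sketch: wall-clock timing for the H2D copy, CUDA events for the kernel loop
cudaEventCreate(&start);
cudaEventCreate(&stop);

cudaEventRecord(start);
for (int i = 0; i < num_repeat; ++i) {
    // hypothetical SpMV kernel over the CSR arrays copied to the device above
    spmv_kernel<<<32 * numSMs, 256>>>(d_row_ptr, d_col_ind, d_values, d_x, d_y, num_rows);
}
cudaEventRecord(stop);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed_time, start, stop);   // milliseconds across all repetitions

printf("H2D copy time : %f s\n", time_H2D);
printf("kernel time   : %f ms (%d repetitions)\n", elapsed_time, num_repeat);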
15 changes: 15 additions & 0 deletions src/parallel/prova.cu
@@ -0,0 +1,15 @@
#include <iostream>

__global__
void kernel()
{
    printf("Hello World from GPU! %d, %d\n", threadIdx.x, blockIdx.x);
}

int main(int argc, char** argv)
{
    std::cout << "Hello World from CPU!" << std::endl;
    kernel<<<1, 1>>>();
    cudaDeviceSynchronize();
    return 0;
}
2 changes: 1 addition & 1 deletion src/parser.c
@@ -67,4 +67,4 @@ void read_matrix(int **row_ptr, int **col_ind, float **values, const char *filen
    *row_ptr = row_ptr_t;
    *col_ind = col_ind_t;
    *values = values_t;
-}
+}
