In [1]:
%%shell
ln -sfnv /usr/local/cuda-11/ /usr/local/cuda
wget https://openmp-course.s3.amazonaws.com/llvm.tar.gz
tar -xzvf llvm.tar.gz >/dev/null 2>&1

'/usr/local/cuda' -> '/usr/local/cuda-11/'
--2023-10-17 12:01:49--  https://openmp-course.s3.amazonaws.com/llvm.tar.gz
Resolving openmp-course.s3.amazonaws.com (openmp-course.s3.amazonaws.com)... 52.216.205.195, 52.216.37.185, 3.5.29.118, ...
Connecting to openmp-course.s3.amazonaws.com (openmp-course.s3.amazonaws.com)|52.216.205.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 810538565 (773M) [application/x-gzip]
Saving to: ‘llvm.tar.gz’


2023-10-17 12:02:16 (29.3 MB/s) - ‘llvm.tar.gz’ saved [810538565/810538565]





In [2]:
import os

os.environ['LLVM_PATH'] = '/content/llvm'
os.environ['PATH'] = os.environ['LLVM_PATH'] + '/bin:' + os.environ['PATH']
os.environ['LD_LIBRARY_PATH'] = os.environ['LLVM_PATH'] + '/lib:' + os.environ['LD_LIBRARY_PATH']
os.environ['TSAN_OPTIONS'] = 'ignore_noninstrumented_modules=1'

In [3]:
%%writefile test.c

#include <omp.h>
#include <stdio.h>

int main() {
  int num_devices = omp_get_num_devices();
  printf("Temos %d dispositivo(s) alocado(s)\n", num_devices);
}

Writing test.c


In [4]:
%%shell

clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_75 test.c -o teste

./teste

Temos 1 dispositivo(s) alocado(s)




# Atividade 3 - Algoritmos de processamento de imagens

Vetorização da filtragem de convolução de imagens




In [261]:
%%writefile atv3.c

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <sys/time.h>

#define N 20000
#define M 5

int main(int argc, char *argv[]) {
    struct timeval start, end;
    double t, soma;
    int i, j, x, y;

    srand(0);

    double *imagem = (double *) malloc(N * N * sizeof(double));
    double *resultado = (double *) malloc(N * N * sizeof(double));

    //Preenche as matrizes com valores randomicos
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            imagem[i * N + j] = (fmod (rand(), 50.111));
        }
    }

    //Filtro Sobel Horizontal 5x5
    double mascara[M][M] = {{2, 2, 4, 2, 2},
                            {1, 1, 2, 1, 1},
                            {0, 0, 0, 0, 0},
                            {-1, -1, -2, -1, -1},
                            {-2, -2, -4, -2, -2}};


    gettimeofday(&start, NULL);

//Convolução da imagem
#pragma omp target teams distribute parallel for simd private(j, x, y, soma) schedule(guided) map(to:imagem[0:N*N], mascara[0:M*M]) map(tofrom:resultado[0:N*N])
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            soma = 0;
            for (x = 0; x < M; x++) {
                for (y = 0; y < M; y++) {
                    int img_x = i - M / 2 + x;
                    int img_y = j - M / 2 + y;

                    if (img_x >= 0 && img_x < N && img_y >= 0 && img_y < N) {
                        soma += imagem[img_x * N + img_y] * mascara[x][y];
                    }
                }
            }

            resultado[i * N + j] = soma;

        }
    }

    gettimeofday(&end, NULL);

    t = (double) ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0;

	  printf("Tempo gasto: %f\n", t);

    /*    //Imprimir o resultado
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf("%f ", resultado[i *N + j]);
        }
        printf("\n");
    }*/

    return 0;
}

Overwriting atv3.c


In [262]:
!clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_75 -lm atv3.c -o atv3.x



In [267]:
!./atv3.x

Tempo gasto: 7.972466
