In [5]:
!nvcc --version
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-sw8n430u
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-sw8n430u
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 0d2ab99cccbbc682722e708515fe9c4cfc50185a
  Preparing metadata (setup.py) ... done
The nvcc_plugin extension is already loaded. To reload it, use:
  %reload_ext nvcc_plugin


In [45]:
%%cu

#include <iostream>
#include <memory>
#include <time.h>
#include <float.h>
#include <curand_kernel.h>
#include "/content/drive/MyDrive/raytracing/vec3.h"
#include "/content/drive/MyDrive/raytracing/ray.h"
#include "/content/drive/MyDrive/raytracing/sphere.h"
#include "/content/drive/MyDrive/raytracing/hitable_list.h"
#include "/content/drive/MyDrive/raytracing/camera.h"
#include "/content/drive/MyDrive/raytracing/material.h"
#include "/content/drive/MyDrive/raytracing/EasyBMP.hpp"

// Maximum number of bounces followed per ray (iteration cap of the loop in color())
#define RECURSION_DEPTH 5
// Wraps a CUDA runtime call: forwards its status, its source text, and the
// call site's file and line to check_cuda
#define checkCudaErrors(val) check_cuda( (val), #val, __FILE__, __LINE__ )

using namespace std;

// Reports a failed CUDA runtime call and terminates the process.
//
// Parameters:
//   result - status code returned by the CUDA call being checked
//   func   - source text of the checked expression (stringified by checkCudaErrors)
//   file   - source file of the call site
//   line   - line number of the call site
//
// Returns normally when result is cudaSuccess. For any other status it prints
// the numeric code together with the runtime's human-readable description to
// stderr, resets the CUDA device, and exits with status 99.
void check_cuda(cudaError_t result, char const *const func, const char *const file, int const line) {
    if (result == cudaSuccess) return;

    std::cerr << "CUDA error = " << static_cast<unsigned int>(result)
              << " (" << cudaGetErrorString(result) << ") at "
              << file << ":" << line << " '" << func << "' \n";
    // Reset the CUDA device before exiting
    cudaDeviceReset();
    exit(99);
}

// Computes the color contributed by one camera ray.
//
// The ray is followed iteratively for at most RECURSION_DEPTH bounces:
//   - if it hits nothing in *world, the result is the accumulated attenuation
//     multiplied by a background gradient blended between (1, 1, 1) and
//     (0.5, 0.7, 1.0) according to the y component of the unit direction;
//   - if the hit material's scatter() returns false, the result is black;
//   - otherwise the attenuation is accumulated and the scattered ray is
//     followed on the next iteration.
// Black is also returned when the bounce limit is reached.
//
// Parameters:
//   r                - ray to trace
//   world            - pointer to the scene root; (*world)->hit() is queried
//   local_rand_state - RNG state forwarded to the material's scatter()
__device__ vec3 color(const ray& r, hitable **world, curandState *local_rand_state) {
    vec3 throughput = vec3(1, 1, 1);
    ray current = r;

    for (int bounce = 0; bounce < RECURSION_DEPTH; ++bounce) {
        hit_record rec;
        // Lower bound 0.001 presumably avoids re-hitting the surface the ray
        // just left -- confirm against hitable::hit
        const bool hit_something = (*world)->hit(current, 0.001, FLT_MAX, rec);

        if (!hit_something) {
            // Miss: shade with the background gradient and stop
            vec3 dir = unit_vector(current.direction());
            float blend = 0.5f*(dir.y() + 1.0f);
            vec3 background = (1.0f-blend)*vec3(1.0, 1.0, 1.0) + blend*vec3(0.5, 0.7, 1.0);
            return throughput * background;
        }

        ray scattered;
        vec3 attenuation;
        const bool did_scatter =
            rec.mat_ptr->scatter(current, rec, attenuation, scattered, local_rand_state);
        if (!did_scatter) {
            return vec3(0.0,0.0,0.0);
        }

        throughput *= attenuation;
        current = scattered;
    }

    // Bounce limit exceeded
    return vec3(0.0,0.0,0.0);
}

// Initializes the single RNG state pointed to by rand_state.
// Only thread 0 of block 0 performs the initialization; the seed is the
// current value of the device clock counter (clock64()), with sequence 0
// and offset 0.
__global__ void rand_init(curandState *rand_state) {
    const bool is_first_thread = (threadIdx.x == 0) && (blockIdx.x == 0);
    if (!is_first_thread) {
        return;
    }
    curand_init(clock64(), 0, 0, rand_state);
}

// Initializes one RNG state per pixel.
// Expects a 2D launch: the x/y thread coordinates select pixel (i, j); threads
// outside the max_x by max_y image do nothing. The state for a pixel is stored
// at rand_state[j * max_x + i] and seeded with 1984 + that index (sequence 0,
// offset 0).
__global__ void render_init(int max_x, int max_y, curandState *rand_state) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    const bool inside_image = (col < max_x) && (row < max_y);
    if (inside_image) {
        const int idx = row * max_x + col;
        curand_init(1984 + idx, 0, 0, rand_state + idx);
    }
}

// Renders the image, one thread per pixel.
// Expects a 2D launch: the x/y thread coordinates select pixel (i, j); threads
// outside the max_x by max_y image return immediately.
//
// Parameters:
//   fb           - output buffer; pixel (i, j) is written to fb[j * max_x + i]
//   max_x, max_y - image width and height in pixels
//   ns           - number of samples averaged per pixel
//   cam          - pointer to the camera; (*cam)->get_ray() generates each ray
//   world        - scene pointer forwarded to color()
//   rand_state   - per-pixel RNG states, indexed like fb; the state is copied
//                  into a local, used, and written back when the pixel is done
//
// Each sample jitters the pixel coordinate with two uniform draws. The sample
// sum is divided by ns and each channel is replaced by its square root before
// being stored.
__global__ void render(vec3 *fb, int max_x, int max_y, int ns, camera **cam, hitable **world, curandState *rand_state) {
    const int px = blockIdx.x * blockDim.x + threadIdx.x;
    const int py = blockIdx.y * blockDim.y + threadIdx.y;
    if (px >= max_x || py >= max_y) {
        return;
    }

    const int idx = py * max_x + px;
    // Work on a local copy of this pixel's RNG state
    curandState rng = rand_state[idx];

    vec3 accum(0, 0, 0);
    for (int sample = 0; sample < ns; ++sample) {
        // u is drawn before v; keep these as separate statements
        const float u = float(px + curand_uniform(&rng)) / float(max_x);
        const float v = float(py + curand_uniform(&rng)) / float(max_y);
        ray sample_ray = (*cam)->get_ray(u, v, &rng);
        accum += color(sample_ray, world, &rng);
    }

    // Persist the advanced RNG state
    rand_state[idx] = rng;

    accum /= float(ns);
    for (int channel = 0; channel < 3; ++channel) {
        accum[channel] = sqrt(accum[channel]);
    }
    fb[idx] = accum;
}

// Builds the scene and the camera on the device.
// Only thread 0 of block 0 does any work.
//
// Parameters:
//   d_list     - array of hitable pointers to fill; this kernel writes 11
//                entries (1 ground sphere + 3x3 random spheres + 1 fixed
//                sphere), so it must have room for at least that many
//   d_world    - receives a new hitable_list built over the filled entries
//   d_camera   - receives a new camera
//   nx, ny     - image width and height; only their ratio is used
//   rand_state - single RNG state; copied to a local, used for all random
//                draws, and written back
//
// All objects are allocated with device-side new.
__global__ void create_world(hitable **d_list, hitable **d_world, camera **d_camera, int nx, int ny, curandState *rand_state) {
    if (threadIdx.x == 0 && blockIdx.x == 0) {
        curandState local_rand_state = *rand_state;
        // Large gray lambertian sphere of radius 1000 centered at y = -1000
        d_list[0] = new sphere(vec3(0, -1000, -1), 1000, new lambertian(vec3(0.5, 0.5, 0.5)));
        int i = 1;
        // 3x3 grid (a, b in {-1, 0, 1}) of spheres with random radius, offset and material
        for(int a = -1; a < 2; ++a) {
            for(int b = -1; b < 2; ++b) {
                float choose_mat = curand_uniform(&local_rand_state);
                float radius = curand_uniform(&local_rand_state);
                // The center's y equals the radius
                vec3 center(a + 5 * curand_uniform(&local_rand_state), radius, b + 3 * curand_uniform(&local_rand_state));
                if(choose_mat < 0.8) // Diffuse material
                    d_list[i++] = new sphere(center, radius,
                                             new lambertian(vec3(curand_uniform(&local_rand_state) * curand_uniform(&local_rand_state),
                                                                 curand_uniform(&local_rand_state) * curand_uniform(&local_rand_state),
                                                                 curand_uniform(&local_rand_state) * curand_uniform(&local_rand_state))));
                else if(choose_mat < 0.95) // Metal material
                    d_list[i++] = new sphere(center, radius,
                                             new metal(vec3(0.5 * (1 + curand_uniform(&local_rand_state)),
                                                            0.5 * (1 + curand_uniform(&local_rand_state)),
                                                            0.5 * (1 + curand_uniform(&local_rand_state))), 0.5 * curand_uniform(&local_rand_state)));
                else // Glass material
                    d_list[i++] = new sphere(center, radius, new dielectric(1.5));
            }
        }
        // One fixed glass sphere of radius 1
        d_list[i++] = new sphere(vec3(0, 1, 0),  1, new dielectric(1.5));
        // Persist the advanced RNG state
        *rand_state = local_rand_state;
        // i is the number of entries actually written to d_list
        *d_world  = new hitable_list(d_list, i);

        vec3 lookfrom(13, 2, 3);
        vec3 lookat(0, 0, 0);
        float dist_to_focus = 10;
        float aperture = 0.1;
        // NOTE(review): 30 is presumably the vertical field of view in degrees and
        // vec3(0, 1, 0) the up vector -- confirm against camera.h
        *d_camera = new camera(lookfrom, lookat, vec3(0, 1, 0), 30, nx / float(ny), aperture, dist_to_focus);
    }
}

// Releases every device-heap object allocated by create_world: each sphere's
// material, each sphere, the hitable_list and the camera.
//
// Parameters:
//   d_list   - array of hitable pointers filled by create_world
//   d_world  - pointer to the hitable_list built over d_list
//   d_camera - pointer to the camera
//
// Intended to be launched as <<<1, 1>>>; the guard below keeps any other
// launch configuration from deleting the same objects more than once.
__global__ void free_world(hitable **d_list, hitable **d_world, camera **d_camera) {
    if (threadIdx.x != 0 || blockIdx.x != 0) return;

    // Only the entries create_world actually wrote may be touched. The d_list
    // allocation is larger than the scene, and the remaining slots hold
    // uninitialized pointers that must not be dereferenced or deleted.
    // NOTE(review): relies on hitable_list exposing the count passed to its
    // (list, n) constructor as the public member `list_size` -- confirm
    // against hitable_list.h.
    const int num_objects = ((hitable_list *)*d_world)->list_size;
    for(int i = 0; i < num_objects; ++i) {
        // Every entry created by create_world is a sphere that owns its material
        delete ((sphere *)d_list[i])->mat_ptr;
        delete d_list[i];
    }
    delete *d_world;
    delete *d_camera;
}

// Converts one color channel to an integer in [0, 255].
// Values in (0, 1) map exactly as int(255.99 * channel); values at or below 0
// (and NaN, for which the first comparison is false) map to 0, and values at
// or above 1 map to 255, so the result always fits in a byte.
static int to_byte(float channel) {
    if (!(channel > 0.0f)) return 0;
    if (channel >= 1.0f) return 255;
    return int(255.99 * channel);
}

// Renders the scene on the GPU and writes it to "out.bmp".
//
// Steps: allocate the frame buffer (managed memory) and RNG states, build the
// scene with create_world, initialize per-pixel RNG and render (timed), copy
// the frame buffer into a BMP image flipped vertically, then free all device
// objects and buffers. Every CUDA call and kernel launch is checked through
// checkCudaErrors, which terminates the process on failure.
int main() {
    int nx = 1200;  // image width in pixels
    int ny = 900;   // image height in pixels
    int ns = 10;    // samples per pixel
    int tx = 8;     // thread-block width
    int ty = 8;     // thread-block height

    cerr << "Rendering a " << nx << "x" << ny << " image with " << ns << " samples per pixel ";
    cerr << "in " << tx << "x" << ty << " blocks.\n";

    int num_pixels = nx * ny;
    size_t fb_size = num_pixels * sizeof(vec3);

    // Frame buffer in managed memory so the host can read it after rendering
    vec3 *fb;
    checkCudaErrors(cudaMallocManaged((void **)&fb, fb_size));

    // d_rand_state: one RNG state per pixel; d_rand_state2: single state for scene creation
    curandState *d_rand_state;
    checkCudaErrors(cudaMalloc((void **)&d_rand_state, num_pixels * sizeof(curandState)));
    curandState *d_rand_state2;
    checkCudaErrors(cudaMalloc((void **)&d_rand_state2, sizeof(curandState)));

    rand_init<<<1,1>>>(d_rand_state2);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    // Capacity of the object list; create_world fills only part of it
    hitable **d_list;
    int num_hitables = 22 * 22 + 1 + 3;
    checkCudaErrors(cudaMalloc((void **)&d_list, num_hitables*sizeof(hitable *)));
    hitable **d_world;
    checkCudaErrors(cudaMalloc((void **)&d_world, sizeof(hitable *)));
    camera **d_camera;
    checkCudaErrors(cudaMalloc((void **)&d_camera, sizeof(camera *)));
    create_world<<<1,1>>>(d_list, d_world, d_camera, nx, ny, d_rand_state2);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    clock_t start, stop;
    start = clock();
    // Ceil-division: just enough blocks to cover the image; the kernels
    // bounds-check the partial blocks at the right and bottom edges
    dim3 blocks((nx + tx - 1) / tx, (ny + ty - 1) / ty);
    dim3 threads(tx, ty);
    render_init<<<blocks, threads>>>(nx, ny, d_rand_state);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());
    render<<<blocks, threads>>>(fb, nx, ny,  ns, d_camera, d_world, d_rand_state);
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());
    stop = clock();
    double timer_seconds = ((double)(stop - start)) / CLOCKS_PER_SEC;
    cerr << "took " << timer_seconds << " seconds.\n";

    EasyBMP::RGBColor black(0, 0, 0);
    string file_path = "out.bmp";
    EasyBMP::Image img(nx, ny, file_path.c_str(), black);
    // NOTE(review): this PPM-style header goes to stderr although the image
    // is written as BMP; kept so the program's output is unchanged
    cerr << "P3\n" << nx << " " << ny << "\n255\n";
    for (int j = 0; j < ny; ++j) {
        for (int i = 0; i < nx; ++i) {
            // Image row j is read from frame-buffer row (ny - j - 1): vertical flip
            size_t pixel_index = (size_t)(ny - j - 1) * nx + i;
            int ir = to_byte(fb[pixel_index].r());
            int ig = to_byte(fb[pixel_index].g());
            int ib = to_byte(fb[pixel_index].b());
            img.SetPixel(i, j, EasyBMP::RGBColor(ir, ig, ib));
        }
    }
    img.Write();

    checkCudaErrors(cudaDeviceSynchronize());
    free_world<<<1, 1>>>(d_list, d_world, d_camera);
    checkCudaErrors(cudaGetLastError());
    // Wait for free_world so that any fault inside it is reported here rather
    // than being attributed to the first cudaFree below
    checkCudaErrors(cudaDeviceSynchronize());
    checkCudaErrors(cudaFree(d_camera));
    checkCudaErrors(cudaFree(d_world));
    checkCudaErrors(cudaFree(d_list));
    checkCudaErrors(cudaFree(d_rand_state));
    checkCudaErrors(cudaFree(d_rand_state2));
    checkCudaErrors(cudaFree(fb));

    checkCudaErrors(cudaDeviceReset());
    return 0;
}


Rendering a 1200x900 image with 10 samples per pixel in 8x8 blocks.
took 0.106885 seconds.
P3
1200 900
255

