In [295]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-t_f81ilj
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-t_f81ilj
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 28f872a2f99a1b201bcd0db14fdbc5a496b9bfd7
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [296]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [297]:
%%writefile kernel.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "curand_kernel.h"
#include "cuda.h"

#include <stdio.h>

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <vector>

// Recursion limit for reflected rays: CastRay returns the background color
// once its depth argument exceeds this value.
#define MAX_DEPTH 5
// Width and height of the (square) output image, in pixels.
#define DIM 800
// Size in bytes of the RGBA image buffer (4 bytes per pixel). The expansion is
// parenthesized so the macro stays correct inside larger expressions such as
// `n % IMAGE_SIZE` or `total / IMAGE_SIZE`.
#define IMAGE_SIZE (DIM * DIM * 4)

// Number of spheres and lights generated for the scene.
#define SPHERESNUMBER 10
#define LIGHTSNUMBER 2
#define PI 3.14159265
// Camera field of view in radians (PI / 2, i.e. 90 degrees).
const float fov = PI / 2.;

// Minimal 3-component float vector used for positions, directions and colors.
//
// Every member is marked __host__ __device__ so the type can be used from
// kernels as well as from host code (scene setup, CPU-side tests). Existing
// device-side callers are unaffected.
class Vec3f {
public:
    float x, y, z;

    // Component-wise constructor; omitted components default to 0.
    __host__ __device__ Vec3f(float x = 0, float y = 0, float z = 0) : x(x), y(y), z(z) {}

    // Component-wise difference and sum.
    __host__ __device__ Vec3f operator-(const Vec3f& b) const { return Vec3f(x - b.x, y - b.y, z - b.z); }
    __host__ __device__ Vec3f operator+(const Vec3f& b) const { return Vec3f(x + b.x, y + b.y, z + b.z); }

    // Scaling by a scalar.
    __host__ __device__ Vec3f operator*(float b) const { return Vec3f(x * b, y * b, z * b); }

    // Dot product (note: vector * vector yields a scalar, not a vector).
    __host__ __device__ float operator*(const Vec3f& b) const { return x * b.x + y * b.y + z * b.z; }

    // Cross product of *this with b.
    __host__ __device__ Vec3f cross(const Vec3f& b) const {
        return Vec3f(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x);
    }

    // Euclidean length.
    __host__ __device__ float magnitude() const { return sqrtf(x * x + y * y + z * z); }

    // Returns this vector scaled to unit length. The zero vector is not
    // special-cased: it divides by a zero magnitude and yields non-finite
    // components.
    __host__ __device__ Vec3f normalize() const { return *this * (1.0f / magnitude()); }

    // Unary negation.
    __host__ __device__ Vec3f operator-() const { return Vec3f(-x, -y, -z); }
};

// Light source described by a position and a scalar intensity.
struct Light {
    Vec3f position;   // where the light is located
    float intensity;  // multiplier applied to the diffuse and specular terms

    // Device-side constructor; copies both arguments into the members.
    __device__ Light(const Vec3f& position, const float& intensity)
        : position(position),
          intensity(intensity) {}
};

// Surface description used when shading a hit point.
struct Material {
    // Weights of the three shading terms as combined in CastRay:
    // x scales the diffuse term, y the specular term, z the reflected color.
    Vec3f albedo;
    // Base color multiplied by the accumulated diffuse light intensity.
    Vec3f diffuse_color;
    // Exponent applied to the specular highlight term.
    float specular;

    // Builds a material from explicit weights, color and specular exponent.
    __device__ Material(const Vec3f& albedo, const Vec3f& diffuse_color, const float& specular)
        : albedo(albedo),
          diffuse_color(diffuse_color),
          specular(specular) {}

    // Default material: diffuse-only weight (1, 0, 0), color (1, 0, 0) and a
    // specular exponent of 0.
    __device__ Material()
        : albedo(1, 0, 0),
          diffuse_color(1.0, 0.0, 0.0),
          specular(0) {}
};

// Sphere primitive: center, radius and the material used to shade it.
struct Sphere {
    Vec3f center;
    float radius;
    Material material;

    // Device-side constructor; copies all three arguments into the members.
    __device__ Sphere(const Vec3f& center, const float& radius, const Material& material)
        : center(center),
          radius(radius),
          material(material) {}

    // Tests a ray against this sphere.
    //
    // ray_origin / ray_direction describe the ray; the math treats
    // ray_direction as unit length (the dot product with it is used directly
    // as a distance along the ray).
    // On a hit, intersection_distance receives the distance along the ray to
    // the nearer intersection, or to the farther one when the nearer lies
    // behind the origin. Returns true only when that distance is >= 0.
    // When the ray's line misses the sphere entirely, returns false and leaves
    // intersection_distance untouched.
    __device__ bool RayIntersect(const Vec3f& ray_origin, const Vec3f& ray_direction, float& intersection_distance) const {
        const Vec3f to_center = center - ray_origin;

        // Distance along the ray to the point closest to the sphere center.
        const float t_closest = to_center * ray_direction;

        // Squared distance from the sphere center to the ray's line.
        const float miss_sq = to_center * to_center - t_closest * t_closest;
        if (miss_sq > radius * radius) {
            return false;
        }

        // Half the length of the chord the line cuts through the sphere.
        const float half_chord = sqrtf(radius * radius - miss_sq);
        const float t_near = t_closest - half_chord;
        const float t_far = t_closest + half_chord;

        intersection_distance = (t_near < 0) ? t_far : t_near;
        return intersection_distance >= 0;
    }
};

// Mirrors `directional` about the plane perpendicular to `normal`:
// result = directional - 2 * normal * dot(directional, normal).
__device__ Vec3f Reflect(const Vec3f& directional, const Vec3f& normal) {
    const Vec3f doubled_normal = normal * 2.f;
    const float along_normal = directional * normal;  // dot product
    return directional - doubled_normal * along_normal;
}

// Finds the closest sphere hit by the ray (orig, dir) among the first
// SPHERESNUMBER entries of `spheres`.
//
// On success returns true and fills hit_point (position of the hit), normal
// (unit vector from the sphere center through the hit point) and mat (the hit
// sphere's material). Hits at a distance of 1e6 or more are ignored; when
// nothing closer is found the function returns false.
__device__ bool SceneIntersect(const Vec3f& orig, const Vec3f& dir, Sphere* spheres, Vec3f& hit_point, Vec3f& normal, Material& mat) {
    const float kFarLimit = 1e6;
    float nearest = kFarLimit;

    for (int idx = 0; idx < SPHERESNUMBER; ++idx) {
        const Sphere& candidate = spheres[idx];

        float t;
        if (!candidate.RayIntersect(orig, dir, t)) {
            continue;
        }
        if (!(t < nearest)) {
            continue;  // an earlier sphere is at least as close
        }

        nearest = t;
        hit_point = orig + dir * t;
        normal = (hit_point - candidate.center).normalize();
        mat = candidate.material;
    }

    return nearest < kFarLimit;
}

// Traces one ray through the scene and returns its color.
//
// original / directional: ray origin and direction.
// spheres: array of SPHERESNUMBER spheres; lights: array of LIGHTSNUMBER lights.
// depth: current recursion depth; the function calls itself once per hit for
//        the reflected ray and stops when depth exceeds MAX_DEPTH.
//
// Returns the background color (170, 155, 200) when the recursion limit is
// exceeded or nothing is hit. Otherwise returns the sum of the diffuse,
// specular and reflected contributions, weighted by material.albedo.x/.y/.z,
// with each component clamped to [0, 255]. No occlusion test is performed
// between the hit point and the lights.
__device__ Vec3f CastRay(const Vec3f& original, const Vec3f& directional, Sphere* spheres, Light* lights, int depth) {
    Vec3f point, N;
    Material material;

    if (depth > MAX_DEPTH || !SceneIntersect(original, directional, spheres, point, N, material)) {
        return Vec3f(170.0f, 155.0f, 200.0f); // background color
    }

    Vec3f reflect_directional = Reflect(directional, N).normalize();
    // Nudge the secondary ray's origin off the surface, on the side the
    // reflected ray leaves from, so it does not immediately re-hit this sphere.
    Vec3f reflect_original = (reflect_directional * N < 0) ? point - N * 1e-3f : point + N * 1e-3f;
    Vec3f reflect_color = CastRay(reflect_original, reflect_directional, spheres, lights, depth + 1);

    float diffuse_light_intensity = 0, specular_light_intensity = 0;

    for (int i = 0; i < LIGHTSNUMBER; i++) {
        Vec3f light_dir = (lights[i].position - point).normalize();

        // Diffuse term: cosine between the surface normal and the light direction.
        diffuse_light_intensity += lights[i].intensity * fmaxf(0.f, light_dir * N);
        // Specular term: alignment of the mirrored light direction with the
        // viewing ray, raised to the material's specular exponent.
        specular_light_intensity += powf(fmaxf(0.f, -Reflect(-light_dir, N) * directional), material.specular) * lights[i].intensity;
    }

    Vec3f color = material.diffuse_color * diffuse_light_intensity * material.albedo.x +
                  Vec3f(1.0f, 1.0f, 1.0f) * specular_light_intensity * material.albedo.y +
                  reflect_color * material.albedo.z;

    // Clamp color values to [0, 255]
    color.x = fminf(fmaxf(color.x, 0.f), 255.f);
    color.y = fminf(fmaxf(color.y, 0.f), 255.f);
    color.z = fminf(fmaxf(color.z, 0.f), 255.f);

    return color;
}

// Renders the scene into `image`, one thread per pixel.
//
// Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM threads;
// threads whose (x, y) falls outside the image do nothing.
// spheres / lights: device arrays of SPHERESNUMBER and LIGHTSNUMBER elements.
// image: device buffer of DIM * DIM * 4 bytes, written as RGBA rows of DIM
//        pixels; row 0 corresponds to the top of the view (positive pixelY).
// depth: initial recursion depth handed to CastRay.
__global__ void RayTracing(Sphere* spheres, Light* lights, unsigned char* image, int depth) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    if (x < DIM && y < DIM) {
        // Index by the image width rather than by the launch width so the
        // kernel stays correct when the grid is larger than the image.
        int pixelIndex = x + y * DIM;

        // Single-precision tangent; the value is the same for every pixel.
        const float halfFovTan = tanf(fov / 2.f);
        // Width / height of the image (1 for the square DIM x DIM frame).
        const float aspect = DIM / (float)DIM;

        // Map the pixel center to [-1, 1] and scale by the field of view.
        float pixelX = (2.f * (x + 0.5f) / (float)DIM - 1.f) * halfFovTan * aspect;
        float pixelY = -(2.f * (y + 0.5f) / (float)DIM - 1.f) * halfFovTan;
        Vec3f rayDirection = Vec3f(pixelX, pixelY, 1).normalize();
        Vec3f rayColor = CastRay(Vec3f(0, 0, 0), rayDirection, spheres, lights, depth);

        image[pixelIndex * 4 + 0] = (unsigned char)(rayColor.x);
        image[pixelIndex * 4 + 1] = (unsigned char)(rayColor.y);
        image[pixelIndex * 4 + 2] = (unsigned char)(rayColor.z);
        image[pixelIndex * 4 + 3] = 255; // Alpha channel
    }
}

// Fills `spheres` with SPHERESNUMBER randomly generated spheres, one per thread.
//
// Launch layout: 1D; threads with a global index >= SPHERESNUMBER exit early.
// seed: cuRAND seed; each thread uses its global index as the subsequence.
// spheres: device array with room for SPHERESNUMBER elements.
__global__ void GenerateSpheres(unsigned int seed, Sphere* spheres) {
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid >= SPHERESNUMBER) {
        return;
    }

    curandState_t rng;
    curand_init(seed, tid, 0, &rng);

    // The first random draw selects one of three preset materials.
    const float materialPick = curand_uniform(&rng);
    Material chosen;
    if (materialPick > 0.66) {
        chosen = Material(Vec3f(2.0, 1.0, 0.5), Vec3f(1.0, 0.5, 0.2), 100.);
    } else if (materialPick < 0.33) {
        chosen = Material(Vec3f(0.0, 10.0, 0.8), Vec3f(1.0, 1.0, 1.0), 1425.);
    } else {
        chosen = Material(Vec3f(0.5, 0.7, 1.0), Vec3f(0.1, 0.2, 0.5), 50.);
    }

    // The remaining draws, in this order: x, y, z of the center, then radius.
    Sphere& target = spheres[tid];
    target.material = chosen;
    target.center.x = (curand_uniform(&rng) * 2 - 1) * DIM / 20;
    target.center.y = (curand_uniform(&rng) * 2 - 1) * DIM / 20;
    target.center.z = 100 + curand_uniform(&rng) * 50;
    target.radius = curand_uniform(&rng) * DIM / 200 + 5;
}

// Fills `lights` with LIGHTSNUMBER lights at random positions, one per thread.
//
// Launch layout: 1D; threads with a global index >= LIGHTSNUMBER exit early.
// seed: cuRAND seed; each thread uses its global index as the subsequence.
// lights: device array with room for LIGHTSNUMBER elements.
__global__ void GenerateLights(unsigned int seed, Light* lights) {
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid >= LIGHTSNUMBER) {
        return;
    }

    curandState_t rng;
    curand_init(seed, tid, 0, &rng);

    Light& target = lights[tid];
    target.intensity = 255;
    // Each coordinate is drawn in turn (x, then y, then z) and scaled by DIM / 2.
    target.position.x = (2 * curand_uniform(&rng) - 1) * DIM / 2;
    target.position.y = (2 * curand_uniform(&rng) - 1) * DIM / 2;
    target.position.z = (2 * curand_uniform(&rng) - 1) * DIM / 2;
}

// The two header structs are written to disk byte-for-byte, so they are packed
// to 1-byte alignment to match the BMP on-disk layout.
#pragma pack(push, 1)
// BMP file header (14 bytes).
struct BMPHeader {
    uint16_t bfType = 0x4D42; // 'BM'
    uint32_t bfSize;          // File size in bytes
    uint16_t bfReserved1 = 0;
    uint16_t bfReserved2 = 0;
    uint32_t bfOffBits;       // Offset to the start of image data
};

// BITMAPINFOHEADER-style DIB header (40 bytes).
struct DIBHeader {
    uint32_t biSize = 40;     // Size of DIB structure
    int32_t  biWidth;         // Image width
    int32_t  biHeight;        // Image height (positive: rows stored bottom-up)
    uint16_t biPlanes = 1;    // Number of color planes
    uint16_t biBitCount = 32; // Bits per pixel (32 bits = 4 bytes)
    uint32_t biCompression = 0; // No compression
    uint32_t biSizeImage;     // Size of image data in bytes
    int32_t  biXPelsPerMeter = 0; // Horizontal resolution
    int32_t  biYPelsPerMeter = 0; // Vertical resolution
    uint32_t biClrUsed = 0;    // Number of colors used
    uint32_t biClrImportant = 0; // Number of important colors
};
#pragma pack(pop)

static_assert(sizeof(BMPHeader) == 14, "BMPHeader must be packed to 14 bytes");
static_assert(sizeof(DIBHeader) == 40, "DIBHeader must be packed to 40 bytes");

// Writes an RGBA image to `filename` as an uncompressed 32-bit BMP.
//
// filename: path of the file to create or overwrite.
// data:     width * height RGBA pixels (4 bytes each), row-major, with the top
//           image row first. The buffer is only read, never modified.
// width, height: image dimensions in pixels.
//
// The file stores pixels as BGRA and, because biHeight is positive, rows from
// bottom to top; both conversions are done on a per-row scratch buffer.
// Returns silently without writing pixel data when an argument is invalid
// (null pointer, non-positive dimension, image too large for the 32-bit size
// fields) or when the file cannot be opened.
void saveBMP(const char* filename, unsigned char* data, int width, int height) {
    if (filename == nullptr || data == nullptr || width <= 0 || height <= 0) {
        return;
    }

    // Do the size arithmetic in size_t so large images cannot overflow int.
    const size_t rowBytes = static_cast<size_t>(width) * 4;
    const size_t imageBytes = rowBytes * static_cast<size_t>(height);
    const size_t headerBytes = sizeof(BMPHeader) + sizeof(DIBHeader);
    if (imageBytes > static_cast<size_t>(UINT32_MAX) - headerBytes) {
        return; // does not fit the 32-bit bfSize / biSizeImage fields
    }

    BMPHeader bmpHeader;
    DIBHeader dibHeader;

    dibHeader.biWidth = width;
    dibHeader.biHeight = height;
    dibHeader.biSizeImage = static_cast<uint32_t>(imageBytes);

    bmpHeader.bfOffBits = static_cast<uint32_t>(headerBytes);
    bmpHeader.bfSize = bmpHeader.bfOffBits + dibHeader.biSizeImage;

    std::ofstream file(filename, std::ios::binary);
    if (!file) {
        return;
    }

    file.write(reinterpret_cast<const char*>(&bmpHeader), sizeof(bmpHeader));
    file.write(reinterpret_cast<const char*>(&dibHeader), sizeof(dibHeader));

    // Emit rows bottom-up, converting RGBA -> BGRA into a scratch row so the
    // caller's buffer is left untouched.
    std::vector<unsigned char> row(rowBytes);
    for (int y = height - 1; y >= 0; --y) {
        const unsigned char* src = data + static_cast<size_t>(y) * rowBytes;
        for (size_t x = 0; x < static_cast<size_t>(width); ++x) {
            row[x * 4 + 0] = src[x * 4 + 2]; // blue
            row[x * 4 + 1] = src[x * 4 + 1]; // green
            row[x * 4 + 2] = src[x * 4 + 0]; // red
            row[x * 4 + 3] = src[x * 4 + 3]; // alpha
        }
        file.write(reinterpret_cast<const char*>(row.data()),
                   static_cast<std::streamsize>(rowBytes));
    }
    // The std::ofstream destructor flushes and closes the file.
}


// Bundle of raw pointers used by main().
struct DataBlock {
    unsigned char* bitmap; // image buffer; main() allocates it with new[] and frees it with delete[]
    Sphere* spheres;       // not referenced by the main() in this file
};

// Evaluates a CUDA runtime call and terminates the program with a diagnostic
// if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_err_ = (call);                                \
        if (cuda_check_err_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(cuda_check_err_));                    \
            std::exit(EXIT_FAILURE);                                         \
        }                                                                    \
    } while (0)

// Generates a random scene on the GPU, ray-traces it into a DIM x DIM RGBA
// image, prints the elapsed GPU time and writes the image to result.bmp.
// Every CUDA call and kernel launch is checked; any failure aborts the run.
int main() {
    constexpr int kBlockDim = 16;
    // The launch below uses DIM / kBlockDim blocks per axis, which covers the
    // whole image only when DIM is an exact multiple of the block size.
    static_assert(DIM % kBlockDim == 0, "DIM must be a multiple of the block size");

    DataBlock data;
    data.bitmap = new unsigned char[IMAGE_SIZE];

    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));
    CUDA_CHECK(cudaEventRecord(start, 0));

    unsigned char* dev_bitmap;
    Sphere* spheres;
    Light* lights;

    CUDA_CHECK(cudaMalloc((void**)&dev_bitmap, IMAGE_SIZE));
    CUDA_CHECK(cudaMalloc((void**)&spheres, sizeof(Sphere) * SPHERESNUMBER));
    CUDA_CHECK(cudaMalloc((void**)&lights, sizeof(Light) * LIGHTSNUMBER));

    dim3 grids(DIM / kBlockDim, DIM / kBlockDim);
    dim3 threads(kBlockDim, kBlockDim);

    // Both generators index their cuRAND subsequence by thread id, so give the
    // lights a different seed to avoid replaying the spheres' random numbers.
    const unsigned int sphere_seed = static_cast<unsigned int>(time(0));
    const unsigned int light_seed = sphere_seed + 1u;

    GenerateSpheres<<<1, SPHERESNUMBER>>>(sphere_seed, spheres);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    GenerateLights<<<1, LIGHTSNUMBER>>>(light_seed, lights);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    RayTracing<<<grids, threads>>>(spheres, lights, dev_bitmap, 1);
    CUDA_CHECK(cudaGetLastError());
    // The blocking copy also reports any error raised during the render.
    CUDA_CHECK(cudaMemcpy(data.bitmap, dev_bitmap, IMAGE_SIZE, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));

    float elapsed_time;
    CUDA_CHECK(cudaEventElapsedTime(&elapsed_time, start, stop));
    printf("Time: %f ms\n", elapsed_time);

    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));
    CUDA_CHECK(cudaFree(dev_bitmap));
    CUDA_CHECK(cudaFree(spheres));
    CUDA_CHECK(cudaFree(lights));

    saveBMP("result.bmp", data.bitmap, DIM, DIM);
    delete[] data.bitmap;

    return 0;
}

Overwriting kernel.cu


In [None]:
# Compile the code
!nvcc kernel.cu -o kernel

# Run the executable
!./kernel