In [4]:
%%writefile MIRROR.cu

Overwriting MIRROR.cu


In [8]:
%%writefile MIRROR.cu
#include <stdio.h>
#include <cuda_runtime.h>

// Problem configuration consumed by main(): N sizes the host and device
// buffers and is passed to the kernel as the element count; ETA is passed to
// the kernel as its step size.
#define N 1024  // Number of elements
#define ETA 0.5f // Larger learning rate

// Mirror Maps
// Integer selectors for the `mirror_map` argument of mirror_descent(); each
// value picks one case of the kernel's switch statement. Any other value falls
// through to the kernel's default case, which leaves x unchanged.
#define EUCLIDEAN         0  // Standard gradient descent
#define NEGATIVE_ENTROPY  1  // Exponentiated gradient descent
#define LOG_BARRIER       2  // Positive orthant

/**
 * Applies one mirror-descent step element-wise, in place, to x.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles the single element
 * at its flat global index (blockIdx.x * blockDim.x + threadIdx.x) and threads
 * whose index is >= n return immediately, so the launch must provide at least
 * n threads. No shared memory is used.
 *
 * @param x          device-accessible array of n values; overwritten with the
 *                   updated iterate
 * @param grad       device-accessible array of n gradient values (only read)
 * @param eta        step size
 * @param mirror_map EUCLIDEAN, NEGATIVE_ENTROPY or LOG_BARRIER; any other
 *                   value leaves x unchanged
 * @param n          number of elements in x and grad
 *
 * Debug aid: threads with index < 10 print their input and output values via
 * device printf.
 */
__global__ void mirror_descent(float *x, float *grad, float eta, int mirror_map, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) return;

    // Load each operand once; every branch below works on these two values.
    const float xi = x[i];
    const float gi = grad[i];

    // Debug output for the first 10 threads (before the update)
    if (i < 10) {
        printf("Thread %d: Initial x[%d] = %f, grad[%d] = %f\n", i, i, xi, i, gi);
    }

    float new_x = xi;  // unknown mirror_map values keep x as is

    switch (mirror_map) {
        case EUCLIDEAN:
            // phi(x) = x^2 / 2  ->  plain gradient step
            new_x = xi - eta * gi;
            break;

        case NEGATIVE_ENTROPY:
            // phi(x) = x log x  ->  multiplicative (exponentiated) update
            new_x = xi * expf(-eta * gi);
            break;

        case LOG_BARRIER:
            // phi(x) = -log x, so grad phi(x) = -1/x and the mirror step
            // 1/x_new = 1/x + eta * g  gives  x_new = x / (1 + eta * x * g).
            // The factor x in the denominator is required; without it the
            // formula is only correct when x == 1.
            // NOTE: the step is only meaningful while 1 + eta * x * g > 0;
            // otherwise the iterate leaves the positive orthant.
            new_x = xi / (1.0f + eta * xi * gi);
            break;

        default:
            break;
    }

    x[i] = new_x;

    // Debug output for the first 10 threads (after the update)
    if (i < 10) {
        printf("Thread %d: Updated x[%d] = %f\n", i, i, new_x);
    }
}

/**
 * Aborts the program if a CUDA runtime call failed.
 *
 * @param result status code returned by a CUDA runtime call
 * @param msg    short label for the failing step, echoed in the error message
 *
 * On cudaSuccess this returns normally. Otherwise it writes the label and the
 * runtime's description of the error to stderr and terminates with exit(-1).
 */
void checkCuda(cudaError_t result, const char *msg) {
    // Fast path: nothing to report.
    if (result == cudaSuccess) {
        return;
    }

    const char *reason = cudaGetErrorString(result);
    fprintf(stderr, "CUDA Error: %s (%s)\n", msg, reason);
    exit(-1);
}

/**
 * Host driver for a single mirror-descent step on N elements.
 *
 * Fills x with 1.0 and grad with 0.5 * i, copies both to the device, runs
 * mirror_descent once with step size ETA and the NEGATIVE_ENTROPY map, copies
 * x back and prints its first 10 entries.
 *
 * @return 0 on success, 1 if a host allocation fails. Any CUDA failure
 *         terminates the process inside checkCuda().
 */
int main() {
    const size_t bytes = N * sizeof(float);  // size of each host/device buffer
    int mirror_map = NEGATIVE_ENTROPY; // Choose the method

    // Allocate host memory; malloc can fail, so check before writing to it.
    float *x = (float*)malloc(bytes);
    float *grad = (float*)malloc(bytes);
    if (x == NULL || grad == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(x);     // free(NULL) is a no-op
        free(grad);
        return 1;
    }

    // Allocate device memory
    float *d_x = NULL;
    float *d_grad = NULL;
    checkCuda(cudaMalloc(&d_x, bytes), "Alloc d_x");
    checkCuda(cudaMalloc(&d_grad, bytes), "Alloc d_grad");

    // Initialize x and grad
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;  // Start with x_t = 1
        grad[i] = 0.5f * i; // Larger gradient values for visible updates
    }

    // Copy to GPU
    checkCuda(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice), "Memcpy x -> d_x");
    checkCuda(cudaMemcpy(d_grad, grad, bytes, cudaMemcpyHostToDevice), "Memcpy grad -> d_grad");

    // Kernel execution: one thread per element, rounded up to whole blocks.
    int blockSize = 256;
    int numBlocks = (N + blockSize - 1) / blockSize;
    mirror_descent<<<numBlocks, blockSize>>>(d_x, d_grad, ETA, mirror_map, N);
    // A launch does not return a status itself; launch-time errors (e.g. an
    // invalid configuration) are reported through cudaGetLastError().
    checkCuda(cudaGetLastError(), "Kernel launch");
    // Errors raised while the kernel runs surface at the next synchronization.
    checkCuda(cudaDeviceSynchronize(), "Kernel execution");

    // Copy results back
    checkCuda(cudaMemcpy(x, d_x, bytes, cudaMemcpyDeviceToHost), "Memcpy d_x -> x");

    // Print first 10 results
    for (int i = 0; i < 10; i++) {
        printf("Host: x[%d] = %f\n", i, x[i]);
    }

    // Cleanup
    free(x);
    free(grad);
    checkCuda(cudaFree(d_x), "Free d_x");
    checkCuda(cudaFree(d_grad), "Free d_grad");

    return 0;
}

Overwriting MIRROR.cu


In [9]:
!nvcc MIRROR.cu -o MIRROR -gencode arch=compute_75,code=sm_75 -lcublas

!./MIRROR.cu

/bin/bash: line 1: ./MIRROR.cu: Permission denied


In [10]:
!chmod +x MIRROR
!./MIRROR

Thread 0: Initial x[0] = 1.000000, grad[0] = 0.000000
Thread 1: Initial x[1] = 1.000000, grad[1] = 0.500000
Thread 2: Initial x[2] = 1.000000, grad[2] = 1.000000
Thread 3: Initial x[3] = 1.000000, grad[3] = 1.500000
Thread 4: Initial x[4] = 1.000000, grad[4] = 2.000000
Thread 5: Initial x[5] = 1.000000, grad[5] = 2.500000
Thread 6: Initial x[6] = 1.000000, grad[6] = 3.000000
Thread 7: Initial x[7] = 1.000000, grad[7] = 3.500000
Thread 8: Initial x[8] = 1.000000, grad[8] = 4.000000
Thread 9: Initial x[9] = 1.000000, grad[9] = 4.500000
Thread 0: Updated x[0] = 1.000000
Thread 1: Updated x[1] = 0.778801
Thread 2: Updated x[2] = 0.606531
Thread 3: Updated x[3] = 0.472367
Thread 4: Updated x[4] = 0.367879
Thread 5: Updated x[5] = 0.286505
Thread 6: Updated x[6] = 0.223130
Thread 7: Updated x[7] = 0.173774
Thread 8: Updated x[8] = 0.135335
Thread 9: Updated x[9] = 0.105399
Host: x[0] = 1.000000
Host: x[1] = 0.778801
Host: x[2] = 0.606531
Host: x[3] = 0.472367
Host: x[4] = 0.367879
Host: x[5]