<a href="https://colab.research.google.com/github/chi-yan/cuda_collatz_lengths/blob/main/CUDA_Collatz_Length_Calculator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This program computes the Collatz length (the number of steps the Collatz iteration needs to reach 1) of random integers, processing them in parallel on the GPU with CUDA.

Adapted from:

https://www.udemy.com/course/introduction-to-parallel-programming-using-gpgpu-and-cuda/learn/lecture/8077270#learning-tools

and CUDA Vector addition tutorials online

Notebook written for the Google Colab environment


In [9]:
%%writefile collatz.cu

#include "stdio.h"

/*
 * Kernel: computes the Collatz length of one starting value per thread.
 *
 * Each thread reads value[index], where index is its flat 1D global thread
 * index, applies the Collatz step (x/2 when even, 3x+1 when odd) until x
 * reaches 1, and adds the number of steps taken to length[index].
 *
 * value  - device array of starting values (read only).
 * length - device array of results; the step count is ADDED to the value
 *          already stored there, so the caller should zero it first.
 *          length[index] is set to -1 when the sequence cannot be followed:
 *          the start is not a positive integer, or 3x+1 would not fit in int.
 *
 * There is no bounds check: the launch must supply exactly one thread per
 * array element (gridDim.x * blockDim.x == number of elements).
 * The result line is printed with device printf, which is slow and intended
 * for small demonstration runs only.
 */
__global__
void calc_length(int *value, int *length) {
    int index = threadIdx.x + blockIdx.x * blockDim.x;
    const int start = value[index];

    // Largest x for which 3*x + 1 still fits in a 32-bit signed int.
    const int max_before_triple = (2147483647 - 1) / 3;

    int x = start;
    int steps = 0;  // counted in a register; global memory is written once at the end

    // 0 would loop forever (0/2 == 0) and negatives never reach 1.
    bool failed = (x < 1);

    while (!failed && x != 1) {
      if (x % 2 == 0) {
        x = x / 2;
      } else if (x > max_before_triple) {
        // Test BEFORE multiplying: signed overflow is undefined behaviour and
        // a wrapped result is not guaranteed to be negative.
        failed = true;
      } else {
        x = 3*x + 1;
      }

      if (!failed)
        steps += 1;
    }

    const int result = failed ? -1 : length[index] + steps;
    length[index] = result;

    printf("Block: %d Thread: %d Index: %d Starting Value: %d, Length: %d\n", blockIdx.x, threadIdx.x, index, start, result);
}

// Fills value[0..N-1] with pseudo-random integers in [0, 9999999], one rand()
// call per element in ascending index order. Does nothing when N <= 0.
void random_ints(int* value, int N) {
   int *slot = value;
   for (int remaining = N; remaining > 0; --remaining) {
     const int drawn = rand() % 10000000;
     *slot = drawn;
     ++slot;
   }
}

// Sets the first N entries of length to zero. Does nothing when N <= 0.
void initialize_lengths(int *length, int N) {
  int pos = 0;
  while (pos < N) {
    length[pos] = 0;
    pos += 1;
  }
}

#define N (16*16)             // number of starting values processed
#define THREADS_PER_BLOCK 8   // threads in each block of the launch

// calc_length has no bounds check, so the grid must cover exactly N threads.
#if (N % THREADS_PER_BLOCK) != 0
#error "N must be a multiple of THREADS_PER_BLOCK"
#endif

// Reports a failed CUDA runtime call on stderr.
// Returns 1 when err is cudaSuccess, 0 otherwise.
static int cuda_ok(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return 1;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return 0;
}

/*
 * Generates N random starting values, runs calc_length on the GPU with one
 * thread per value, and copies the resulting lengths back to the host.
 * Returns 0 on success and 1 if a host allocation or any CUDA call failed.
 */
int main(void) {

    int *value = NULL, *length = NULL;      // host copies of value, length
    int *d_value = NULL, *d_length = NULL;  // device copies of value, length
    const size_t size = N * sizeof(int);
    int ok;

    // Allocate and fill the host buffers
    value = (int *)malloc(size);
    length = (int *)malloc(size);
    ok = (value != NULL && length != NULL);
    if (!ok) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)size);
    } else {
        random_ints(value, N);
        initialize_lengths(length, N);
    }

    // Allocate device buffers
    ok = ok && cuda_ok(cudaMalloc((void **)&d_value, size), "cudaMalloc(d_value)");
    ok = ok && cuda_ok(cudaMalloc((void **)&d_length, size), "cudaMalloc(d_length)");

    // Copy inputs to device
    ok = ok && cuda_ok(cudaMemcpy(d_value, value, size, cudaMemcpyHostToDevice), "copy of value to device");
    ok = ok && cuda_ok(cudaMemcpy(d_length, length, size, cudaMemcpyHostToDevice), "copy of length to device");

    if (ok) {
        // Launch calc_length() with N/THREADS_PER_BLOCK blocks of THREADS_PER_BLOCK threads
        calc_length<<<N/THREADS_PER_BLOCK, THREADS_PER_BLOCK>>>(d_value, d_length);
        // A launch returns no status itself; configuration errors show up here.
        ok = cuda_ok(cudaGetLastError(), "calc_length launch");
    }

    // Copy result back to host. This blocking copy waits for the kernel, so
    // errors raised while the kernel was running are reported here.
    ok = ok && cuda_ok(cudaMemcpy(length, d_length, size, cudaMemcpyDeviceToHost), "copy of length to host");

    // Cleanup (free(NULL) and cudaFree(NULL) are no-ops, so this is safe on every path)
    free(value); free(length);
    cudaFree(d_value); cudaFree(d_length);
    return ok ? 0 : 1;
}

Overwriting collatz.cu


In [7]:
!nvcc collatz.cu 
!./a.out

Block: 27 Thread: 0 Index: 216 Starting Value: 6043324, Length: 152
Block: 27 Thread: 1 Index: 217 Starting Value: 6987743, Length: 129
Block: 27 Thread: 2 Index: 218 Starting Value: 9259470, Length: 132
Block: 27 Thread: 3 Index: 219 Starting Value: 9512183, Length: 101
Block: 27 Thread: 4 Index: 220 Starting Value: 298490, Length: 189
Block: 27 Thread: 5 Index: 221 Starting Value: 8295499, Length: 199
Block: 27 Thread: 6 Index: 222 Starting Value: 6689772, Length: 85
Block: 27 Thread: 7 Index: 223 Starting Value: 6206725, Length: 152
Block: 10 Thread: 0 Index: 80 Starting Value: 7171087, Length: 129
Block: 10 Thread: 1 Index: 81 Starting Value: 6426808, Length: 165
Block: 10 Thread: 2 Index: 82 Starting Value: 5117276, Length: 113
Block: 10 Thread: 3 Index: 83 Starting Value: 9947178, Length: 132
Block: 10 Thread: 4 Index: 84 Starting Value: 695788, Length: 105
Block: 10 Thread: 5 Index: 85 Starting Value: 9393584, Length: 176
Block: 10 Thread: 6 Index: 86 Starting Value: 1705403, Le