<a href="https://colab.research.google.com/github/hfathie/qso/blob/master/getCsound_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
%%writefile test.cu
#include <iostream> // iostream, fstream, cmath, string, vector, sstream.
#include <fstream>
#include <cmath>
#include <string>
#include <vector>
#include <sstream>
using namespace std;

const int N = 131504;

float mH = 1.6726e-24; // gram
float kB = 1.3807e-16; // cm2 g s-2 K-1
float mH2 = 2.7f * mH; 

float M_sun = 1.98992e+33; // gram
float grav_const_in_cgs = 6.67259e-8;// cm3 g-1 s-2

float G = 1.0f;

float gammah = 5.0f/3.0f;

float UnitRadius_in_pc = 2.0f;
float UnitRadius_in_cm = 3.086e18 * UnitRadius_in_pc;

float UnitMass_in_g = 10.0f * M_sun;
float UnitDensity_in_cgs = UnitMass_in_g / UnitRadius_in_cm/UnitRadius_in_cm/UnitRadius_in_cm;
float Unit_u_in_cgs = grav_const_in_cgs * UnitMass_in_g / UnitRadius_in_cm;
float Unit_P_in_cgs = UnitDensity_in_cgs * Unit_u_in_cgs;

float unitVelocity = sqrt(grav_const_in_cgs * UnitMass_in_g / UnitRadius_in_cm);


__global__ void getCsound(float *csnd, float *rho, float T_cld, float T_ps, float T_0, float kBmH2,
                          float UnitDensity_in_cgs, float unitVelocity, float gammah){

  int i = threadIdx.x + blockIdx.x * blockDim.x;

  if(i < N){

    float rhot = rho[i] * UnitDensity_in_cgs;

    if(rhot <= 1e-21){
      csnd[i] = sqrt(kBmH2 * T_cld) / unitVelocity;
    }

    if((rhot > 1e-21) && (rhot <= 2e-21)){
      csnd[i] = sqrt(kBmH2 * gammah * T_cld * pow((rhot/2e-21), (gammah - 1.0f))) / unitVelocity;
    }

    if((rhot > 2e-21) && (rhot <= 1e-18)){
      csnd[i] = sqrt(kBmH2 * T_ps) / unitVelocity;
    }

    if(rhot > 1e-18){
      csnd[i] = sqrt(kBmH2 * T_0 * (1.0f + gammah * pow((rhot/1e-14), (gammah - 1.0f)))) / unitVelocity;
    }
  }
}


int main(){

  // Reading Hydra file.
  string fname = "Hydra_130k.csv";

  vector<vector<string>> content;
  vector<string> row;
  string line, word;
  
  fstream file (fname, ios::in);
  if(file.is_open())
  {
  while(getline(file, line))
  {
  row.clear();
  
  stringstream str(line);
  
  while(getline(str, word, ','))
  row.push_back(word);
  content.push_back(row);
  }
  }
  else
  cout<<"Could not open the file\n";

  float *csnd, *d_csnd, *rho, *d_rho, T_cld, T_ps, T_0, kBmH2;

  T_cld = 10.0f;
  T_ps = 10.0f;
  T_0 = 10.0f;

  kBmH2 = kB/mH2;

  rho = new float[N];
  csnd = new float[N];

  // 0  1  2  3   4   5    6   7  8  9  10
  // x, y, z, vx, vy, vz, rho, P, c, h, m.

  for(int i=0; i<N; i++){

    rho[i] = stof(content[i][6]);
    csnd[i] = 0.0f;
  }

  cudaMalloc(&d_rho, N*sizeof(float));
  cudaMalloc(&d_csnd, N*sizeof(float));

  // Copy from Host to Device.
  cudaMemcpy(d_rho, rho, N*sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_csnd, csnd, N*sizeof(float), cudaMemcpyHostToDevice);

  // Launching the kernel on GPU
  int blockSize = 256; // number of threads in a block
  int gridSize = (N + blockSize - 1) / blockSize; // Number of blocks in a grid

  getCsound<<<gridSize, blockSize>>>(d_csnd, d_rho, T_cld,
                                       T_ps, T_0, kBmH2,
                                       UnitDensity_in_cgs,
                                       unitVelocity,
                                       gammah);

  // Wait for the GPU to finish before accessing the Host
  cudaDeviceSynchronize();

  // Copy from Device to Host.
  cudaMemcpy(csnd, d_csnd, N*sizeof(float), cudaMemcpyDeviceToHost);

  // visual inspection
  for(int i = 0; i < 10; i++){
    cout << csnd[i] << endl;
  }


}

Overwriting test.cu


In [11]:
%%shell
nvcc test.cu -o test



In [12]:
%%shell
./test

1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212




In [13]:
%%shell
nvprof ./test

==380== NVPROF is profiling process 380, command: ./test
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
==380== Profiling application: ./test
==380== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   56.40%  91.102us         2  45.551us  45.503us  45.599us  [CUDA memcpy HtoD]
                   25.99%  41.983us         1  41.983us  41.983us  41.983us  [CUDA memcpy DtoH]
                   17.61%  28.447us         1  28.447us  28.447us  28.447us  getCsound(float*, float*, float, float, float, float, float, float, float)
      API calls:   99.63%  304.97ms         2  152.49ms  4.3260us  304.97ms  cudaMalloc
                    0.14%  437.76us         3  145.92us  134.99us  163.83us  cudaMemcpy
                    0.12%  361.66us         1  361.66us  361.66us  361.66us  cuDeviceTotalMem
                    0.06%  192.71us       101  1.9080us     129ns  98.157us  cuDeviceGetAttribute
     

