<a href="https://colab.research.google.com/github/hfathie/qso/blob/master/getPressure_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
%%writefile test.cu
#include <iostream>
#include <fstream>
#include <cmath>
#include <string>
#include <vector>
#include <sstream>
#include <exception>
using namespace std;

// Number of elements processed; also used as the bounds check inside the kernel.
const int N = 131504;

// ---- Dimensionless / code-unit parameters ----
float G = 1.0f;
float gammah = 5.0f/3.0f;

// ---- Physical constants (cgs) ----
float mH = 1.6726e-24; // gram
float mH2 = 2.7f * mH;
float kB = 1.3807e-16; // cm2 g s-2 K-1
float grav_const_in_cgs = 6.67259e-8;// cm3 g-1 s-2
float M_sun = 1.98992e+33; // gram

// ---- Code-unit <-> cgs conversion factors ----
float UnitRadius_in_pc = 2.0f;
float UnitRadius_in_cm = 3.086e18 * UnitRadius_in_pc;
float UnitMass_in_g = 10.0f * M_sun;

// Divide by the radius three separate times rather than by its cube:
// the cube of UnitRadius_in_cm (~6e18) would exceed the float range.
float UnitDensity_in_cgs = UnitMass_in_g / UnitRadius_in_cm/UnitRadius_in_cm/UnitRadius_in_cm;

float Unit_u_in_cgs = grav_const_in_cgs * UnitMass_in_g / UnitRadius_in_cm;
float Unit_P_in_cgs = UnitDensity_in_cgs * Unit_u_in_cgs;


// Piecewise pressure-from-density kernel.
//
// Each thread handles one element: it scales rho[idx] by UnitDensity_in_cgs,
// selects one of four density ranges (split at 1e-21, 2e-21 and 1e-18) and
// writes the corresponding pressure, divided by Unit_P_in_cgs, into P[idx].
//
//   P, rho              arrays indexed 0..N-1 (N is the file-level constant)
//   T_cld, T_ps, T_0    temperatures used by the individual density ranges
//   kBmH2               multiplicative factor (the caller passes kB / mH2)
//   UnitDensity_in_cgs  factor applied to rho before the range test
//   Unit_P_in_cgs       divisor applied to the result
//   gammah              exponent parameter; (gammah - 1) is the power-law index
//
// Launch layout: 1D grid of 1D blocks with at least N threads in total.
// If the scaled density is NaN no range matches and P[idx] is left untouched.
__global__ void getPressure(float *P, float *rho, float T_cld, float T_ps, float T_0, float kBmH2,
                            float UnitDensity_in_cgs, float Unit_P_in_cgs, float gammah){

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;

  // Surplus threads in the last block have no element to work on.
  if(idx >= N){
    return;
  }

  const float rhoScaled = rho[idx] * UnitDensity_in_cgs;

  // The threshold literals are doubles on purpose: the comparisons and the
  // pow() calls are carried out in double precision, as before.
  if(rhoScaled <= 1e-21){
    // Lowest-density range: linear in density at T_cld.
    P[idx] = rhoScaled * kBmH2 * T_cld / Unit_P_in_cgs;
  }
  else if(rhoScaled <= 2e-21){
    // Power-law range between 1e-21 and 2e-21.
    P[idx] = rhoScaled * kBmH2 * gammah * T_cld * pow((rhoScaled/2e-21), (gammah - 1.0f)) / Unit_P_in_cgs;
  }
  else if(rhoScaled <= 1e-18){
    // Linear in density at T_ps.
    P[idx] = rhoScaled * kBmH2 * T_ps / Unit_P_in_cgs;
  }
  else if(rhoScaled > 1e-18){
    // Highest-density range: linear term plus a power-law correction.
    // The explicit test (rather than a bare else) keeps NaN inputs out.
    P[idx] = rhoScaled * kBmH2 * T_0 * (1.0f + gammah * pow((rhoScaled/1e-14), (gammah - 1.0f))) / Unit_P_in_cgs;
  }
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what){
  if(status == cudaSuccess){
    return true;
  }
  cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << "\n";
  return false;
}

// Runs getPressure on the device for the N densities in `rho` and stores the
// N results in `P`. Both vectors must already hold N elements.
// Device buffers are released on every path. Returns false if any CUDA call failed.
static bool runGetPressure(const vector<float> &rho, vector<float> &P,
                           float T_cld, float T_ps, float T_0, float kBmH2){

  const size_t bytes = static_cast<size_t>(N) * sizeof(float);
  float *d_rho = nullptr;
  float *d_P = nullptr;

  // Allocate and copy from Host to Device. The && chain stops at the first failure.
  bool ok = cudaOk(cudaMalloc(&d_rho, bytes), "cudaMalloc d_rho")
         && cudaOk(cudaMalloc(&d_P, bytes), "cudaMalloc d_P")
         && cudaOk(cudaMemcpy(d_rho, rho.data(), bytes, cudaMemcpyHostToDevice), "cudaMemcpy rho HtoD")
         && cudaOk(cudaMemcpy(d_P, P.data(), bytes, cudaMemcpyHostToDevice), "cudaMemcpy P HtoD");

  if(ok){
    // Launching the kernel on GPU
    int blockSize = 256; // number of threads in a block
    int gridSize = (N + blockSize - 1) / blockSize; // Number of blocks in a grid

    getPressure<<<gridSize, blockSize>>>(d_P, d_rho, T_cld,
                                         T_ps, T_0, kBmH2,
                                         UnitDensity_in_cgs,
                                         Unit_P_in_cgs,
                                         gammah);

    // A launch does not return a status itself: query it explicitly, then
    // wait for the GPU so that execution errors surface here as well.
    ok = cudaOk(cudaGetLastError(), "getPressure launch")
      && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")
      && cudaOk(cudaMemcpy(P.data(), d_P, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy P DtoH");
  }

  // cudaFree accepts a null pointer, so this is safe after a partial setup.
  cudaFree(d_rho);
  cudaFree(d_P);

  return ok;
}

// Reads the density column of the first N rows of Hydra_130k.csv, computes the
// pressure of every row on the GPU and prints the last ten values.
// Returns 0 on success and 1 on any I/O, parsing or CUDA failure.
int main(){

  // Reading Hydra file.
  string fname = "Hydra_130k.csv";

  // 0  1  2  3   4   5    6   7  8  9  10
  // x, y, z, vx, vy, vz, rho, P, c, h, m.
  const size_t rhoColumn = 6;

  fstream file (fname, ios::in);
  if(!file.is_open()){
    cerr << "Could not open the file\n";
    return 1;
  }

  // Only the density column is needed, so it is parsed while reading
  // instead of keeping every cell of the file in memory as a string.
  vector<float> rho;
  rho.reserve(N);

  string line, word;
  while(rho.size() < static_cast<size_t>(N) && getline(file, line)){

    stringstream str(line);
    size_t column = 0;
    bool found = false;

    while(getline(str, word, ',')){
      if(column == rhoColumn){
        found = true;
        break;
      }
      ++column;
    }

    if(!found){
      cerr << "Row " << rho.size() << " of " << fname
           << " has fewer than " << (rhoColumn + 1) << " columns\n";
      return 1;
    }

    try{
      rho.push_back(stof(word));
    }
    catch(const exception &e){
      cerr << "Row " << rho.size() << " of " << fname
           << ": cannot parse density '" << word << "'\n";
      return 1;
    }
  }

  if(rho.size() < static_cast<size_t>(N)){
    cerr << fname << " holds only " << rho.size()
         << " rows, expected " << N << "\n";
    return 1;
  }

  float T_cld = 10.0f;
  float T_ps = 10.0f;
  float T_0 = 10.0f;

  float kBmH2 = kB/mH2;

  // Zero-initialised so that elements the kernel does not write stay at 0.
  vector<float> P(N, 0.0f);

  if(!runGetPressure(rho, P, T_cld, T_ps, T_0, kBmH2)){
    return 1;
  }

  // visual inspection
  for(int i = N-10; i < N; i++){
    cout << P[i] << endl;
  }

  return 0;
}

Overwriting test.cu


In [26]:
%%shell
nvcc test.cu -o test



In [27]:
%%shell
./test

0.590059
0.449991
0.314419
0.459549
0.291624
0.203724
0.30299
0.848239
0.241866
0.247042




In [28]:
%%shell
nvprof ./test

==611== NVPROF is profiling process 611, command: ./test
0.590059
0.449991
0.314419
0.459549
0.291624
0.203724
0.30299
0.848239
0.241866
0.247042
==611== Profiling application: ./test
==611== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   56.34%  90.848us         2  45.424us  45.280us  45.568us  [CUDA memcpy HtoD]
                   26.02%  41.951us         1  41.951us  41.951us  41.951us  [CUDA memcpy DtoH]
                   17.64%  28.447us         1  28.447us  28.447us  28.447us  getPressure(float*, float*, float, float, float, float, float, float, float)
      API calls:   99.65%  315.51ms         2  157.75ms  6.6220us  315.50ms  cudaMalloc
                    0.15%  480.01us         3  160.00us  149.21us  167.33us  cudaMemcpy
                    0.11%  343.25us         1  343.25us  343.25us  343.25us  cuDeviceTotalMem
                    0.05%  150.83us       101  1.4930us     128ns  64.664us  cuDeviceGetAttr

