<a href="https://colab.research.google.com/github/hfathie/hfvSPH_on_GPU/blob/main/getPressure_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
%%writefile test.cu
#include <iostream>
#include <fstream>
#include <cmath>
#include <string>
#include <vector>
#include <sstream>
#include "myCppSPHLibs.h"
using namespace std;

// Number of particles processed: sizes the host/device arrays and the kernel launch in main().
const int N = 131504;

// Physical constants; the trailing comments give their CGS units.
float mH = 1.6726e-24; // gram
float kB = 1.3807e-16; // cm2 g s-2 K-1
float mH2 = 2.7f * mH; // 2.7 x mH, in grams; presumably a mean particle mass -- TODO confirm

float M_sun = 1.98992e+33; // gram
float grav_const_in_cgs = 6.67259e-8;// cm3 g-1 s-2

// NOTE(review): not referenced anywhere in the visible code; presumably the
// gravitational constant expressed in code units -- confirm before removing.
float G = 1.0f;

// Passed as the last float argument to getPressure; presumably the adiabatic
// index -- confirm against the kernel definition in myCppSPHLibs.h.
float gammah = 5.0f/3.0f;

// Code length unit: given in parsecs, then converted to centimetres
// (3.086e18 is the pc -> cm factor used here).
float UnitRadius_in_pc = 2.0f;
float UnitRadius_in_cm = 3.086e18 * UnitRadius_in_pc;

// Derived code units, all built from the mass and length units above.
// The evaluation order of these float expressions determines the exact values
// handed to the kernel, so they should not be algebraically rearranged casually.
float UnitMass_in_g = 10.0f * M_sun; // 10 x M_sun, in grams
float UnitDensity_in_cgs = UnitMass_in_g / UnitRadius_in_cm/UnitRadius_in_cm/UnitRadius_in_cm; // mass unit / length unit^3
float Unit_u_in_cgs = grav_const_in_cgs * UnitMass_in_g / UnitRadius_in_cm; // G_cgs x mass unit / length unit
float Unit_P_in_cgs = UnitDensity_in_cgs * Unit_u_in_cgs; // density unit x Unit_u_in_cgs


// Reads the density column of the first N rows of "Hydra_130k.csv", runs the
// getPressure kernel on it and prints the first few resulting pressures.
//
// Returns 0 on success and 1 when the input file cannot be opened, contains
// fewer than N usable rows, or any CUDA runtime call / the kernel fails.
// A non-numeric density field makes stof throw, which terminates the program.
int main(){

  // Column layout of each CSV row:
  // 0  1  2  3   4   5    6   7  8  9  10
  // x, y, z, vx, vy, vz, rho, P, c, h, m.
  const int rhoColumn = 6;

  // Reading Hydra file.
  string fname = "Hydra_130k.csv";

  fstream file (fname, ios::in);
  if(!file.is_open()){
    // Bail out here: continuing would index into data that was never read.
    cerr << "Could not open the file\n";
    return 1;
  }

  // Only the density column is needed, so it is extracted while streaming
  // the file instead of keeping every field of every row in memory.
  vector<float> rho;
  rho.reserve(N);

  string line, word;
  while(rho.size() < static_cast<size_t>(N) && getline(file, line)){

    stringstream str(line);
    int column = 0;
    bool haveRho = false;

    // Walk the comma-separated fields until the density column is reached.
    while(getline(str, word, ',')){
      if(column == rhoColumn){
        haveRho = true;
        break;
      }
      column++;
    }

    if(!haveRho){
      cerr << "Row " << rho.size() << " of " << fname << " has fewer than "
           << (rhoColumn + 1) << " columns\n";
      return 1;
    }

    rho.push_back(stof(word));
  }

  if(rho.size() < static_cast<size_t>(N)){
    cerr << fname << " has only " << rho.size() << " rows, expected at least "
         << N << "\n";
    return 1;
  }

  float T_cld, T_ps, T_0, kBmH2;

  T_cld = 10.0f;
  T_ps = 10.0f;
  T_0 = 10.0f;

  kBmH2 = kB/mH2;

  // Host-side pressure buffer, zero-initialised; vectors release themselves.
  vector<float> P(N, 0.0f);

  float *d_rho = nullptr;
  float *d_P = nullptr;
  const size_t bytes = N * sizeof(float);

  // Reports a failed CUDA runtime call and tells the caller whether to go on.
  auto check = [](cudaError_t status, const char *what) -> bool {
    if(status == cudaSuccess)
      return true;
    cerr << "CUDA error during " << what << ": "
         << cudaGetErrorString(status) << "\n";
    return false;
  };

  // Allocate device buffers and copy from Host to Device. The && chain stops
  // at the first failing step so later calls never see invalid pointers.
  bool ok = check(cudaMalloc(&d_rho, bytes), "cudaMalloc(d_rho)")
         && check(cudaMalloc(&d_P, bytes), "cudaMalloc(d_P)")
         && check(cudaMemcpy(d_rho, rho.data(), bytes, cudaMemcpyHostToDevice),
                  "copy of rho to device")
         && check(cudaMemcpy(d_P, P.data(), bytes, cudaMemcpyHostToDevice),
                  "copy of P to device");

  if(ok){
    // Launching the kernel on GPU
    int blockSize = 256; // number of threads in a block
    int gridSize = (N + blockSize - 1) / blockSize; // Number of blocks in a grid

    getPressure<<<gridSize, blockSize>>>(d_P, d_rho, T_cld,
                                         T_ps, T_0, kBmH2,
                                         UnitDensity_in_cgs,
                                         Unit_P_in_cgs,
                                         gammah, N);

    // A launch reports configuration errors only through cudaGetLastError;
    // faults during execution surface at the following synchronisation.
    ok = check(cudaGetLastError(), "getPressure launch")
      && check(cudaDeviceSynchronize(), "getPressure execution")
      && check(cudaMemcpy(P.data(), d_P, bytes, cudaMemcpyDeviceToHost),
               "copy of P to host");
  }

  // Release device memory on every path; cudaFree on a null pointer is a no-op.
  cudaFree(d_rho);
  cudaFree(d_P);

  if(!ok)
    return 1;

  // visual inspection
  for(int i = 0; i < 10 && i < N; i++){
    cout << P[i] << endl;
  }

  return 0;
}

Overwriting test.cu


In [8]:
%%shell
nvcc test.cu -o test



In [9]:
%%shell
./test

0.50391
0.351792
0.577481
0.210833
0.707346
0.4072
0.397506
0.598539
0.465576
0.351517




In [10]:
%%shell
nvprof ./test

==275== NVPROF is profiling process 275, command: ./test
0.50391
0.351792
0.577481
0.210833
0.707346
0.4072
0.397506
0.598539
0.465576
0.351517
==275== Profiling application: ./test
==275== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   56.83%  93.279us         2  46.639us  45.439us  47.840us  [CUDA memcpy HtoD]
                   25.74%  42.239us         1  42.239us  42.239us  42.239us  [CUDA memcpy DtoH]
                   17.43%  28.607us         1  28.607us  28.607us  28.607us  getPressure(float*, float*, float, float, float, float, float, float, float, int)
      API calls:   99.64%  310.43ms         2  155.21ms  4.8140us  310.42ms  cudaMalloc
                    0.14%  430.52us         3  143.51us  129.11us  160.13us  cudaMemcpy
                    0.13%  396.81us         1  396.81us  396.81us  396.81us  cuDeviceTotalMem
                    0.05%  164.33us       101  1.6270us     127ns  69.135us  cuDeviceGetA

