<a href="https://colab.research.google.com/github/hfathie/hfvSPH_on_GPU/blob/main/getCsound.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
%%writefile test.cu
#include <iostream> // iostream, fstream, cmath, string, vector, sstream.
#include <fstream>
#include <cmath>
#include <string>
#include <vector>
#include <sstream>
#include <stdexcept>
#include "myCppSPHLibs.h"
using namespace std;

// Number of entries in the rho / csnd arrays handled by main().
const int N = 131504;

// ---- Physical constants (cgs) ----
float mH = 1.6726e-24; // gram
float kB = 1.3807e-16; // cm2 g s-2 K-1
float M_sun = 1.98992e+33; // gram
float grav_const_in_cgs = 6.67259e-8;// cm3 g-1 s-2

// Derived from mH with a fixed factor of 2.7.
float mH2 = 2.7f * mH;

// ---- Constants expressed in code units ----
float G = 1.0f;
float gammah = 5.0f/3.0f;

// ---- Code-unit definitions: length and mass ----
float UnitRadius_in_pc = 2.0f;
float UnitRadius_in_cm = 3.086e18 * UnitRadius_in_pc;
float UnitMass_in_g = 10.0f * M_sun;

// ---- Derived code units ----
// The radius is divided out one factor at a time on purpose: forming
// UnitRadius_in_cm cubed first would exceed the range of a float.
float UnitDensity_in_cgs =
    ((UnitMass_in_g / UnitRadius_in_cm) / UnitRadius_in_cm) / UnitRadius_in_cm;
float Unit_u_in_cgs = (grav_const_in_cgs * UnitMass_in_g) / UnitRadius_in_cm;
float Unit_P_in_cgs = UnitDensity_in_cgs * Unit_u_in_cgs;

// Square root of the same expression that defines Unit_u_in_cgs.
float unitVelocity = sqrt((grav_const_in_cgs * UnitMass_in_g) / UnitRadius_in_cm);


// Reads the first `count` rows of the comma-separated file `fname` and stores
// field number `column` (0-based) of each row, parsed as a float, in `values`.
// Returns true on success. On any failure (file cannot be opened, a row has
// too few fields, a field is not a number, or the file has fewer than `count`
// rows) a diagnostic is written to stderr and false is returned.
static bool readCsvColumn(const string &fname, int column, int count,
                          vector<float> &values){

  fstream file (fname, ios::in);
  if(!file.is_open()){
    cerr<<"Could not open the file\n";
    return false;
  }

  values.clear();
  values.reserve(count);

  string line, word;
  while((int)values.size() < count && getline(file, line)){

    stringstream str(line);

    // Walk the fields up to the requested one; afterwards `word` holds it.
    int col = -1;
    while(col < column && getline(str, word, ','))
      col++;

    if(col < column){
      cerr << "Row " << values.size() << " of " << fname << " has fewer than "
           << (column + 1) << " columns\n";
      return false;
    }

    try{
      values.push_back(stof(word));
    }
    catch(const exception &e){
      // stof throws when the field is not a number or is out of float range.
      cerr << "Row " << values.size() << " of " << fname << ": cannot parse \""
           << word << "\" as a float (" << e.what() << ")\n";
      return false;
    }
  }

  if((int)values.size() < count){
    cerr << fname << " has only " << values.size() << " rows, expected "
         << count << "\n";
    return false;
  }

  return true;
}


// Loads the density column of Hydra_130k.csv, runs the getCsound kernel on it
// and prints the first 10 resulting values for visual inspection.
// getCsound is not defined in this file (presumably it comes from
// myCppSPHLibs.h).
// Returns 0 on success and 1 if the input could not be read or any CUDA call
// failed.
int main(){

  // Reading Hydra file.
  string fname = "Hydra_130k.csv";

  // 0  1  2  3   4   5    6   7  8  9  10
  // x, y, z, vx, vy, vz, rho, P, c, h, m.
  vector<float> rho;
  if(!readCsvColumn(fname, 6, N, rho))
    return 1;

  // Host output buffer, zero-initialised before it is copied to the device.
  vector<float> csnd(N, 0.0f);

  float T_cld = 10.0f;
  float T_ps = 10.0f;
  float T_0 = 10.0f;

  float kBmH2 = kB/mH2;

  float *d_rho = nullptr;
  float *d_csnd = nullptr;
  const size_t nBytes = N*sizeof(float);

  // Returns true if `err` is cudaSuccess; otherwise reports it and returns false.
  auto ok = [](cudaError_t err, const char *what) -> bool {
    if(err == cudaSuccess)
      return true;
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << "\n";
    return false;
  };

  // Allocate on the device and copy from Host to Device. The && chain stops
  // at the first failing call.
  bool success =
       ok(cudaMalloc(&d_rho, nBytes), "cudaMalloc(d_rho)")
    && ok(cudaMalloc(&d_csnd, nBytes), "cudaMalloc(d_csnd)")
    && ok(cudaMemcpy(d_rho, rho.data(), nBytes, cudaMemcpyHostToDevice),
          "cudaMemcpy(rho -> d_rho)")
    && ok(cudaMemcpy(d_csnd, csnd.data(), nBytes, cudaMemcpyHostToDevice),
          "cudaMemcpy(csnd -> d_csnd)");

  if(success){

    // Launching the kernel on GPU
    int blockSize = 256; // number of threads in a block
    int gridSize = (N + blockSize - 1) / blockSize; // Number of blocks in a grid

    getCsound<<<gridSize, blockSize>>>(d_csnd, d_rho, T_cld,
                                       T_ps, T_0, kBmH2,
                                       UnitDensity_in_cgs,
                                       unitVelocity,
                                       gammah, N);

    // A kernel launch returns no status itself: cudaGetLastError reports launch
    // errors, and cudaDeviceSynchronize waits for the GPU and reports errors
    // raised while the kernel was running.
    success =
         ok(cudaGetLastError(), "getCsound launch")
      && ok(cudaDeviceSynchronize(), "getCsound execution")
      // Copy from Device to Host.
      && ok(cudaMemcpy(csnd.data(), d_csnd, nBytes, cudaMemcpyDeviceToHost),
            "cudaMemcpy(d_csnd -> csnd)");
  }

  // Release device memory on every path; cudaFree accepts a null pointer.
  cudaFree(d_rho);
  cudaFree(d_csnd);

  if(!success)
    return 1;

  // visual inspection
  for(int i = 0; i < 10 && i < N; i++){
    cout << csnd[i] << endl;
  }

  return 0;
}

Overwriting test.cu


In [9]:
%%shell
nvcc test.cu -o test



In [11]:
%%shell
./test

1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212




In [12]:
%%shell
nvprof ./test

==303== NVPROF is profiling process 303, command: ./test
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
1.19212
==303== Profiling application: ./test
==303== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   56.61%  91.935us         2  45.967us  45.888us  46.047us  [CUDA memcpy HtoD]
                   25.87%  42.016us         1  42.016us  42.016us  42.016us  [CUDA memcpy DtoH]
                   17.52%  28.448us         1  28.448us  28.448us  28.448us  getCsound(float*, float*, float, float, float, float, float, float, float)
      API calls:   99.62%  291.51ms         2  145.75ms  5.1590us  291.50ms  cudaMalloc
                    0.16%  466.05us         3  155.35us  138.65us  165.58us  cudaMemcpy
                    0.13%  370.54us         1  370.54us  370.54us  370.54us  cuDeviceTotalMem
                    0.05%  151.74us       101  1.5020us     124ns  64.214us  cuDeviceGetAttribute
     

