Skip to content

Commit

Permalink
version 2.0, interleaved streams
Browse files Browse the repository at this point in the history
  • Loading branch information
sigrimm committed Apr 29, 2020
1 parent 70fbd8d commit 052c56c
Show file tree
Hide file tree
Showing 6 changed files with 1,666 additions and 1,149 deletions.
79 changes: 58 additions & 21 deletions define.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

#ifndef M_PI
#define _USE_MATH_DEFINES //for Windows: asks the Microsoft <math.h> to define M_PI and the other math constants; only effective if defined before <math.h> is first included
#endif
Expand Down Expand Up @@ -25,7 +26,7 @@
#endif


#define VERSION 1.721
#define VERSION 2.0


#define def_T0 296.0 //Reference Temperature in K
Expand All @@ -41,14 +42,24 @@

//Tolerances and size limits.
//NOTE(review): def_nthmax and def_maxlines are each defined twice below with different values;
//this looks like the old and the new line of a diff kept side by side -- confirm which value is intended.
#define def_TOL 1.43e-17 //Tolerance in the Voigt function 3.58e-9 2.48e-12 1.43e-17 3.25e-27 1.69e-33
#define def_TOLF 2.48e-12f //Tolerance in the Voigt function, float literal (f suffix)
#define def_nthmax 32768 //Maximum number of threads in 2.0 Cards
#define def_nthmax 1048576 //Maximum number of threads for line and sort kernels
#define def_nlmax 32768 //Maximum number of lines per kernel launch, to prevent time-outs on desktop machines
//#define def_maxlines 30000000ll //maximum number of lines stored on the GPU, Should not be less than maximum in HITEMP lines
#define def_maxlines 8000000ll //maximum number of lines stored on the GPU; should not be less than the maximum in HITEMP lines; must be of long long int type
#define def_maxlines 1000000ll //maximum number of lines stored on the GPU; should not be less than the maximum in HITEMP lines; must be of long long int type
#define def_maxfiles 500 //maximum number of files per molecule
#define def_doTuning 1 //use the self-tuning routines

#define def_NmaxSample 100 //Maximum number of resample coefficients for K(y)


//Block sizes used to split the line list, one constant per Line kernel family (A, B, C)
#define def_nlA 1024 //split lines into blocks of this size, A Line kernels
#define def_nlB 1024 //split lines into blocks of this size, B Line kernels
#define def_nlC 1024 //split lines into blocks of this size, C Line kernels

#define def_rBs 256 //number of streams in readBuffer copy

#define def_KSn 32 //NOTE(review): purpose is not documented here -- confirm against its uses


//default values of parameters
#define def_qALPHA_L 0.5 //q value in the Lorentz half width q = Pself / P
#define def_gammaF 1.0 //scaling factor for Lorentzian halfwidth
Expand Down Expand Up @@ -138,29 +149,55 @@ struct Param{
int units;
int useIndividualX;
int replaceFiles;
int RLOW;
int profile;
double gammaF;
int doTuning;
};

//Bundle of array pointers describing the spectral lines and the limits of the Line blocks.
//The struct only declares the pointers; nothing here shows where they are allocated or freed.
//NOTE(review): the _h / _d suffixes presumably mark host and device (GPU) copies of the same array,
//and _m the mapped-memory side of an _m / _d pair -- confirm against the allocation code.
//NOTE(review): several members (nu, S, S1_d, A, delta, EL, vy, va_d, vb_d, vcut2_d, ialphaD, n, ID_d)
//are declared twice below; this looks like the old and the new lines of a diff kept side by side.
//Duplicate members do not compile -- confirm which set is intended.
struct Line{
double *nu_h, *nu_d; //Wavenumber
double *S_h, *S_d; //Intensity
double *nu_h, *nu_d; //Wavenumber
double *S_h, *S_d; //Intensity
float *Sf_d; //float-typed counterpart of S_d (presumably a single precision copy -- confirm)
double *S1_d; //modified Intensity
double *S1_d; //modified Intensity
float *S1f_d; //float-typed counterpart of S1_d (presumably a single precision copy -- confirm)
double *A_h, *A_d; //Einstein A coefficient
double *delta_h, *delta_d; //pressure induced line shift
double *EL_h, *EL_d; //Energy of lower state
double *vy_h, *vy_d; //Lorentz Halfwidth / Doppler Halfwidth
double *A_h, *A_d; //Einstein A coefficient
double *delta_h, *delta_d; //pressure induced line shift
double *EL_h, *EL_d; //Energy of lower state
double *vy_h, *vy_d; //Lorentz Halfwidth / Doppler Halfwidth
float *vyf_d; //float-typed counterpart of vy_d (presumably a single precision copy -- confirm)
float *va_d; //(numin - nu) * ialphaD
float *vb_d; //dnu * ialphaD
float *vcut2_d; //(cut * ialphaD)^2
double *ialphaD_h, *ialphaD_d; //Doppler Halfwidth; NOTE(review): the name and the va_d / vb_d formulas suggest the inverse Doppler halfwidth -- confirm
double *n_h, *n_d; //temperature dependent exponent
double *Q_h, *Q_d; //partition function
int *ID_h, *ID_d; //line id used for sorting


float *va_d; //(numin - nu) * ialphaD
float *vb_d; //dnu * ialphaD
float *vcut2_d; //(cut * ialphaD)^2
double *ialphaD_h, *ialphaD_d; //Doppler Halfwidth; NOTE(review): the name and the va_d / vb_d formulas suggest the inverse Doppler halfwidth -- confirm
double *n_h, *n_d; //temperature dependent exponent
double *Sort_d; //helper array used to sort the other arrays
int *ID_d; //line id used for sorting
//Limits of the Line blocks as double values, one min (0) / max (1) pair per block family.
//NOTE(review): "nu" presumably means these are wavenumber limits; the meaning of the AL / AR variants is not visible here -- confirm
double *nuLimitsA0_d; //limits for A Line blocks, min
double *nuLimitsA1_d; //limits for A Line blocks, max
double *nuLimitsAL0_d; //limits for AL Line blocks, min
double *nuLimitsAL1_d; //limits for AL Line blocks, max
double *nuLimitsAR0_d; //limits for AR Line blocks, min
double *nuLimitsAR1_d; //limits for AR Line blocks, max
double *nuLimitsB0_d; //limits for B Line blocks, min
double *nuLimitsB1_d; //limits for B Line blocks, max
double *nuLimitsC0_d; //limits for C Line blocks, min
double *nuLimitsC1_d; //limits for C Line blocks, max

//Limits of the Line blocks as long long int values (presumably indices -- confirm), same min (0) / max (1) layout
long long int *iiLimitsA0_h, *iiLimitsA0_d; //limits for A Line blocks, min
long long int *iiLimitsA1_h, *iiLimitsA1_d; //limits for A Line blocks, max
long long int *iiLimitsAL0_h, *iiLimitsAL0_d; //limits for AL Line blocks, min
long long int *iiLimitsAL1_h, *iiLimitsAL1_d; //limits for AL Line blocks, max
long long int *iiLimitsAR0_h, *iiLimitsAR0_d; //limits for AR Line blocks, min
long long int *iiLimitsAR1_h, *iiLimitsAR1_d; //limits for AR Line blocks, max
long long int *iiLimitsB0_h, *iiLimitsB0_d; //limits for B Line blocks, min
long long int *iiLimitsB1_h, *iiLimitsB1_d; //limits for B Line blocks, max
long long int *iiLimitsC0_h, *iiLimitsC0_d; //limits for C Line blocks, min
long long int *iiLimitsC1_h, *iiLimitsC1_d; //limits for C Line blocks, max

//Limits taken over all blocks of one family, kept in mapped memory
long long int *iiLimitsAT_m, *iiLimitsAT_d; //limits for all A blocks, mapped memory
long long int *iiLimitsALT_m, *iiLimitsALT_d; //limits for all AL blocks, mapped memory
long long int *iiLimitsART_m, *iiLimitsART_d; //limits for all AR blocks, mapped memory
long long int *iiLimitsBT_m, *iiLimitsBT_d; //limits for all B blocks, mapped memory
long long int *iiLimitsCT_m, *iiLimitsCT_d; //limits for all C blocks, mapped memory

};

0 comments on commit 052c56c

Please sign in to comment.