Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

The CUDA code can now be switched on via configure

The new version of mixed_solve.cu includes now temporal gauge
  • Loading branch information...
commit 67afe7c850e3e260cecf7298e640b2ba10091990 1 parent 220c803
Florian Burger authored
View
74 GPU/Makefile.in
@@ -0,0 +1,74 @@
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+top_builddir = .
+abs_top_builddir = @abs_top_builddir@
+builddir = @builddir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+bindir = @bindir@
+program_transform_name = @program_transform_name@
+subdir = .
+
+AR = ar
+RANLIB = @RANLIB@
+CC =
+CCDEP =
+CFLAGS = @GPUCFLAGS@
+LDFLAGS = @LDFLAGS@
+DEPFLAGS =
+CPPFLAGS = @CPPFLAGS@
+CCLD = @CCLD@
+LEX = @LEX@
+AUTOCONF = @AUTOCONF@
+LIBS = @LIBS@
+SHELL = @SHELL@
+OPTARGS = @OPTARGS@
+SOPTARGS = @SOPTARGS@
+DEFS = @DEFS@
+GPUOBJECTS = @GPUDIR@
+USESUBDIRS = @USESUBDIRS@
+NVCC = @NVCC@
+
+INCLUDES = @INCLUDES@
+
+COMPILE = ${NVCC} -c ${DEFS} ${INCLUDES} -o $@ ${CFLAGS}
+
+
+GPUSOURCES := $(wildcard *.cu)
+GPUOBJECTS := $(patsubst %.cu, %.o, $(GPUSOURCES))
+DEPS := $(patsubst %.o,%.d,$(GPUOBJECTS))
+
+.SUFFIXES:
+
+all: Makefile dummy
+
+#ifneq (,$(findstring lapack,${LIBS}))
+#all: Makefile all-recursive dep hmc_tm invert invert_doublet
+#else
+#all: Makefile all-recursive dep hmc_tm invert invert_doublet
+#endif
+
+
+.NOTPARALLEL:
+
+-include $(addsuffix .d,$(GPUTARGETS))
+
+include ${top_srcdir}/Makefile.global
+
+
+%.o: ${srcdir}/%.cu Makefile
+ @$(COMPILE) ${INCLUDES} $< > $@
+
+dummy: ${GPUOBJECTS} Makefile
+ @echo "Have generated all %.o files"
+
+compile-clean: Makefile
+ rm -f ${GPUOBJECTS} *.d
+
+clean: compile-clean
+
+distclean: compile-clean
+ rm -f Makefile
+
+.PHONY: all compile-clean
View
22 GPU/cudadefs.h
@@ -0,0 +1,22 @@
+
+#ifndef _CUDADEF_H
+#define _CUDADEF_H
+
+#define ACCUM_N 2048
+#define DOTPROD_DIM 128
+
+#define GF_8
+#define TEMPORALGAUGE
+//#define USETEXTURE
+
+
+
+#define REAL float
+#define BLOCK 192 // Block Size
+#define BLOCK2 320 // Block Size 2 for dev_mul_one_pm...
+#define maxblockdim 512
+
+
+#endif
+
+
View
39 GPU/cudaglobal.h
@@ -1,44 +1,29 @@
-/***********************************************************************
- * Copyright (C) 2010 Florian Burger
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/* GPU Stuff */
-
-
-#define REAL float
-#define BLOCK 128 // Block Size
-#define maxblockdim 512
-
+#include "cudadefs.h"
+/* GPU Stuff */
-typedef struct __align__(8) {
+typedef struct {
REAL re;
REAL im;
} dev_complex;
/* Device Gauge Fields */
typedef dev_complex dev_su3 [3][3]; /* su(3)-Matrix 3x3 komplexe Einträge DEVICE */
+
+typedef struct{
+ dev_su3 m;
+ float pad;
+} dev_su3_pad;
+
+
+
typedef float4 su3_2v ; /* 2 Zeilen der su(3)-Matrix, 6 komplexe Einträge HOST
3*4*VOLUME in array -> texture */
typedef float4 dev_su3_2v ; /* 2 Zeilen der su(3)-Matrix
3*2 komplexe Einträge DEVICE 3*4*VOLUME in array -> texture*/
-typedef float4 dev_su3_8 ; /* 8 numbers to reconstruc the gauge field as described in M. Clark */
+typedef float4 dev_su3_8 ; /* 8 numbers to reconstruct the gauge field as described in M. Clark */
/* Device Spinor Fields */
typedef float4 dev_spinor;
View
2,411 GPU/mixed_solve.cu
1,948 additions, 463 deletions not shown
View
22 GPU/mixed_solve.h
@@ -1,23 +1,3 @@
-/***********************************************************************
- * Copyright (C) 2010 Florian Burger
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-
#ifndef _MIXED_SOLVE_H_
void initnn();
@@ -29,6 +9,8 @@ extern "C" int mixed_solve_eo (spinor * const P, spinor * const Q, const int max
extern "C" int bind_texture_spin(dev_spinor* s, int i);
extern "C" int unbind_texture_spin(int i);
+extern "C" int bind_texture_nn(int* nn);
+extern "C" int unbind_texture_nn();
#define _MIXED_SOLVE_H_
#endif
View
23 Makefile.in
@@ -26,6 +26,11 @@ SHELL = @SHELL@
OPTARGS = @OPTARGS@
SOPTARGS = @SOPTARGS@
DEFS = @DEFS@
+GPUDIR = @GPUDIR@
+USESUBDIRS = @USESUBDIRS@
+NVCC = @NVCC@
+
+
INCLUDES = @INCLUDES@
LINK = $(CCLD) -o $@ ${LDFLAGS}
@@ -60,7 +65,13 @@ MODULES = read_input gamma hybrid_update observables start \
little_D block sf_gauge_monomial sf_utils sf_calc_action \
sf_get_staples sf_get_rectangle_staples sf_observables \
Dov_psi operator poly_monomial measurements pion_norm Dov_proj \
- xchange_field_tslice
+ xchange_field_tslice temporalgauge
+
+
+## the GPU modules (all .cu files in $GPUDIR)
+GPUSOURCES := $(wildcard $(GPUDIR)/*.cu)
+GPUOBJECTS := $(patsubst $(GPUDIR)/%.cu, $(GPUDIR)/%.o, $(GPUSOURCES))
+
NOOPTMOD = test/check_xchange test/check_geometry
@@ -69,7 +80,10 @@ PROGRAMS = hmc_tm benchmark invert gwc2ildg \
check_locallity test_lemon hopping_test
ALLOBJ = ${MODULES} ${PROGRAMS} ${SMODULES}
-SUBDIRS = io linalg solver
+SUBDIRS = ${USESUBDIRS}
+
+
+
.SUFFIXES:
@@ -116,8 +130,11 @@ ${addsuffix .o, ${SMODULES}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir
${addsuffix .o, ${PROGRAMS}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/config.h
${COMPILE} ${OPTARGS} -c $<
+
+
${PROGRAMS}: %: %.o libhmc.a all-recursive
- ${LINK} $@.o $(LIBS)
+ ${LINK} $@.o $(GPUOBJECTS) $(LIBS)
+
dep: $(addsuffix .d,$(ALLOBJ))
View
2  config.h.in
@@ -170,6 +170,8 @@
/* Define to 1 for rts_get_dram_window usage for halfspinor on BG/L */
#undef BGL2
+/* Define if we want to use CUDA GPU */
+#undef HAVE_GPU
#endif
View
3,021 configure
858 additions, 2,163 deletions not shown
View
48 configure.in
@@ -601,6 +601,51 @@ else
AC_MSG_RESULT(no)
fi
+
+AC_MSG_CHECKING(whether we want to use CUDA GPU)
+AC_ARG_ENABLE(gpu,
+ [ --enable-gpu use GPU [default=no]],
+ usegpu=$enableval, usegpu=no)
+if test $usegpu = yes; then
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_GPU,1,Using CUDA GPU)
+ NVCC="nvcc"
+ USESUBDIRS="io linalg solver GPU"
+ GPUDIR="$srcdir/GPU"
+ LIBS="$LIBS -lcuda -lcudart -lcublas"
+
+ AC_MSG_CHECKING([where to search for CUDA libs])
+ AC_ARG_WITH(cuda,
+ [ --with-cuda[=dir] use CUDA GPU with lib dir [default=/usr/local/cuda/lib]],
+ cuda_dir=$withval, cuda_dir="/usr/local/cuda/lib")
+ AC_MSG_RESULT($cuda_dir)
+ if test $usegpu = yes; then
+ LDFLAGS="$LDFLAGS -L$cuda_dir"
+ fi
+
+
+ AC_MSG_CHECKING([CUDA compile args])
+ AC_ARG_WITH(cudacompileargs,
+ [ --with-cudacompileargs[=string] use CUDA compile args [default=--gpu-architecture sm_13 --use_fast_math -O3]],
+ cuda_compileargs=$withval, cuda_compileargs="--gpu-architecture sm_13 --use_fast_math -O3")
+ AC_MSG_RESULT($cuda_compileargs)
+ if test $usegpu = yes; then
+ GPUCFLAGS="$GPUCFLAGS $cuda_compileargs"
+ fi
+else
+ AC_MSG_RESULT(no)
+ USESUBDIRS="io linalg solver"
+ NVCC=""
+fi
+
+
+AC_SUBST(USESUBDIRS)
+AC_SUBST(NVCC)
+AC_SUBST(GPUDIR)
+AC_SUBST(GPUCFLAGS)
+
+
+
AC_MSG_CHECKING(checking consistency)
if test $enable_mpi = yes ; then
if test $enable_iig = yes && test $withnonblock != yes ; then
@@ -642,6 +687,7 @@ AUTOCONF=autoconf
AC_CONFIG_FILES([Makefile
linalg/Makefile
solver/Makefile
- io/Makefile])
+ io/Makefile
+ GPU/Makefile])
AC_OUTPUT
View
3  global.h
@@ -170,6 +170,9 @@ EXTERN su3 ** g_gauge_field_copys;
EXTERN su3 ** g_gauge_field_copy;
#endif
+/*for temporalgauge in GPU part*/
+EXTERN su3 ** g_tempgauge_field;
+
EXTERN su3adj ** moment;
EXTERN su3adj ** df0;
EXTERN su3adj ** ddummy;
View
139 invert_eo.c
@@ -55,10 +55,17 @@
#ifdef HAVE_GPU
-extern int mixed_solve (spinor * const P, spinor * const Q, const int max_iter,
+ #include"GPU/cudadefs.h"
+ #include"temporalgauge.h"
+ #include"observables.h"
+
+ extern int mixed_solve (spinor * const P, spinor * const Q, const int max_iter,
double eps, const int rel_prec,const int N);
-extern int mixed_solve_eo (spinor * const P, spinor * const Q, const int max_iter,
+ extern int mixed_solve_eo (spinor * const P, spinor * const Q, const int max_iter,
double eps, const int rel_prec, const int N);
+ #ifdef TEMPORALGAUGE
+ extern su3* g_trafo;
+ #endif
#endif
@@ -73,6 +80,46 @@ int invert_eo(spinor * const Even_new, spinor * const Odd_new,
/* here comes the inversion using even/odd preconditioning */
if(even_odd_flag) {
if(g_proc_id == 0) {printf("# Using Even/Odd preconditioning!\n"); fflush(stdout);}
+
+ #ifdef HAVE_GPU
+ #ifdef TEMPORALGAUGE
+ /* initialize temporal gauge here */
+ int retval;
+ double dret;
+ double plaquette = 0.0;
+
+ if(usegpu_flag){
+
+ /* need VOLUME here (not N=VOLUME/2)*/
+ if((retval=init_temporalgauge_trafo(VOLUME, g_gauge_field)) !=0){
+ if(g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");
+ exit(200);
+ }
+ plaquette = measure_gauge_action();
+ if(g_proc_id == 0) printf("Plaquette before gauge fixing: %.16e\n", plaquette/6./VOLUME);
+ /* do trafo */
+ apply_gtrafo(g_gauge_field, g_trafo);
+ plaquette = measure_gauge_action();
+ if(g_proc_id == 0) printf("Plaquette after gauge fixing: %.16e\n", plaquette/6./VOLUME);
+
+ /* do trafo to odd part of source */
+ dret = square_norm(Odd, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16e\n", dret);
+ apply_gtrafo_spinor_odd(Odd, g_trafo);
+ dret = square_norm(Odd, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16e\n", dret);
+
+ /* do trafo to even part of source */
+ dret = square_norm(Even, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16e\n", dret);
+ apply_gtrafo_spinor_even(Even, g_trafo);
+ dret = square_norm(Even, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16f\n", dret);
+ }
+ #endif
+ #endif /* HAVE_GPU*/
+
+
assign_mul_one_pm_imu_inv(Even_new, Even, +1.);
Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new);
@@ -137,15 +184,20 @@ int invert_eo(spinor * const Even_new, spinor * const Odd_new,
printf("# mu = %f\n", g_mu);
fflush(stdout);
}
-#ifdef HAVE_GPU
- if(g_proc_id == 0) {printf("Using GPU for inversion\n");
- fflush(stdout);}
- iter = mixed_solve_eo(Odd_new, g_spinor_field[DUM_DERI], max_iter, precision, rel_prec, VOLUME/2);
-#else
+ #ifdef HAVE_GPU
+ if(usegpu_flag){
+ if(g_proc_id == 0) printf("Using GPU for inversion\n");
+ iter = mixed_solve_eo(Odd_new, g_spinor_field[DUM_DERI], max_iter, precision, rel_prec, VOLUME/2);
+ }
+ else{
+ iter = cg_her(Odd_new, g_spinor_field[DUM_DERI], max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_psi);
+ Qtm_minus_psi(Odd_new, Odd_new);
+ }
+ #else
iter = cg_her(Odd_new, g_spinor_field[DUM_DERI], max_iter, precision, rel_prec,
VOLUME/2, &Qtm_pm_psi);
Qtm_minus_psi(Odd_new, Odd_new);
-#endif
+ #endif /*HAVE_GPU*/
}
else if(solver_flag == MR) {
if(g_proc_id == 0) {printf("# Using MR!\n"); fflush(stdout);}
@@ -185,6 +237,57 @@ int invert_eo(spinor * const Even_new, spinor * const Odd_new,
/* The sign is plus, since in Hopping_Matrix */
/* the minus is missing */
assign_add_mul_r(Even_new, g_spinor_field[DUM_DERI], +1., VOLUME/2);
+
+ #ifdef HAVE_GPU
+ /* return from temporal gauge again */
+ #ifdef TEMPORALGAUGE
+ if(usegpu_flag){
+ plaquette = measure_gauge_action();
+ if(g_proc_id == 0) printf("Plaquette before inverse gauge fixing: %.16e\n", plaquette/6./VOLUME);
+
+ /* undo trafo */
+
+ /*apply_inv_gtrafo(g_gauge_field, g_trafo);*/
+ /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
+ copy_gauge_field(g_gauge_field, g_tempgauge_field);
+ g_update_gauge_copy = 1;
+
+
+ plaquette = measure_gauge_action();
+ if(g_proc_id == 0) printf("Plaquette after inverse gauge fixing: %.16e\n", plaquette/6./VOLUME);
+
+ /* undo trafo to source (Even, Odd) */
+ dret = square_norm(Even, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16f\n", dret);
+ apply_inv_gtrafo_spinor_even(Even, g_trafo);
+ dret = square_norm(Even, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16f\n", dret);
+ dret = square_norm(Odd, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16f\n", dret);
+ apply_inv_gtrafo_spinor_odd(Odd, g_trafo);
+ dret = square_norm(Odd, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16f\n", dret);
+
+
+ dret = square_norm(Even_new, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16e\n", dret);
+ apply_inv_gtrafo_spinor_even(Even_new, g_trafo);
+ dret = square_norm(Even_new, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16e\n", dret);
+
+ dret = square_norm(Odd_new, VOLUME/2 , 1);
+ if(g_proc_id == 0) printf("square norm before gauge fixing: %.16e\n", dret);
+ apply_inv_gtrafo_spinor_odd(Odd_new, g_trafo);
+ dret = square_norm(Odd_new, VOLUME/2, 1);
+ if(g_proc_id == 0) printf("square norm after gauge fixing: %.16e\n", dret);
+
+
+ finalize_temporalgauge();
+ }
+ #endif
+ #endif
+
+
}
else {
@@ -249,17 +352,23 @@ int invert_eo(spinor * const Even_new, spinor * const Odd_new,
}
else {
if(g_proc_id == 0) {printf("# Using CG!\n"); fflush(stdout);}
-#ifdef HAVE_GPU
- if(g_proc_id == 0) {printf("Using GPU for inversion\n");
- fflush(stdout);}
- iter = mixed_solve(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], max_iter, precision,
- rel_prec, VOLUME);
-#else
+ #ifdef HAVE_GPU
+ if(usegpu_flag){
+ if(g_proc_id == 0) printf("Using GPU for inversion\n");
+ iter = mixed_solve(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], max_iter, precision, rel_prec, VOLUME);
+ }
+ else{
+ gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], VOLUME);
+ iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision,
+ rel_prec, VOLUME, &Q_pm_psi);
+ Q_minus_psi(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI]);
+ }
+ #else
gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], VOLUME);
iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision,
rel_prec, VOLUME, &Q_pm_psi);
Q_minus_psi(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI]);
-#endif
+ #endif
}
convert_lexic_to_eo(Even_new, Odd_new, g_spinor_field[DUM_DERI+1]);
}
View
7,515 read_input.c
4,516 additions, 2,999 deletions not shown
View
5 read_input.h
@@ -95,6 +95,11 @@ extern "C"
extern int reweighting_samples;
extern int no_samples;
+ extern int usegpu_flag;
+ extern int max_innersolver_it;
+ extern double innersolver_precision;
+ extern int device_num;
+
extern int nblocks_t, nblocks_x, nblocks_y, nblocks_z;
int read_input(char *);
View
42 read_input.l
@@ -148,6 +148,11 @@ inline void rmQuotes(char *str){
int reweighting_samples;
int no_samples;
+ int usegpu_flag;
+ int max_innersolver_it;
+ int device_num;
+ double innersolver_precision;
+
int propagator_comparison;
int nb_cores;
@@ -238,7 +243,8 @@ inline void rmQuotes(char *str){
%x INITINTEGRATOR
%x INTEGRATOR
-
+%x GPU
+%x INITGPU
%x INITOPERATOR
%x TMOP
@@ -361,6 +367,38 @@ inline void rmQuotes(char *str){
^DflFieldIter{EQL} BEGIN(DFLFIELDITER);
^DflPolyIter{EQL} BEGIN(DFLPOLYITER);
+^BeginGPU BEGIN(INITGPU);
+
+
+
+<INITGPU>Init{SPC}* {
+ if(myverbose) printf("Initialising GPU line %d\n", line_of_file);
+ usegpu_flag = 1;
+ if(myverbose!=0) printf(" Using help of GPU for inversions\n");
+ BEGIN(GPU);
+ }
+<GPU>{
+ {SPC}*InnersolverPrecision{SPC}*={SPC}*{FLT} {
+ sscanf(yytext, " %[2a-zA-Z] = %f", name, &c);
+ innersolver_precision = c;
+ if(myverbose) printf(" Inner solver precision set to %f line %d\n", c, line_of_file);
+ }
+ {SPC}*MaxInnersolverIteration{SPC}*={SPC}*{DIGIT}+ {
+ sscanf(yytext, " %[2a-zA-Z] = %d", name, &a);
+ max_innersolver_it = a;
+ if(myverbose) printf(" Inner solver iterations set to %d line %d\n", a, line_of_file);
+ }
+ {SPC}*DeviceNum{SPC}*={SPC}*{DIGIT}+ {
+ sscanf(yytext, " %[2a-zA-Z] = %d", name, &a);
+ device_num = a;
+ if(myverbose) printf(" Device Number set to %d line %d\n", a, line_of_file);
+ }
+ EndGPUInit{SPC}* {
+ if(myverbose) printf("GPU parsed in line %d\n\n", line_of_file);
+ BEGIN(0);
+ }
+}
+
<INITOPERATOR>{TYPE} {
current_operator++;
@@ -1638,7 +1676,7 @@ inline void rmQuotes(char *str){
}
-<INITMONOMIAL,DETMONOMIAL,NDPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,POLYMONOMIAL,PLOOP>\n {
+<INITMONOMIAL,DETMONOMIAL,NDPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n {
line_of_file++;
}
<*>\n {
View
501 temporalgauge.c
@@ -0,0 +1,501 @@
+#include "global.h"
+#include "GPU/cudadefs.h"
+#include "su3.h"
+#include "geometry_eo.h"
+#include "start.h"
+#include "temporalgauge.h"
+#include "stdio.h"
+#include "stdlib.h"
+
+su3 * g_trafo;
+su3 * tempgauge_field = NULL;
+
+
+
+static su3 unit_su3(void)
+{
+ su3 u;
+
+ u.c00.re=1.0;
+ u.c00.im=0.0;
+ u.c01.re=0.0;
+ u.c01.im=0.0;
+ u.c02.re=0.0;
+ u.c02.im=0.0;
+
+ u.c10.re=0.0;
+ u.c10.im=0.0;
+ u.c11.re=1.0;
+ u.c11.im=0.0;
+ u.c12.re=0.0;
+ u.c12.im=0.0;
+
+ u.c20.re=0.0;
+ u.c20.im=0.0;
+ u.c21.re=0.0;
+ u.c21.im=0.0;
+ u.c22.re=1.0;
+ u.c22.im=0.0;
+
+ return(u);
+}
+
+
+
+/*copy a complete gauge field*/
+/* THINK OF PARALLELIZATION (RAND!!!)*/
+void copy_gauge_field(su3** to, su3** from){
+ int ix;
+ for(ix=0; ix<VOLUME; ix++){
+ _su3_assign(to[ix][0], from[ix][0]);
+ _su3_assign(to[ix][1], from[ix][1]);
+ _su3_assign(to[ix][2], from[ix][2]);
+ _su3_assign(to[ix][3], from[ix][3]);
+ }
+}
+
+
+
+
+
+/*
+ Set the trafo field for a temporal gauge
+ g(t=0) == ID
+ other g's are determined recursively from U (gfield) requiering that U^{'}_0 != ID
+ => only the U(t=T-1) are not ID!!
+*/
+int init_temporalgauge_trafo(const int V, su3** gfield){
+ int it, iz, iy, ix;
+
+
+ if((void*)(g_trafo = (su3*)calloc(V, sizeof(su3))) == NULL) {
+ printf("malloc error in 'init_temporalgauge_trafo'\n");
+ return(2);
+ }
+
+
+ /* initialize first timeslice (t=0) with unit matrices*/
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ g_trafo[g_ipt[0][ix][iy][iz]] = unit_su3();
+ }
+ }
+ }
+
+
+ /* U^{'}_0(x) g(x) U_0(x) g^{+}(x+0) != ID => g_(x+0) = g(x) U_0(x) */
+ for(it=1; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ _su3_times_su3( g_trafo[g_ipt[it][ix][iy][iz] ] ,
+ g_trafo[ g_ipt[it-1][ix][iy][iz] ] ,
+ gfield[g_ipt[it-1][ix][iy][iz]][0]
+ );
+
+ }
+ }
+ }
+ }
+
+
+ /*
+ allocate and initialize g_tempgauge_field which holds a copy of the
+ global gauge field g_gauge_field which is copied back after the inversion
+ when the temporal gauge is undone again
+ */
+ int i=0;
+ if((void*)(g_tempgauge_field = (su3**)calloc(V, sizeof(su3*))) == NULL) {
+ printf ("malloc error in 'init_temporalgauge_trafo'\n");
+ return(1);
+ }
+ if((void*)(tempgauge_field = (su3*)calloc(4*V+1, sizeof(su3))) == NULL) {
+ printf ("malloc error in 'init_temporalgauge_trafo'\n");
+ return(2);
+ }
+#if (defined SSE || defined SSE2 || defined SSE3)
+ g_tempgauge_field[0] = (su3*)(((unsigned long int)(tempgauge_field)+ALIGN_BASE)&~ALIGN_BASE);
+#else
+ g_tempgauge_field[0] = tempgauge_field;
+#endif
+ for(i = 1; i < V; i++){
+ g_tempgauge_field[i] = g_tempgauge_field[i-1]+4;
+ }
+
+ /* copy the original field */
+ copy_gauge_field(g_tempgauge_field, g_gauge_field);
+
+ return(0);
+}
+
+
+
+void finalize_temporalgauge(){
+ free(g_trafo);
+ free(tempgauge_field);
+ free(g_tempgauge_field);
+}
+
+
+
+/*
+ apply gauge transform to gfield with the trafo stored in trafofield
+*/
+void apply_gtrafo(su3 ** gfield, su3 * trafofield){
+ int it, iz, iy, ix, xpos, mu;
+ su3 temp1;
+ if(g_proc_id == 0) {
+ printf("Applying gauge transformation...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ xpos = g_ipt[it][ix][iy][iz];
+ for(mu=0;mu<4;mu++){
+ /* help = g(x) U_mu(x) */
+ _su3_times_su3( temp1, trafofield[xpos], gfield[xpos][mu] );
+ /* U_mu(x) <- U_mu^{'}(x) = help g^{+}(x+mu)*/
+ _su3_times_su3d( gfield[xpos][mu],temp1, trafofield[ g_iup[xpos][mu] ]);
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+ /* update gauge copy fields in the next call to HoppingMatrix */
+ g_update_gauge_copy = 1;
+}
+
+
+
+
+/*
+ apply the inverse gauge transform to gfield with the trafo stored in trafofield
+*/
+void apply_inv_gtrafo(su3 ** gfield, su3 * trafofield){
+ int it, iz, iy, ix, xpos, mu;
+ su3 temp1, temp2;
+ if(g_proc_id == 0) {
+ printf("Applying INVERSE gauge transformation...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ xpos = g_ipt[it][ix][iy][iz];
+ for(mu=0;mu<4;mu++){
+ /*
+ _su3d_times_su3( temp1, trafofield[xpos], gfield[xpos][mu] );
+
+ _su3_times_su3( gfield[xpos][mu],temp1, trafofield[ g_iup[xpos][mu] ]);
+ */
+
+ /* help = U^{'}_mu(x) g(x+mu)*/
+ _su3_times_su3( temp1, gfield[xpos][mu], trafofield[ g_iup[xpos][mu]] );
+
+ /* U_mu(x) <- g^{+}(x) help */
+ _su3_dagger(temp2, trafofield[xpos] )
+ _su3_times_su3( gfield[xpos][mu], temp2, temp1);
+
+
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+ /* update gauge copy fields in the next call to HoppingMatrix */
+ g_update_gauge_copy = 1;
+}
+
+
+
+
+/*
+ apply inverse gauge transform to spinor
+ U_0(x) = g^{+}(x) U^{'}_0(x) g(x+0)
+ => psi(x) = g^{+}(x) psi^{'}(x)
+ (the primed (^{'}) quantities are the gauge transformed fields)
+*/
+void apply_inv_gtrafo_spinor(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos;
+ spinor temp;
+ if(g_proc_id == 0) {
+ printf("Applying INVERSE gauge transformation to spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ xpos = g_ipt[it][ix][iy][iz];
+ _su3_inverse_multiply(temp.s0, trafofield[xpos], spin[xpos].s0);
+ _su3_inverse_multiply(temp.s1, trafofield[xpos], spin[xpos].s1);
+ _su3_inverse_multiply(temp.s2, trafofield[xpos], spin[xpos].s2);
+ _su3_inverse_multiply(temp.s3, trafofield[xpos], spin[xpos].s3);
+
+ _vector_assign(spin[xpos].s0,temp.s0);
+ _vector_assign(spin[xpos].s1,temp.s1);
+ _vector_assign(spin[xpos].s2,temp.s2);
+ _vector_assign(spin[xpos].s3,temp.s3);
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+
+
+
+/*
+ apply gauge transform to spinor
+ U^{'}_0(x) = g(x) U_0(x) g^{+}(x+0)
+ => psi^{'}(x) = g(x) psi(x)
+ (the primed (^{'}) quantities are the gauge transformed fields)
+*/
+void apply_gtrafo_spinor(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos;
+ spinor temp;
+
+ if(g_proc_id == 0) {
+ printf("Applying gauge transformation to spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ xpos = g_ipt[it][ix][iy][iz];
+ _su3_multiply(temp.s0, trafofield[xpos], spin[xpos].s0);
+ _su3_multiply(temp.s1, trafofield[xpos], spin[xpos].s1);
+ _su3_multiply(temp.s2, trafofield[xpos], spin[xpos].s2);
+ _su3_multiply(temp.s3, trafofield[xpos], spin[xpos].s3);
+
+ _vector_assign(spin[xpos].s0,temp.s0);
+ _vector_assign(spin[xpos].s1,temp.s1);
+ _vector_assign(spin[xpos].s2,temp.s2);
+ _vector_assign(spin[xpos].s3,temp.s3);
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*
+ apply gauge transform to ODD spinor
+ U^{'}_0(x) = g(x) U_0(x) g^{+}(x+0)
+ => psi^{'}(x) = g(x) psi(x)
+ (the primed (^{'}) quantities are the gauge transformed fields)
+*/
+void apply_gtrafo_spinor_odd(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos, oddpos;
+ spinor temp;
+
+ if(g_proc_id == 0) {
+ printf("Applying gauge transformation to odd spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ if(((it+ix+iy+iz)%2 != 0)){
+ /* odd positions */
+ xpos = g_ipt[it][ix][iy][iz];
+ oddpos = g_lexic2eosub[ xpos ];
+
+ _su3_multiply(temp.s0, trafofield[xpos], spin[oddpos].s0);
+ _su3_multiply(temp.s1, trafofield[xpos], spin[oddpos].s1);
+ _su3_multiply(temp.s2, trafofield[xpos], spin[oddpos].s2);
+ _su3_multiply(temp.s3, trafofield[xpos], spin[oddpos].s3);
+
+ _vector_assign(spin[oddpos].s0,temp.s0);
+ _vector_assign(spin[oddpos].s1,temp.s1);
+ _vector_assign(spin[oddpos].s2,temp.s2);
+ _vector_assign(spin[oddpos].s3,temp.s3);
+
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+/*
+ apply inverse gauge transform to ODD spinor
+ U_0(x) = g^{+}(x) U^{'}_0(x) g(x+0)
+ => psi(x) = g^{+}(x) psi^{'}(x)
+ (the primed (^{'}) quantities are the gauge ttemp.s0ransformed fields)
+*/
+void apply_inv_gtrafo_spinor_odd(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos, oddpos;
+ spinor temp;
+ if(g_proc_id == 0) {
+ printf("Applying INVERSE gauge transformation to odd spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ if(((it+ix+iy+iz)%2 != 0)){
+ /* odd positions */
+ xpos = g_ipt[it][ix][iy][iz];
+ oddpos = g_lexic2eosub[ xpos ];
+
+ _su3_inverse_multiply(temp.s0, trafofield[xpos], spin[oddpos].s0);
+ _su3_inverse_multiply(temp.s1, trafofield[xpos], spin[oddpos].s1);
+ _su3_inverse_multiply(temp.s2, trafofield[xpos], spin[oddpos].s2);
+ _su3_inverse_multiply(temp.s3, trafofield[xpos], spin[oddpos].s3);
+
+ _vector_assign(spin[oddpos].s0,temp.s0);
+ _vector_assign(spin[oddpos].s1,temp.s1);
+ _vector_assign(spin[oddpos].s2,temp.s2);
+ _vector_assign(spin[oddpos].s3,temp.s3);
+
+
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*
+ apply gauge transform to EVENspinor
+ U^{'}_0(x) = g(x) U_0(x) g^{+}(x+0)
+ => psi^{'}(x) = g(x) psi(x)
+ (the primed (^{'}) quantities are the gauge transformed fields)
+*/
+void apply_gtrafo_spinor_even(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos, evenpos;
+ spinor temp;
+
+ if(g_proc_id == 0) {
+ printf("Applying gauge transformation to even spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ if(((it+ix+iy+iz)%2 == 0)){
+ /* even positions */
+ xpos = g_ipt[it][ix][iy][iz];
+ evenpos = g_lexic2eosub[ xpos ];
+
+ _su3_multiply(temp.s0, trafofield[xpos], spin[evenpos].s0);
+ _su3_multiply(temp.s1, trafofield[xpos], spin[evenpos].s1);
+ _su3_multiply(temp.s2, trafofield[xpos], spin[evenpos].s2);
+ _su3_multiply(temp.s3, trafofield[xpos], spin[evenpos].s3);
+
+ _vector_assign(spin[evenpos].s0,temp.s0);
+ _vector_assign(spin[evenpos].s1,temp.s1);
+ _vector_assign(spin[evenpos].s2,temp.s2);
+ _vector_assign(spin[evenpos].s3,temp.s3);
+
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+/*
+ apply inverse gauge transform to EVEN spinor
+ U_0(x) = g^{+}(x) U^{'}_0(x) g(x+0)
+ => psi(x) = g^{+}(x) psi^{'}(x)
+ (the primed (^{'}) quantities are the gauge transformed fields)
+*/
+void apply_inv_gtrafo_spinor_even(spinor * spin, su3 * trafofield){
+ int it, iz, iy, ix, xpos, evenpos;
+ spinor temp;
+ if(g_proc_id == 0) {
+ printf("Applying INVERSE gauge transformation to even spinor...");
+ }
+ for(it=0; it<T; it++){
+ for(ix=0; ix<LX; ix++){
+ for(iy=0; iy<LY; iy++){
+ for(iz=0; iz<LZ; iz++){
+ if(((it+ix+iy+iz)%2 == 0)){
+ /* even positions */
+ xpos = g_ipt[it][ix][iy][iz];
+ evenpos = g_lexic2eosub[ xpos ];
+
+ _su3_inverse_multiply(temp.s0, trafofield[xpos], spin[evenpos].s0);
+ _su3_inverse_multiply(temp.s1, trafofield[xpos], spin[evenpos].s1);
+ _su3_inverse_multiply(temp.s2, trafofield[xpos], spin[evenpos].s2);
+ _su3_inverse_multiply(temp.s3, trafofield[xpos], spin[evenpos].s3);
+
+ _vector_assign(spin[evenpos].s0,temp.s0);
+ _vector_assign(spin[evenpos].s1,temp.s1);
+ _vector_assign(spin[evenpos].s2,temp.s2);
+ _vector_assign(spin[evenpos].s3,temp.s3);
+
+
+ }
+ }
+ }
+ }
+ }
+ if(g_proc_id == 0) {
+ printf("done\n");
+ }
+}
+
+
+
+
+
+
+
+
View
41 temporalgauge.h
@@ -0,0 +1,41 @@
+/***********************************************************************
+ * Copyright (C) 2010 Florian Burger
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _TEMPORALGAUGE_H
+#define _TEMPORALGAUGE_H
+
+int init_temporalgauge_trafo(const int V, su3** gfield);
+void apply_gtrafo(su3 ** gfield, su3 * trafofield);
+void apply_gtrafo_spinor(spinor * spin, su3 * trafofield);
+void apply_inv_gtrafo(su3 ** gfield, su3 * trafofield);
+void apply_inv_gtrafo_spinor(spinor * spin, su3 * trafofield);
+void finalize_temporalgauge();
+
+void apply_gtrafo_spinor_odd(spinor * spin, su3 * trafofield);
+void apply_inv_gtrafo_spinor_odd(spinor * spin, su3 * trafofield);
+void apply_gtrafo_spinor_even(spinor * spin, su3 * trafofield);
+void apply_inv_gtrafo_spinor_even(spinor * spin, su3 * trafofield);
+
+void copy_gauge_field(su3** to, su3** from);
+
+#endif
+
+
+
+
Please sign in to comment.
Something went wrong with that request. Please try again.