From aa8e08ecf390ebcc502856ad9a3e1193609052aa Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Sat, 10 Jul 2021 18:02:36 +0200 Subject: [PATCH 01/35] better document huayno integrators and some test fixes --- src/amuse/community/huayno/interface.py | 60 ++++++------- .../test/suite/codes_tests/test_huayno.py | 88 ++++++++++++------- 2 files changed, 83 insertions(+), 65 deletions(-) diff --git a/src/amuse/community/huayno/interface.py b/src/amuse/community/huayno/interface.py index 38bb844f93..cd32b6b807 100644 --- a/src/amuse/community/huayno/interface.py +++ b/src/amuse/community/huayno/interface.py @@ -175,42 +175,32 @@ class Huayno(GravitationalDynamics,GravityFieldCode): __interface__ = HuaynoInterface class inttypes(object): - # http://stackoverflow.com/questions/36932/whats-the-best-way-to-implement-an-enum-in-python - SHARED2=1 - EXTRAPOLATE=5 - PASS_KDK=2 - PASS_DKD=7 - HOLD_KDK=3 - HOLD_DKD=8 - PPASS_DKD=9 - BRIDGE_KDK=4 - BRIDGE_DKD=10 - CC=11 - CC_KEPLER=12 - OK=13 - KEPLER=14 - SHARED4=15 - SHARED6=18 - SHARED8=19 - SHARED10=20 - SHAREDBS=21 - CCC=22 - CCC_KEPLER=23 - CC_BS=24 - CCC_BS=25 - BS_CC_KEPLER=26 - CC_BSA=27 - CCC_BSA=28 - SHARED2_COLLISIONS=29 - SHARED4_COLLISIONS=30 - SHARED6_COLLISIONS=31 - SHARED8_COLLISIONS=32 - SHARED10_COLLISIONS=33 - + """ + CONSTANT# = constant global timestep, of different order + SHARED# = shared, but varying global timestep, of different order + SHARED#_COLLISION = shared, but varying global timestep, of different order with collision detection + CC_.. = various variant of connected component (termination with KEPLER or Bulirsch-stoer, see paper Janes + CCC_... = with centering of subsys + OK = Optimal Kick (see paper Janes) + PASS, HOLD, BRIDGE and variants= momentum conserving individual timestepping see paper Pelupessy + NAIVE = naive implementation of individual timestepping + others are experimental, testing, development + """ @classmethod def _list(cls): return set([x for x in cls.__dict__.keys() if not x.startswith('_')]) + all_inttypes=dict(CONSTANT = 0, SHARED2 = 1, PASS_KDK = 2, HOLD_KDK = 3, BRIDGE_KDK = 4, + EXTRAPOLATE = 5, VARIABLE = 6, PASS_DKD = 7, HOLD_DKD = 8, PPASS_DKD = 9, BRIDGE_DKD = 10, + CC = 11, CC_KEPLER = 12, OK = 13, KEPLER = 14, SHARED4 = 15, FOURTH_M4 = 16, FOURTH_M5 = 17, + SHARED6 = 18, SHARED8 = 19, SHARED10 = 20, SHAREDBS = 21, CCC = 22, CCC_KEPLER = 23, + CC_BS = 24, CCC_BS = 25, BS_CC_KEPLER = 26, CC_BSA = 27, CCC_BSA = 28, SHARED2_COLLISIONS = 29, + SHARED4_COLLISIONS = 30, SHARED6_COLLISIONS = 31, SHARED8_COLLISIONS = 32, + SHARED10_COLLISIONS = 33, CONSTANT2 = 34, CONSTANT4 = 35, CONSTANT6 = 36, + CONSTANT8 = 37, CONSTANT10 = 38, ) + + for key, val in all_inttypes.items(): + setattr(inttypes, key, val) def __init__(self, convert_nbody = None, **options): self.stopping_conditions = StoppingConditions(self) @@ -260,11 +250,15 @@ def define_parameters(self, handler): default_value = 0 ) + inttypes=sorted([(getattr(self.inttypes,t),t ) + for i,t in enumerate(sorted(self.inttypes._list()))]) + handler.add_method_parameter( "get_inttype_parameter", "set_inttype_parameter", "inttype_parameter", - "integrator method to use", + "integrator method to use, this can be one of: "+ + ",".join( ["{0}={1}".format(i, t) for i,t in inttypes]), default_value = 8 ) diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index ae55742c4c..bfc03fcdd2 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -369,39 +369,68 @@ def test14(self): import hashlib numpy.random.seed(123456) - particles = plummer.new_plummer_model(64) + particles = plummer.new_plummer_model(32) sha=hashlib.sha1() - class inttypes(object): - SHARED2=1 - EXTRAPOLATE=5 - PASS_KDK=2 - PASS_DKD=7 - HOLD_KDK=3 - HOLD_DKD=8 - PPASS_DKD=9 - BRIDGE_KDK=4 - BRIDGE_DKD=10 - CC=11 - CC_KEPLER=12 - OK=13 - KEPLER=14 - SHARED4=15 - SHARED6=18 - SHARED8=19 - SHARED10=20 - SHAREDBS=21 + test_set=["CONSTANT", "SHARED2", "EXTRAPOLATE", + "PASS_KDK", "PASS_DKD", "HOLD_KDK", "HOLD_DKD", + "PPASS_DKD", "BRIDGE_KDK", "BRIDGE_DKD", + "CC", "CC_KEPLER", "CC_BS", "CC_BSA", + "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", + "SHARED10", "SHAREDBS"] - @classmethod - def _list(cls): - return set([x for x in cls.__dict__.keys() if not x.startswith('_')]) + for itype in test_set: + instance = Huayno() + instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] + instance.particles.add_particles(particles) + E1=instance.kinetic_energy+instance.potential_energy + instance.evolve_model(0.125 | nbody_system.time) + E2=instance.kinetic_energy+instance.potential_energy + if itype!="CONSTANT": + self.assertLess((E2-E1).number, 1.e-5) + + part_out= instance.particles.copy() + position = part_out.position.number + if hasattr(position,'tobytes'): + as_bytes = position.tobytes() + else: + as_bytes = numpy.array(position.data, copy=True, order='C') + sha.update(as_bytes) + + instance.stop() + + # this result is probably dependent on system architecture hence no good for assert + print() + print(sha.hexdigest()) + print("7e63d59b807a12d92671ea6da9777e262fa8e2f9") + + def test14b(self): + import hashlib + + numpy.random.seed(123456) + particles = plummer.new_plummer_model(16) + p2 = plummer.new_plummer_model(16) + p2.mass*=0 + sha=hashlib.sha1() - for itype in sorted(inttypes._list()): - if itype in ("KEPLER"): continue + test_set=["CONSTANT", "SHARED2", "EXTRAPOLATE", + "PASS_KDK", "PASS_DKD", "HOLD_KDK", "HOLD_DKD", + "PPASS_DKD", "BRIDGE_KDK", "BRIDGE_DKD", + "CC", "CC_KEPLER", "CC_BS", "CC_BSA", + "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", + "SHARED10", "SHAREDBS"] + + for itype in test_set: instance = Huayno() - instance.parameters.inttype_parameter=getattr(Huayno.inttypes,itype) + instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] instance.particles.add_particles(particles) + instance.particles.add_particles(p2) + E1=instance.kinetic_energy+instance.potential_energy instance.evolve_model(0.125 | nbody_system.time) + E2=instance.kinetic_energy+instance.potential_energy + if itype!="CONSTANT": + self.assertLess((E2-E1).number, 1.e-5) + part_out= instance.particles.copy() position = part_out.position.number if hasattr(position,'tobytes'): @@ -415,7 +444,7 @@ def _list(cls): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("8e71f9441578a43af4af927943577ad2c4130e4c") + print("9e2025989eead2b37198db730bbaa32fd7dd1051") def test15(self): particles = plummer.new_plummer_model(512) @@ -516,15 +545,10 @@ def _run_collision_with_integrator(self, inttype_parameter): merged.position = (p1 + p2).center_of_mass() merged.velocity = (p1 + p2).center_of_mass_velocity() - print(instance.model_time) - print(instance.particles) instance.particles.remove_particles(collisions.particles(0) + collisions.particles(1)) instance.particles.add_particles(sticky_merged) instance.evolve_model(1.0 | nbody_system.time) - print() - print(instance.model_time) - print(instance.particles) self.assertTrue(collisions.is_set()) self.assertTrue(instance.model_time < 1.0 | nbody_system.time) self.assertEqual(len(collisions.particles(0)), 1) From b83f47e403b3230dcf8100b00c40d59e6125bbdf Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Wed, 14 Jul 2021 14:57:33 +0200 Subject: [PATCH 02/35] accelerate huayno for test particles (zero mass particles) (wip) --- src/amuse/community/huayno/Makefile | 4 +- src/amuse/community/huayno/interface.c | 16 +- src/amuse/community/huayno/interface.py | 23 ++ src/amuse/community/huayno/src/evolve.c | 247 ++++++++++++------ src/amuse/community/huayno/src/evolve.h | 12 +- src/amuse/community/huayno/src/evolve_bs.c | 56 ++-- src/amuse/community/huayno/src/evolve_cc.c | 18 +- src/amuse/community/huayno/src/evolve_cl.c | 81 +++--- .../community/huayno/src/evolve_kepler.c | 86 +++++- .../community/huayno/src/evolve_kepler.h | 1 - src/amuse/community/huayno/src/evolve_ok.c | 4 +- src/amuse/community/huayno/src/evolve_sf.c | 80 ++++-- .../huayno/src/evolve_shared_collisions.c | 18 +- .../test/suite/codes_tests/test_huayno.py | 25 +- 14 files changed, 467 insertions(+), 204 deletions(-) diff --git a/src/amuse/community/huayno/Makefile b/src/amuse/community/huayno/Makefile index 7da6ae17ec..a6c7cddf5e 100644 --- a/src/amuse/community/huayno/Makefile +++ b/src/amuse/community/huayno/Makefile @@ -92,8 +92,8 @@ huayno_worker_mp: CXXFLAGS += $(OPENMP_CFLAGS) huayno_worker_mp: __init__.py worker_code.cc worker_code.h $(OPENMP_BUILDDIR)/libhuayno.a $(OPENMP_BUILDDIR)/interface.o $(MPICXX) $(SC_FLAGS) $(CXXFLAGS) $(INCLUDE) $(LDFLAGS) worker_code.cc $(OPENMP_BUILDDIR)/interface.o $(OPENMP_BUILDDIR)/libhuayno.a -o $@ $(LIBS) $(SC_CLIBS) -huayno_worker_mp.so: CFLAGS += $(OPENMP_CFLAGS) -huayno_worker_mp.so: CXXFLAGS += $(OPENMP_CFLAGS) +#~ huayno_worker_mp.so: CFLAGS += $(OPENMP_CFLAGS) +#~ huayno_worker_mp.so: CXXFLAGS += $(OPENMP_CFLAGS) huayno_worker_cl: CFLAGS += -fopenmp -DEVOLVE_OPENCL huayno_worker_cl: CXXFLAGS += -fopenmp -DEVOLVE_OPENCL diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index 4c01b731a8..ba72d2c97d 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -35,7 +35,8 @@ int initialize_code() mainsys.part=(struct particle*) malloc(nmax*sizeof(struct particle)); if(mainsys.part == NULL) return -1; mainsys.last=NULL; - dt_param=.03; + dt_param=.03; + accel_zero_mass=1; eps2=0.; inttype=8; t_now=0.; @@ -58,6 +59,7 @@ int cleanup_code() } mainsys.last=NULL; dt_param=.03; + accel_zero_mass=1; t_now=0.; dtime=0.; eps2=0.; @@ -335,6 +337,18 @@ int get_verbosity_parameter(int *t) return 0; } +int set_accel_zero_mass_parameter(int t) +{ + accel_zero_mass=t; + return 0; +} + +int get_accel_zero_mass_parameter(int *t) +{ + *t=accel_zero_mass; + return 0; +} + int evolve_model(double t_end) { diff --git a/src/amuse/community/huayno/interface.py b/src/amuse/community/huayno/interface.py index cd32b6b807..c93f080091 100644 --- a/src/amuse/community/huayno/interface.py +++ b/src/amuse/community/huayno/interface.py @@ -152,6 +152,21 @@ def set_eps2_parameter(): function.result_type = 'i' return function + @legacy_function + def get_accel_zero_mass_parameter(): + function = LegacyFunctionSpecification() + function.addParameter('accelerate_zero_mass', dtype='b', direction=function.OUT) + function.result_type = 'i' + return function + + @legacy_function + def set_accel_zero_mass_parameter(): + function = LegacyFunctionSpecification() + function.addParameter('accelerate_zero_mass', dtype='b', direction=function.IN) + function.result_type = 'i' + return function + + def set_eps2(self, e): return self.set_eps2_parameter(e) @@ -250,6 +265,14 @@ def define_parameters(self, handler): default_value = 0 ) + handler.add_method_parameter( + "get_accel_zero_mass_parameter", + "set_accel_zero_mass_parameter", + "accelerate_zero_mass", + "accelerate zero mass particles (should always be true, except for testing)", + default_value = True + ) + inttypes=sorted([(getattr(self.inttypes,t),t ) for i,t in enumerate(sorted(self.inttypes._list()))]) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 38deaa0426..6de65c53a4 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -21,7 +21,8 @@ int verbosity=0; FLOAT eps2; FLOAT dt_param; -struct sys zerosys ={0,NULL,NULL,NULL}; +struct sys zerosys ={0,0,NULL,NULL,NULL,NULL,NULL}; +int accel_zero_mass=1; struct diagnostics global_diag; struct diagnostics *diag; @@ -34,12 +35,14 @@ static void report(struct sys s,DOUBLE etime, int inttype); void move_system(struct sys s, DOUBLE dpos[3],DOUBLE dvel[3],int dir) { + struct particle *ipart; for(UINT p=0;ppos[i],ipart->pos_e[i],dir*dpos[i]) + COMPSUMV(ipart->vel[i],ipart->vel_e[i],dir*dvel[i]) } } } @@ -47,14 +50,16 @@ void move_system(struct sys s, DOUBLE dpos[3],DOUBLE dvel[3],int dir) void system_center_of_mass(struct sys s, DOUBLE *cmpos, DOUBLE *cmvel) { DOUBLE mass=0.,pos[3]={0.,0.,0.},vel[3]={0.,0.,0.}; + struct particle *ipart; for(UINT p=0;pmass*ipart->pos[i]; + vel[i]+=(DOUBLE) ipart->mass*ipart->vel[i]; } - mass+=(DOUBLE) s.part[p].mass; + mass+=(DOUBLE) ipart->mass; } for(int i=0;i<3;i++) { @@ -67,10 +72,14 @@ FLOAT system_kinetic_energy(struct sys s) { UINT i; DOUBLE e=0.; - for(i=0;imass*( ipart->vel[0]*ipart->vel[0]+ + ipart->vel[1]*ipart->vel[1]+ + ipart->vel[2]*ipart->vel[2] ); + } return (FLOAT) e; } @@ -78,7 +87,12 @@ FLOAT system_potential_energy(struct sys s) { UINT i; DOUBLE e=0.; - for(i=0;imass*ipart->pot; + } return (FLOAT) e/2; } @@ -100,16 +114,17 @@ void stop_code() void init_evolve(struct sys s,int inttype) { - UINT i; - for(i=0;ipostime=0.; + ipart->pot=0.; #ifdef COMPENSATED_SUMMP - s.part[i].pos_e[0]=0.;s.part[i].pos_e[1]=0.;s.part[i].pos_e[2]=0.; + ipart->pos_e[0]=0.;ipart->pos_e[1]=0.;ipart->pos_e[2]=0.; #endif #ifdef COMPENSATED_SUMMV - s.part[i].vel_e[0]=0.;s.part[i].vel_e[1]=0.;s.part[i].vel_e[2]=0.; + ipart->vel_e[0]=0.;ipart->vel_e[1]=0.;ipart->vel_e[2]=0.; #endif } potential(s,s); @@ -179,11 +194,12 @@ void sum_diagnostics(struct diagnostics* total,struct diagnostics* diag) void do_evolve(struct sys s, double dt, int inttype) { - UINT p; int i,clevel; + struct particle *ipart; if(dt==0) return; - for(p=0;ppostime=0.; clevel=0; + if(accel_zero_mass) split_zeromass(&s); zero_diagnostics(diag); switch (inttype) { @@ -275,8 +291,7 @@ void do_evolve(struct sys s, double dt, int inttype) evolve_ok2(clevel,s, zeroforces, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); break; case KEPLER: - if(s.n==2) evolve_kepler(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt); - else evolve_kepler_test(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt); + evolve_kepler(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt); break; case FOURTH_M4: evolve_sf_4m4(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); @@ -303,20 +318,21 @@ void do_evolve(struct sys s, double dt, int inttype) ENDRUN("unknown integrator\n"); break; } - for(p=0;ppot=0; potential(s,s); if(verbosity>0) report(s,(DOUBLE) dt, inttype); } void drift(int clevel,struct sys s, DOUBLE etime, DOUBLE dt) { - UINT i; - for(i=0;ipos[0],ipart->pos_e[0],dt*ipart->vel[0]) + COMPSUMP(ipart->pos[1],ipart->pos_e[1],dt*ipart->vel[1]) + COMPSUMP(ipart->pos[2],ipart->pos_e[2],dt*ipart->vel[2]) + ipart->postime=etime; } diag->dstep[clevel]++; diag->dcount[clevel]+=s.n; @@ -325,47 +341,49 @@ void drift(int clevel,struct sys s, DOUBLE etime, DOUBLE dt) static void kick_cpu(struct sys s1, struct sys s2, DOUBLE dt) { - UINT i,j; FLOAT dx[3],dr3,dr2,dr,acci; FLOAT acc[3]; + struct particle *ipart, *jpart; -#pragma omp parallel for if((ULONG) s1.n*s2.n>MPWORKLIMIT && !omp_in_parallel()) default(none) \ - private(i,j,dx,dr3,dr2,dr,acc,acci) \ +#pragma omp parallel for if((ULONG) s1.n*(s2.n-s2.nzero)>MPWORKLIMIT && !omp_in_parallel()) default(none) \ + private(dx,dr3,dr2,dr,acc,acci,ipart,jpart) \ shared(dt,s1,s2,eps2) - for(i=0;ipos[0]-jpart->pos[0]; + dx[1]=ipart->pos[1]-jpart->pos[1]; + dx[2]=ipart->pos[2]-jpart->pos[2]; dr2=dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2]+eps2; if(dr2>0) { dr=sqrt(dr2); dr3=dr*dr2; - acci=s2.part[j].mass/dr3; + acci=jpart->mass/dr3; acc[0]-=dx[0]*acci; acc[1]-=dx[1]*acci; acc[2]-=dx[2]*acci; } } - COMPSUMV(s1.part[i].vel[0],s1.part[i].vel_e[0],dt*acc[0]); - COMPSUMV(s1.part[i].vel[1],s1.part[i].vel_e[1],dt*acc[1]); - COMPSUMV(s1.part[i].vel[2],s1.part[i].vel_e[2],dt*acc[2]); + COMPSUMV(ipart->vel[0],ipart->vel_e[0],dt*acc[0]); + COMPSUMV(ipart->vel[1],ipart->vel_e[1],dt*acc[1]); + COMPSUMV(ipart->vel[2],ipart->vel_e[2],dt*acc[2]); } } void kick(int clevel,struct sys s1, struct sys s2, DOUBLE dt) { #ifdef EVOLVE_OPENCL - if((ULONG) s1.n*s2.n>CLWORKLIMIT) + if((ULONG) s1.n*(s2.n-s2.nzero)>CLWORKLIMIT) { #pragma omp critical kick_cl(s1,s2,dt); @@ -387,37 +405,39 @@ void kick(int clevel,struct sys s1, struct sys s2, DOUBLE dt) static void potential_cpu(struct sys s1,struct sys s2) { - UINT i,j; FLOAT dx[3],dr2,dr; FLOAT pot; + struct particle *ipart, *jpart; -#pragma omp parallel for if((ULONG) s1.n*s2.n>MPWORKLIMIT && !omp_in_parallel()) default(none) \ - private(i,j,dx,dr2,dr,pot) \ +#pragma omp parallel for if((ULONG) s1.n*(s2.n-s2.nzero)>MPWORKLIMIT && !omp_in_parallel()) default(none) \ + private(dx,dr2,dr,pot, ipart,jpart) \ shared(s1,s2,eps2) - for(i=0;ipos[0]-jpart->pos[0]; + dx[1]=ipart->pos[1]-jpart->pos[1]; + dx[2]=ipart->pos[2]-jpart->pos[2]; dr2=dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2]+eps2; if(dr2>0) { dr=sqrt(dr2); - pot-=s2.part[j].mass/dr; + pot-=jpart->mass/dr; } } - s1.part[i].pot+=pot; + ipart->pot+=pot; } } void potential(struct sys s1, struct sys s2) { #ifdef EVOLVE_OPENCL - if((ULONG) s1.n*s2.n>CLWORKLIMIT) + if((ULONG) s1.n*(s2.n-s2.nzero)>CLWORKLIMIT) { #pragma omp critical potential_cl(s1,s2); @@ -477,21 +497,24 @@ inline FLOAT timestep_ij(struct particle *i, struct particle *j,int dir) { static void timestep_cpu(struct sys s1, struct sys s2,int dir) { - UINT i,j; + UINT i,j, jmax; FLOAT timestep,tau; -#pragma omp parallel for if((ULONG) s1.n*s2.n>MPWORKLIMIT && !omp_in_parallel()) default(none) \ - private(i,j,tau,timestep) copyin(dt_param) \ + struct particle *ipart; +#pragma omp parallel for if((ULONG) (s1.n*s2.n-s1.nzero*s2.nzero)>MPWORKLIMIT && !omp_in_parallel()) default(none) \ + private(i,j,tau,timestep, jmax, ipart) copyin(dt_param) \ shared(s1,s2,stdout,dir) for(i=0;i=s1.n-s1.nzero) jmax=s2.n-s2.nzero; + for(j=0;jtimestep=timestep; } } @@ -518,6 +541,7 @@ static void report(struct sys s,DOUBLE etime, int inttype) int maxlevel=0,i; long int ktot=0,dtot=0, kstot=0,dstot=0,ttot=0,tstot=0; UINT n,p,err=0; + struct particle *ipart; n=s.n; printf("** report **\n"); printf("interaction counts:\n"); @@ -541,10 +565,7 @@ static void report(struct sys s,DOUBLE etime, int inttype) printf("steps: %18li, equiv: %18li, maxlevel: %i\n", diag->deepsteps,((long) 1)<postime != (DOUBLE) etime) err++; printf("postime errors: %u \n",err); printf("target time, actual time: %12.8g %12.8g %12.8g\n", (double) etime,(double) diag->simtime,(double) ((DOUBLE) etime-diag->simtime)); @@ -596,26 +617,49 @@ static void report(struct sys s,DOUBLE etime, int inttype) fflush(stdout); } -struct sys join(struct sys s1,struct sys s2) +void join_array(UINT n1, struct particle *p1, struct particle *l1, + UINT n2, struct particle *p2, struct particle *l2, + UINT *n, struct particle **p, struct particle **l) { - struct sys s=zerosys; - if(s1.n == 0) return s2; - if(s2.n == 0) return s1; - s.n=s1.n+s2.n; - if(s1.part+s1.n == s2.part) + if(n1==0 && n2==0) { - s.part=s1.part; - s.last=s2.last; - } else + *n=0;*p=NULL;*l=NULL; + } + if(n1!=0 && n2==0) + { + *n=n1;*p=p1;*l=l1; + } + if(n1==0 && n2!=0) + { + *n=n2;*p=p2;*l=l2; + } + if(n1!=0 && n2!=0) { - if(s2.part+s2.n == s1.part) + *n=n1+n2; + if(l1+1==p2) { - s.part=s2.part; - s.last=s1.last; - } else - ENDRUN("join error 1"); - } - if(s.last-s.part + 1 != s.n) ENDRUN("join error 2"); + *p=p1;*l=l2; + } + else + { + if(l2+1==p1) + { + *p=p2;*l=l1; + } else ENDRUN("join_array error"); + } + } +} + +struct sys join(struct sys s1,struct sys s2) +{ + struct sys s=zerosys; + if(s1.n==0) return s2; + if(s2.n==0) return s1; + join_array(s1.n-s1.nzero, s1.part, s1.last, s2.n-s2.nzero, s2.part,s2.last, &s.n, &s.part, &s.last); + join_array(s1.nzero, s1.zeropart, s1.lastzero, s2.nzero, s2.zeropart,s2.lastzero, &s.nzero, &s.zeropart, &s.lastzero); + s.n=s.n+s.nzero; + if(s.n-s.nzero>0 && s.last-s.part + 1 != s.n-s.nzero) ENDRUN("join error 1"); + if(s.nzero>0 && s.lastzero-s.zeropart + 1 != s.nzero) ENDRUN("join error 2"); return s; } @@ -624,9 +668,11 @@ FLOAT global_timestep(struct sys s) UINT i; FLOAT mindt; mindt=HUGE_VAL; + struct particle *ipart; for(i=0;is.part[i].timestep) mindt=s.part[i].timestep; + ipart=GETPART(s, i); + if(mindt>ipart->timestep) mindt=ipart->timestep; } return mindt; } @@ -647,3 +693,42 @@ void dkd(int clevel,struct sys s1,struct sys s2, DOUBLE stime, DOUBLE etime, DOU if(s2.n>0) kick(clevel,s2, s1, dt); drift(clevel,s1,etime, dt/2); } + +void split_zeromass(struct sys *s) +{ + UINT i=0; + struct particle *left, *right; + if(s->n==0) return; + if(s->n-s->nzero==0) + { + if(s->lastzero-s->zeropart+1!=s->nzero) ENDRUN( "split_zeromass malformed input sys"); + return; + } + if(s->nzero!=0 && s->last+1!=s->zeropart) + ENDRUN("split_zeromass can only work on fully contiguous sys"); + left=s->part; + right=s->part+(s->n-1); + while(1) + { + if(i>=s->n) ENDRUN("split_zeromass error 1"); + i++; + while(left->mass!=0 && leftmass==0 && leftmass!=0) left++; + s->nzero=s->n-(left-s->part); + if(s->nzero<0) ENDRUN("split_zeromass find negative number of part"); + if(s->nzero>0) + { + s->last=left-1; + s->zeropart=left; + s->lastzero=left+(s->nzero-1); + } + if((left-s->part)+s->nzero !=s->n) ENDRUN( "split_zeromass error 2"); + for(i=0;i<(s->n-s->nzero);i++) if(s->part[i].mass==0) ENDRUN ("split_zromass error 3"); + for(i=s->n-s->nzero;in;i++) if(s->part[i].mass!=0) ENDRUN ("split_zeromass error 4"); +} diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 6a18923f96..0a13fb1e27 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -46,12 +46,16 @@ struct jparticle struct sys { - UINT n; - struct particle *part; - struct particle *last; + UINT n, nzero; // n=total particles, nzero=# zero mass particles + struct particle *part; // start of particles (non-zero mass) + struct particle *zeropart; // start of zero mass particles + struct particle *last; // last of non-zero mass; not needed, serves mainly to check consistency + struct particle *lastzero; // last of zero mass; not needed, serves mainly to check consistency struct sys *next_cc; // used in the CC split only }; +#define GETPART(s, i) (ipos[0]; + bs.part[i].pos[1]=ipart->pos[1]; + bs.part[i].pos[2]=ipart->pos[2]; + bs.part[i].vel[0]=ipart->vel[0]; + bs.part[i].vel[1]=ipart->vel[1]; + bs.part[i].vel[2]=ipart->vel[2]; } } void bsys_to_sys(struct bsys bs, struct sys s) { UINT i; + struct particle* ipart; if(s.n!=bs.n) ENDRUN("bsys to sys copy mismatch %d,%d\n",s.n,bs.n); if(!bs.part || !s.part) ENDRUN("bsys to sys unallocated error\n"); for(i=0;ipos[0]=bs.part[i].pos[0]; + ipart->pos[1]=bs.part[i].pos[1]; + ipart->pos[2]=bs.part[i].pos[2]; + ipart->vel[0]=bs.part[i].vel[0]; + ipart->vel[1]=bs.part[i].vel[1]; + ipart->vel[2]=bs.part[i].vel[2]; } } @@ -135,14 +139,16 @@ static int BulirschStoer(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DO struct bsys bsys_array1[JMAX]; struct bsys *jline; struct bsys *j1line; - struct sys tmpsys; + struct sys tmpsys=zerosys; int j,k; UINT i; DOUBLE error; + struct particle *ipart; - tmpsys.n=s.n; + tmpsys.n=s.n; // todo: fix zero mass part, though it will work as is tmpsys.part=(struct particle*) malloc(s.n*sizeof(struct particle)); if(!tmpsys.part) ENDRUN("failed allocation of tmpsys\n"); + tmpsys.last=tmpsys.part+tmpsys.n-1; ZEROBSYS_ARRAY(bsys_array) ZEROBSYS_ARRAY(bsys_array1) @@ -164,7 +170,7 @@ static int BulirschStoer(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DO } SWAP(jline,j1line, struct bsys*) - for(i=0;ibsstep[clevel]+=1; diag->jcount[clevel]+=j; bsys_to_sys(jline[j-1],tmpsys); - for(i=0;ipos_e[0]=0.; + ipart->pos_e[1]=0.; + ipart->pos_e[2]=0.; #endif #ifdef COMPENSATED_SUMMV - for(i=0;ivel_e[0]=0.; + ipart->vel_e[1]=0.; + ipart->vel_e[2]=0.; #endif + } FREEBSYS_ARRAY(bsys_array); FREEBSYS_ARRAY(bsys_array1); free(tmpsys.part); @@ -274,7 +284,7 @@ static void ndkd(int clevel,int n,struct sys s1,struct sys s2, DOUBLE stime, DOU static void n_cc_kepler(int clevel,int n,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { UINT id; - struct sys tmpsys; + struct sys tmpsys=zerosys; FLOAT odt_param=dt_param; dt_param=dt_param/n; tmpsys.n=s.n; diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 074b596e2e..496dd13d54 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -18,8 +18,8 @@ //#define CC_DEBUG // perform (time-consuming, but thorough) CC sanity checks -#define IS_ZEROSYS(SYS) (((SYS)->n == 0) && ((SYS)->part == NULL) && ((SYS)->last == NULL) && ((SYS)->next_cc == NULL)) -#define IS_ZEROSYSs(SYS) (((SYS).n == 0) && ((SYS).part == NULL) && ((SYS).last == NULL) && ((SYS).next_cc == NULL)) +#define IS_ZEROSYS(SYS) (((SYS)->n == 0) && ((SYS)->nzero == 0) && ((SYS)->part == NULL) && ((SYS)->last == NULL) && ((SYS)->next_cc == NULL)) +#define IS_ZEROSYSs(SYS) (((SYS).n == 0) && ((SYS).nzero == 0) && ((SYS).part == NULL) && ((SYS).last == NULL) && ((SYS).next_cc == NULL)) #define LOG_CC_SPLIT(C, R) \ { \ LOG("clevel = %d s.n = %d c.n = {", clevel, s.n); \ @@ -41,6 +41,8 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) dt=fabs(dt); diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics struct sys *c_next; + if(s.nzero>0 && s.n!=s.nzero && s.zeropart!=s.last+1) + ENDRUN("split_cc only works on contiguous systems"); c_next = c; *c_next = zerosys; UINT processed = 0; // increase if something is added from the stack to the cc @@ -115,7 +117,7 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) if (r->n > 0) { r->part = &( s.part[rest_next + 1] ); - r->last = s.last; + r->last = s.part+s.n-1; } else { @@ -392,7 +394,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, // Independently integrate every C_i at reduced pivot time step h/2 (1st time) int nc=0; for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) nc++; - + if(nc>1 || r.n>0) recentersub=1; for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) @@ -404,7 +406,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, diag->taskcount[clevel]+=ci->n; #pragma omp task firstprivate(clevel,ci,stime,dt,recentersub) untied { - struct sys lsys; + struct sys lsys=zerosys; lsys.n=ci->n; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart;lsys.last=lpart+lsys.n-1; @@ -437,6 +439,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } } + if(r.n>0 && accel_zero_mass) split_zeromass(&r); // kick c <-> rest (eq 24) if(r.n>0) for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) { @@ -458,7 +461,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, diag->taskcount[clevel]+=ci->n; #pragma omp task firstprivate(clevel,ci,stime,etime,dt,recentersub) untied { - struct sys lsys; + struct sys lsys=zerosys; lsys.n=ci->n; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart;lsys.last=lpart+lsys.n-1; @@ -482,5 +485,8 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } free_sys(c.next_cc); + + if(accel_zero_mass) split_zeromass(&s); + } #undef TASKCONDITION diff --git a/src/amuse/community/huayno/src/evolve_cl.c b/src/amuse/community/huayno/src/evolve_cl.c index e63cbefe38..8b07e5625b 100644 --- a/src/amuse/community/huayno/src/evolve_cl.c +++ b/src/amuse/community/huayno/src/evolve_cl.c @@ -35,6 +35,7 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) int i; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; + struct particle *ipart; if(s1.n==0 || s2.n==0) return; @@ -51,18 +52,20 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) for(i=0;ipos[0]; + ipos[i].s1=ipart->pos[1]; + ipos[i].s2=ipart->pos[2]; + ipos[i].s3=ipart->mass; } for(i=s1.n;ipos[0]; + jpos[i].s1=ipart->pos[1]; + jpos[i].s2=ipart->pos[2]; + jpos[i].s3=ipart->mass; } for(i=0;ivel[0]+=dt*acc[i].s0; + ipart->vel[1]+=dt*acc[i].s1; + ipart->vel[2]+=dt*acc[i].s2; } clfree( ipos); @@ -98,7 +102,8 @@ void timestep_cl(struct sys s1, struct sys s2,int dir) int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; CLFLOAT cldtparam=(CLFLOAT) dt_param; - + struct particle *ipart; + if(s1.n==0 || s2.n==0) return; groupsize=NTHREAD; @@ -114,21 +119,24 @@ void timestep_cl(struct sys s1, struct sys s2,int dir) CLFLOAT4* jpos = (CLFLOAT4*) clmalloc(CLCONTEXT,s2.n*sizeof(CLFLOAT4),0); CLFLOAT4* jvel = (CLFLOAT4*) clmalloc(CLCONTEXT,s2.n*sizeof(CLFLOAT4),0); + // an unnecessary block of nzero*nzero timesteps is calculated for(i=0;ipos[0]; ivel[i].s0=ipart->vel[0]; + ipos[i].s1=ipart->pos[1]; ivel[i].s1=ipart->vel[1]; + ipos[i].s2=ipart->pos[2]; ivel[i].s2=ipart->vel[2]; + ipos[i].s3=ipart->mass; ivel[i].s3=0.; } for(i=s1.n;ipos[0]; jvel[i].s0=ipart->vel[0]; + jpos[i].s1= ipart->pos[1]; jvel[i].s1=ipart->vel[1]; + jpos[i].s2= ipart->pos[2]; jvel[i].s2=ipart->vel[2]; + jpos[i].s3= ipart->mass; jvel[i].s3=0.; } for(i=0;itimestep=timestep[i]; clfree( ipos); clfree( ivel); @@ -172,6 +177,7 @@ void potential_cl(struct sys s1, struct sys s2) int i; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; + struct particle *ipart; if(s1.n==0 || s2.n==0) return; @@ -188,18 +194,20 @@ void potential_cl(struct sys s1, struct sys s2) for(i=0;ipos[0]; + ipos[i].s1=ipart->pos[1]; + ipos[i].s2=ipart->pos[2]; + ipos[i].s3=ipart->mass; } for(i=s1.n;ipos[0]; + jpos[i].s1=ipart->pos[1]; + jpos[i].s2=ipart->pos[2]; + jpos[i].s3=ipart->mass; } for(i=0;ipot+=pot[i]; clfree( ipos); clfree( jpos); diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 03242674cd..6da76a0651 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -9,7 +9,8 @@ #include "evolve_shared.h" #include "universal_kepler_solver.h" -void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { +static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) +{ CHECK_TIMESTEP(etime,stime,dt,clevel); if (s.n != 2) ENDRUN("two-body solver was called with sys.n=%u\n", s.n); // translate coordinates original frame to 2-body frame @@ -47,31 +48,94 @@ void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE d diag->cecount[clevel]++; } +static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) +{ + int k; + struct particle *ipart; + DOUBLE dpos[3],dpos0[3],pos_cm[3]; + DOUBLE dvel[3],dvel0[3],vel_cm[3]; + + CHECK_TIMESTEP(etime,stime,dt,clevel); + if (s.n-s.nzero > 1) ENDRUN("kepler-n solver was called with too many massive particles sys.n=%u\n", s.n-s.nzero); + if(s.n==s.nzero) + { + drift(clevel,s,etime, dt); + return; + } + for(UINT i=1;ipos[k] - ipart->pos[k]; + for(k=0;k<3;k++) dvel0[k] = s.part->vel[k] - ipart->vel[k]; + int err=universal_kepler_solver(dt,s.part->mass,eps2, + dpos0[0],dpos0[1],dpos0[2], + dvel0[0],dvel0[1],dvel0[2], + &dpos[0],&dpos[1],&dpos[2], + &dvel[0],&dvel[1],&dvel[2]); + if (err != 0) ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now + + for(k=0;k<3;k++) s.part->pos[k]+= s.part->vel[k]*dt; + for(k=0;k<3;k++) ipart->pos[k] = s.part->pos[k] - dpos[k]; + for(k=0;k<3;k++) ipart->vel[k] = s.part->vel[k] - dvel[k]; + } + diag->cecount[clevel]+=s.nzero; +} + + // special solver to test kepler -void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { - struct particle *central=s.part; +static void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) +{ + struct particle *central; + struct particle *ipart; struct particle p[2]; struct sys s2; CHECK_TIMESTEP(etime,stime,dt,clevel); + if (s.n <= 1) ENDRUN("kepler test solver was called with too few massive particles sys.n=%u\n this hsouldn't happen\n", s.n); - for(struct particle *i=s.part+1;i<=s.last;i++) if(central->massmass) central=i; + central=s.part; + for(UINT i=1;imassmass) central=ipart; + } - for(struct particle *i=s.part;i<=s.last;i++) + for(UINT i=0;imass+i->mass; + p[0].mass=central->mass+ipart->mass; p[1].mass=0; s2.n=2; s2.part=&p[0]; s2.last=&p[1]; - evolve_kepler(clevel,s2,stime,etime,dt); - p[1].mass=i->mass; - *i=p[1]; + evolve_kepler_2(clevel,s2,stime,etime,dt); + p[1].mass=ipart->mass; + *ipart=p[1]; } for(int k=0;k<3;k++) central->pos[k]+=central->vel[k]*dt; } + +void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) +{ + if(s.n==2) // 2 body + { + evolve_kepler_2(clevel,s,stime,etime,dt); + return; + } + if(s.n-s.nzero==1 && s.nzero>0) // 1 massive, n orbiters + { + evolve_kepler_n(clevel,s,stime,etime,dt); + return; + } + if(s.n-s.nzero>1) // more than 1 massive particle, consider heaviest as central; + { + evolve_kepler_test(clevel,s,stime,etime,dt); + return; + } + drift(clevel,s,etime, dt); // 1 massive or only zero mass +} diff --git a/src/amuse/community/huayno/src/evolve_kepler.h b/src/amuse/community/huayno/src/evolve_kepler.h index ada36be138..1e8375ca67 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.h +++ b/src/amuse/community/huayno/src/evolve_kepler.h @@ -1,2 +1 @@ void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); -void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); diff --git a/src/amuse/community/huayno/src/evolve_ok.c b/src/amuse/community/huayno/src/evolve_ok.c index 095cade059..9daa6a6ccc 100644 --- a/src/amuse/community/huayno/src/evolve_ok.c +++ b/src/amuse/community/huayno/src/evolve_ok.c @@ -100,8 +100,8 @@ void evolve_ok_init(struct sys s) { if (i != j) { - ok_main_forces.forc[k].parti = &( s.part[i] ); - ok_main_forces.forc[k].partj = &( s.part[j] ); + ok_main_forces.forc[k].parti = GETPART(s,i); + ok_main_forces.forc[k].partj = GETPART(s,j); k++; } } diff --git a/src/amuse/community/huayno/src/evolve_sf.c b/src/amuse/community/huayno/src/evolve_sf.c index c83e075d6c..1e9ad02d8a 100644 --- a/src/amuse/community/huayno/src/evolve_sf.c +++ b/src/amuse/community/huayno/src/evolve_sf.c @@ -165,13 +165,15 @@ static void drift_naive(int clevel,struct sys s, DOUBLE etime) { UINT i; DOUBLE dt; + struct particle *ipart; for(i=0;ipostime; + COMPSUMP(ipart->pos[0],ipart->pos_e[0],dt*ipart->vel[0]) + COMPSUMP(ipart->pos[1],ipart->pos_e[1],dt*ipart->vel[1]) + COMPSUMP(ipart->pos[2],ipart->pos_e[2],dt*ipart->vel[2]) + ipart->postime=etime; } diag->dstep[clevel]++; diag->dcount[clevel]+=s.n; @@ -314,17 +316,15 @@ void evolve_sf_4m4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE d #undef D1 #undef D2 - -static void split(FLOAT dt, struct sys s, struct sys *slow, struct sys *fast) +// partitions a contiguous array bordered by left and right according to a pivot timestep dt +static struct particle *partition(FLOAT dt, struct particle *left, struct particle *right) { - UINT i=0; - struct particle *left, *right; - left=s.part; - right=s.last; + UINT i,n; dt=fabs(dt); + n=right-left+1; while(1) { - if(i>=s.n) ENDRUN("split error 1"); + if(i>=n) ENDRUN("partition error"); i++; while(left->timestep
timestep>=dt && lefttimestepn=s.last-left+1; - fast->n=(left-s.part); - if(fast->n==1) + return left; +} + +static void split(FLOAT dt, struct sys s, struct sys *slow, struct sys *fast) +{ + struct particle *left, *right, *pivot; + slow->n=0; + fast->n=0; + if(s.n-s.nzero>0) + { + left=s.part; + right=s.last; + pivot=partition(dt, left, right); + slow->n=right-pivot+1; + fast->n=(pivot-left); + } + slow->nzero=0; + fast->nzero=0; + if(s.nzero>0) + { + left=s.zeropart; + right=s.lastzero; + pivot=partition(dt, left, right); + slow->nzero=right-pivot+1; + fast->nzero=(pivot-left); + slow->n+=slow->nzero; + fast->n+=fast->nzero; + } + if(fast->n<=1) { - fast->n=0; + *fast=zerosys; slow->n=s.n; + slow->nzero=s.nzero; } - if(slow->n > 0) + if(slow->n>0) { - slow->part=s.part+fast->n; - slow->last=s.last;//slow->part+slow->n-1; + slow->part=s.part+fast->n-fast->nzero; + slow->last=s.last; } - if(fast->n > 0) + if(fast->n>0) { fast->part=s.part; - fast->last=s.part+fast->n-1; + fast->last=s.part+(fast->n-fast->nzero)-1; + } + if(slow->nzero>0) + { + slow->zeropart=s.zeropart+fast->nzero; + slow->lastzero=s.lastzero; + } + if(fast->nzero > 0) + { + fast->zeropart=s.zeropart; + fast->lastzero=s.zeropart+fast->nzero-1; } if(fast->n+slow->n !=s.n) ENDRUN( "split error 2"); + if(fast->nzero+slow->nzero !=s.nzero) ENDRUN( "split error 3"); } diff --git a/src/amuse/community/huayno/src/evolve_shared_collisions.c b/src/amuse/community/huayno/src/evolve_shared_collisions.c index 6a79476faa..a51c7c1ff6 100644 --- a/src/amuse/community/huayno/src/evolve_shared_collisions.c +++ b/src/amuse/community/huayno/src/evolve_shared_collisions.c @@ -46,25 +46,27 @@ static int update_steps_and_get_next_level(int *steps, int current_level) { static void detect_collisions(struct sys s) { UINT i, j; FLOAT dx[3], dr2, radius_sum; - + struct particle *ipart, *jpart; #pragma omp parallel for if((ULONG) s.n*s.n>MPWORKLIMIT && !omp_in_parallel()) default(none) \ - private(i,j,dx,dr2,radius_sum) \ + private(i,j,dx,dr2,radius_sum, ipart, jpart) \ shared(s) for (i=0; ipos[0] - jpart->pos[0]; + dx[1] = ipart->pos[1] - jpart->pos[1]; + dx[2] = ipart->pos[2] - jpart->pos[2]; dr2 = dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2]; - radius_sum = s.part[i].radius + s.part[j].radius; + radius_sum = ipart->radius + jpart->radius; if (dr2 <= radius_sum*radius_sum) { #pragma omp critical { int stopping_index = next_index_for_stopping_condition(); if (stopping_index >= 0) { set_stopping_condition_info(stopping_index, COLLISION_DETECTION); - set_stopping_condition_particle_index(stopping_index, 0, s.part[i].id); - set_stopping_condition_particle_index(stopping_index, 1, s.part[j].id); + set_stopping_condition_particle_index(stopping_index, 0, ipart->id); + set_stopping_condition_particle_index(stopping_index, 1, jpart->id); } } } diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index bfc03fcdd2..a3466b3401 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -377,17 +377,23 @@ def test14(self): "PPASS_DKD", "BRIDGE_KDK", "BRIDGE_DKD", "CC", "CC_KEPLER", "CC_BS", "CC_BSA", "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", - "SHARED10", "SHAREDBS"] + "SHARED10"] + + #~ test_set=["CONSTANT"] + for itype in test_set: + print() + print(itype) instance = Huayno() instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] + #~ instance.parameters.accelerate_zero_mass=False instance.particles.add_particles(particles) E1=instance.kinetic_energy+instance.potential_energy instance.evolve_model(0.125 | nbody_system.time) E2=instance.kinetic_energy+instance.potential_energy if itype!="CONSTANT": - self.assertLess((E2-E1).number, 1.e-5) + self.assertLess(abs(E2-E1).number, 1.e-5) part_out= instance.particles.copy() position = part_out.position.number @@ -402,7 +408,7 @@ def test14(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("7e63d59b807a12d92671ea6da9777e262fa8e2f9") + print("a43263ea713b4b944513d0597e5651a07114eefc") def test14b(self): import hashlib @@ -420,17 +426,22 @@ def test14b(self): "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", "SHARED10", "SHAREDBS"] + #~ test_set=["CC_BS"] + for itype in test_set: - instance = Huayno() + print() + print(itype) + instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] + instance.parameters.accelerate_zero_mass=False instance.particles.add_particles(particles) instance.particles.add_particles(p2) E1=instance.kinetic_energy+instance.potential_energy instance.evolve_model(0.125 | nbody_system.time) E2=instance.kinetic_energy+instance.potential_energy - if itype!="CONSTANT": - self.assertLess((E2-E1).number, 1.e-5) - + #~ if itype!="CONSTANT": + #~ self.assertLess(abs(E2-E1).number, 1.e-5) + print((E2-E1).number) part_out= instance.particles.copy() position = part_out.position.number if hasattr(position,'tobytes'): From c921aa451cf30bb653bc106de249721ca23efcb0 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Wed, 14 Jul 2021 17:17:06 +0200 Subject: [PATCH 03/35] some fixes --- src/amuse/community/huayno/interface.c | 10 +++- src/amuse/community/huayno/interface.py | 4 +- src/amuse/community/huayno/src/evolve.c | 6 ++- .../community/huayno/src/evolve_kepler.c | 31 +++++++---- .../test/suite/codes_tests/test_huayno.py | 53 +++++++++++++++++-- 5 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index ba72d2c97d..7c24bbec1e 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -69,6 +69,11 @@ int cleanup_code() return 0; } +void refresh_lookup() +{ + for(int p=0;pvel_e[0]=0.;ipart->vel_e[1]=0.;ipart->vel_e[2]=0.; #endif } + if(accel_zero_mass) split_zeromass(&s); // because of potential calc potential(s,s); evolve_ok_stop(); @@ -357,7 +358,7 @@ static void kick_cpu(struct sys s1, struct sys s2, DOUBLE dt) for(UINT j=0;jpos[0]-jpart->pos[0]; dx[1]=ipart->pos[1]-jpart->pos[1]; @@ -569,7 +570,8 @@ static void report(struct sys s,DOUBLE etime, int inttype) printf("postime errors: %u \n",err); printf("target time, actual time: %12.8g %12.8g %12.8g\n", (double) etime,(double) diag->simtime,(double) ((DOUBLE) etime-diag->simtime)); - printf("time track, ratio: %12.8g %12.8g\n", (double) diag->timetrack,(double) (diag->timetrack/diag->simtime)); + printf("time track, ratio: %12.8g %12.8g\n", (double) diag->timetrack, + (double) (diag->simtime!=0? (diag->timetrack/diag->simtime) :1)); #ifdef EVOLVE_OPENCL printf("cpu step,count: %12li,%18li\n",diag->cpu_step,diag->cpu_count); diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 6da76a0651..35c63a0f6f 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -8,6 +8,9 @@ #include "evolve.h" #include "evolve_shared.h" #include "universal_kepler_solver.h" +#ifdef _OPENMP +#include +#endif static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { @@ -50,10 +53,10 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { - int k; struct particle *ipart; - DOUBLE dpos[3],dpos0[3],pos_cm[3]; - DOUBLE dvel[3],dvel0[3],vel_cm[3]; + DOUBLE dpos[3],dpos0[3],cmpos[3]; + DOUBLE dvel[3],dvel0[3]; + UINT err; CHECK_TIMESTEP(etime,stime,dt,clevel); if (s.n-s.nzero > 1) ENDRUN("kepler-n solver was called with too many massive particles sys.n=%u\n", s.n-s.nzero); @@ -62,22 +65,30 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, drift(clevel,s,etime, dt); return; } + for(int k=0;k<3;k++) cmpos[k]= s.part->pos[k] + s.part->vel[k]*dt; + + err=0; +#pragma omp parallel for if((ULONG) s.n>omp_get_num_threads() && !omp_in_parallel()) default(none) \ + private(ipart, dpos,dvel,dpos0,dvel0) shared(clevel, dt,cmpos, s, eps2) reduction(+: err) for(UINT i=1;ipos[k] - ipart->pos[k]; - for(k=0;k<3;k++) dvel0[k] = s.part->vel[k] - ipart->vel[k]; - int err=universal_kepler_solver(dt,s.part->mass,eps2, + for(int k=0;k<3;k++) dpos0[k] = s.part->pos[k] - ipart->pos[k]; + for(int k=0;k<3;k++) dvel0[k] = s.part->vel[k] - ipart->vel[k]; + err+=universal_kepler_solver(dt,s.part->mass,eps2, dpos0[0],dpos0[1],dpos0[2], dvel0[0],dvel0[1],dvel0[2], &dpos[0],&dpos[1],&dpos[2], &dvel[0],&dvel[1],&dvel[2]); - if (err != 0) ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now - for(k=0;k<3;k++) s.part->pos[k]+= s.part->vel[k]*dt; - for(k=0;k<3;k++) ipart->pos[k] = s.part->pos[k] - dpos[k]; - for(k=0;k<3;k++) ipart->vel[k] = s.part->vel[k] - dvel[k]; + + for(int k=0;k<3;k++) ipart->pos[k] = cmpos[k] - dpos[k]; + for(int k=0;k<3;k++) ipart->vel[k] = s.part->vel[k] - dvel[k]; } + if (err != 0) { + ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now + } + for(int k=0;k<3;k++) s.part->pos[k]=cmpos[k]; diag->cecount[clevel]+=s.nzero; } diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index a3466b3401..1865ef436a 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -413,10 +413,54 @@ def test14(self): def test14b(self): import hashlib + numpy.random.seed(123456) + particles = plummer.new_plummer_model(32) + p2 = plummer.new_plummer_model(32) + p2.mass*=0 + sha=hashlib.sha1() + + test_set=["CONSTANT", "SHARED2", "EXTRAPOLATE", + "PASS_KDK", "PASS_DKD", "HOLD_KDK", "HOLD_DKD", + "PPASS_DKD", "BRIDGE_KDK", "BRIDGE_DKD", + "CC", "CC_KEPLER", "CC_BS", "CC_BSA", + "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", + "SHARED10"] + + for itype in test_set: + instance = Huayno() + instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] + instance.parameters.accelerate_zero_mass=False + instance.particles.add_particles(particles) + instance.particles.add_particles(p2) + E1=instance.kinetic_energy+instance.potential_energy + instance.evolve_model(0.125 | nbody_system.time) + E2=instance.kinetic_energy+instance.potential_energy + if itype!="CONSTANT": + self.assertLess((E2-E1).number, 1.e-5) + + part_out= instance.particles.copy() + position = part_out.position.number + if hasattr(position,'tobytes'): + as_bytes = position.tobytes() + else: + as_bytes = numpy.array(position.data, copy=True, order='C') + sha.update(as_bytes) + + instance.stop() + + # this result is probably dependent on system architecture hence no good for assert + print() + print(sha.hexdigest()) + print("36cf912fb27cbf8169aa6d58e592a29bca4d1991") + + def test14c(self): + import hashlib + numpy.random.seed(123456) particles = plummer.new_plummer_model(16) + particles.mass*=0.25 p2 = plummer.new_plummer_model(16) - p2.mass*=0 + p2.mass*=0.75 sha=hashlib.sha1() test_set=["CONSTANT", "SHARED2", "EXTRAPOLATE", @@ -424,7 +468,7 @@ def test14b(self): "PPASS_DKD", "BRIDGE_KDK", "BRIDGE_DKD", "CC", "CC_KEPLER", "CC_BS", "CC_BSA", "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", - "SHARED10", "SHAREDBS"] + "SHARED10"] #~ test_set=["CC_BS"] @@ -433,7 +477,7 @@ def test14b(self): print(itype) instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] - instance.parameters.accelerate_zero_mass=False + instance.parameters.accelerate_zero_mass=True instance.particles.add_particles(particles) instance.particles.add_particles(p2) E1=instance.kinetic_energy+instance.potential_energy @@ -455,7 +499,8 @@ def test14b(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("9e2025989eead2b37198db730bbaa32fd7dd1051") + print("ac993b464a8efe1f56a57748259d6388ce1e1e44 ") + def test15(self): particles = plummer.new_plummer_model(512) From a5641f2e7a32d76af6ac50a28c3f1ed0e3328c1b Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Wed, 14 Jul 2021 17:25:36 +0200 Subject: [PATCH 04/35] fix error code --- src/amuse/community/huayno/src/evolve_kepler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 35c63a0f6f..875f8494ec 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -69,13 +69,13 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, err=0; #pragma omp parallel for if((ULONG) s.n>omp_get_num_threads() && !omp_in_parallel()) default(none) \ - private(ipart, dpos,dvel,dpos0,dvel0) shared(clevel, dt,cmpos, s, eps2) reduction(+: err) + private(ipart, dpos,dvel,dpos0,dvel0) shared(clevel, dt,cmpos, s, eps2) reduction(|: err) for(UINT i=1;ipos[k] - ipart->pos[k]; for(int k=0;k<3;k++) dvel0[k] = s.part->vel[k] - ipart->vel[k]; - err+=universal_kepler_solver(dt,s.part->mass,eps2, + err|=universal_kepler_solver(dt,s.part->mass,eps2, dpos0[0],dpos0[1],dpos0[2], dvel0[0],dvel0[1],dvel0[2], &dpos[0],&dpos[1],&dpos[2], From f45b2572b5e358ff0de69f78f304af3eb9000083 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Wed, 14 Jul 2021 21:12:41 +0200 Subject: [PATCH 05/35] fixup of kepler evolve logic for various cases, allow termination of CC with kepler in case of heavy body with orbiters, fix error in time bookkeeping of CC --- src/amuse/community/huayno/src/evolve_cc.c | 7 +++- .../community/huayno/src/evolve_kepler.c | 41 ++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 496dd13d54..c5ef6775d9 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -299,7 +299,10 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, struct sys c = zerosys, r = zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); - if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) + if(accel_zero_mass) split_zeromass(&s); + + if ((s.n == 2 || s.n-s.nzero<=1 )&& (inttype==CCC_KEPLER || inttype==CC_KEPLER)) + //~ if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) { evolve_kepler(clevel,s, stime, etime, dt); return; @@ -473,7 +476,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } else #endif { - evolve_cc2(clevel+1,*ci, stime, stime+dt/2, dt/2,inttype,recentersub); + evolve_cc2(clevel+1,*ci, stime+dt/2, etime, dt/2,inttype,recentersub); } } #pragma omp taskwait diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 875f8494ec..ac7d4c9bc2 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -14,22 +14,25 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { + struct particle *ipart,*jpart; CHECK_TIMESTEP(etime,stime,dt,clevel); if (s.n != 2) ENDRUN("two-body solver was called with sys.n=%u\n", s.n); // translate coordinates original frame to 2-body frame int k; + ipart=GETPART(s,0); + jpart=GETPART(s,1); DOUBLE dpos[3],dpos0[3],pos_cm[3]; DOUBLE dvel[3],dvel0[3],vel_cm[3]; - DOUBLE m1 = s.part->mass; - DOUBLE m2 = s.last->mass; - DOUBLE mtot = s.part->mass + s.last->mass; + DOUBLE m1 = ipart->mass; + DOUBLE m2 = jpart->mass; + DOUBLE mtot = ipart->mass + jpart->mass; DOUBLE f1 = m2 / mtot; DOUBLE f2 = m1 / mtot; if(mtot>0.) { - for(k=0;k<3;k++) dpos0[k] = s.part->pos[k] - s.last->pos[k]; - for(k=0;k<3;k++) dvel0[k] = s.part->vel[k] - s.last->vel[k]; - for(k=0;k<3;k++) pos_cm[k] = (m1 * s.part->pos[k] + m2 * s.last->pos[k]) / mtot; - for(k=0;k<3;k++) vel_cm[k] = (m1 * s.part->vel[k] + m2 * s.last->vel[k]) / mtot; + for(k=0;k<3;k++) dpos0[k] = ipart->pos[k] - jpart->pos[k]; + for(k=0;k<3;k++) dvel0[k] = ipart->vel[k] - jpart->vel[k]; + for(k=0;k<3;k++) pos_cm[k] = (m1 * ipart->pos[k] + m2 * jpart->pos[k]) / mtot; + for(k=0;k<3;k++) vel_cm[k] = (m1 * ipart->vel[k] + m2 * jpart->vel[k]) / mtot; // evolve center of mass for dt for(k=0;k<3;k++) pos_cm[k] += vel_cm[k] * dt; // call kepler solver @@ -40,14 +43,17 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, &dvel[0],&dvel[1],&dvel[2]); if (err != 0) ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now // translate coordinates from 2-body frame to original frame - for(k=0;k<3;k++) s.part->pos[k] = pos_cm[k] + f1 * dpos[k]; - for(k=0;k<3;k++) s.part->vel[k] = vel_cm[k] + f1 * dvel[k]; - for(k=0;k<3;k++) s.last->pos[k] = pos_cm[k] - f2 * dpos[k]; - for(k=0;k<3;k++) s.last->vel[k] = vel_cm[k] - f2 * dvel[k]; + for(k=0;k<3;k++) ipart->pos[k] = pos_cm[k] + f1 * dpos[k]; + for(k=0;k<3;k++) ipart->vel[k] = vel_cm[k] + f1 * dvel[k]; + for(k=0;k<3;k++) jpart->pos[k] = pos_cm[k] - f2 * dpos[k]; + for(k=0;k<3;k++) jpart->vel[k] = vel_cm[k] - f2 * dvel[k]; } else { - for(k=0;k<3;k++) s.part->pos[k]+=s.part->vel[k]*dt; - for(k=0;k<3;k++) s.last->pos[k]+=s.last->vel[k]*dt; + for(k=0;k<3;k++) ipart->pos[k]+=ipart->vel[k]*dt; + for(k=0;k<3;k++) jpart->pos[k]+=jpart->vel[k]*dt; } + ipart->postime=etime; + jpart->postime=etime; + diag->cecount[clevel]++; } @@ -60,6 +66,7 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, CHECK_TIMESTEP(etime,stime,dt,clevel); if (s.n-s.nzero > 1) ENDRUN("kepler-n solver was called with too many massive particles sys.n=%u\n", s.n-s.nzero); + if (s.n-s.nzero < 1) ENDRUN("kepler-n solver was called with too few massive particles sys.n=%u\n", s.n-s.nzero); if(s.n==s.nzero) { drift(clevel,s,etime, dt); @@ -69,7 +76,7 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, err=0; #pragma omp parallel for if((ULONG) s.n>omp_get_num_threads() && !omp_in_parallel()) default(none) \ - private(ipart, dpos,dvel,dpos0,dvel0) shared(clevel, dt,cmpos, s, eps2) reduction(|: err) + private(ipart, dpos,dvel,dpos0,dvel0) shared(etime,clevel, dt,cmpos, s, eps2) reduction(|: err) for(UINT i=1;ipos[k] = cmpos[k] - dpos[k]; for(int k=0;k<3;k++) ipart->vel[k] = s.part->vel[k] - dvel[k]; + ipart->postime=etime; } if (err != 0) { ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now } for(int k=0;k<3;k++) s.part->pos[k]=cmpos[k]; + s.part->postime=etime; diag->cecount[clevel]+=s.nzero; } @@ -127,13 +136,15 @@ static void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE eti evolve_kepler_2(clevel,s2,stime,etime,dt); p[1].mass=ipart->mass; *ipart=p[1]; + ipart->postime=etime; } for(int k=0;k<3;k++) central->pos[k]+=central->vel[k]*dt; + central->postime=etime; } void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { - if(s.n==2) // 2 body + if(s.n-s.nzero==2) // 2 body { evolve_kepler_2(clevel,s,stime,etime,dt); return; From 01bef2de25be1445d2e76f752d12c12d56b8c2b0 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 20 Jul 2021 15:26:15 +0200 Subject: [PATCH 06/35] various fixes, setting zerosys and consistency checks fix --- src/amuse/community/huayno/interface.c | 7 ++-- src/amuse/community/huayno/src/evolve.h | 4 +- src/amuse/community/huayno/src/evolve_cc.c | 47 ++++++++++++++-------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index 7c24bbec1e..64ef3c0378 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -31,10 +31,9 @@ int initialize_code() nmax=NMAX; err=LOOKUPSYMBOL(init_,)(&lookup, nmax*sizeof(struct particle)/sizeof(int)); if(err != 0) return err; - mainsys.n=0; + mainsys=zerosys; mainsys.part=(struct particle*) malloc(nmax*sizeof(struct particle)); if(mainsys.part == NULL) return -1; - mainsys.last=NULL; dt_param=.03; accel_zero_mass=1; eps2=0.; @@ -586,7 +585,7 @@ int get_indices_of_colliding_particles(int *p1,int*p2) int get_gravity_at_point(double * eps, double * x, double * y, double * z, double * ax, double * ay, double * az, int n) { - struct sys tmpsys; + struct sys tmpsys=zerosys; tmpsys.n=n; tmpsys.part=(struct particle*) malloc(n*sizeof(struct particle)); tmpsys.last=&tmpsys.part[n]; @@ -620,7 +619,7 @@ int get_potential_at_point(double * eps, double * x, double * y, double * z, double * phi, int n) { - struct sys tmpsys; + struct sys tmpsys=zerosys; tmpsys.n=n; tmpsys.part=(struct particle*) malloc(n*sizeof(struct particle)); tmpsys.last=&tmpsys.part[n]; diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 0a13fb1e27..11028bea07 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -19,6 +19,8 @@ #define COMPENSATED_SUMMP //#define COMPENSATED_SUMMV +#define CONSISTENCY_CHECKS // perform (time-consuming, but thorough) sanity checks + struct particle { UINT id; @@ -54,7 +56,7 @@ struct sys struct sys *next_cc; // used in the CC split only }; -#define GETPART(s, i) (i #endif +#include + #include "evolve.h" #include "evolve_kepler.h" #include "evolve_bs.h" -//#define CC_DEBUG // perform (time-consuming, but thorough) CC sanity checks +// this is a bit weird, why not use NULL pointer? +#define IS_ZEROSYS(SYS) (((SYS)->n == 0) && \ + ((SYS)->nzero == 0) && \ + ((SYS)->part == NULL) && \ + ((SYS)->last == NULL) && \ + ((SYS)->next_cc == NULL) && \ + ((SYS)->zeropart == NULL) && \ + ((SYS)->lastzero == NULL) ) + +#define IS_ZEROSYSs(SYS) IS_ZEROSYS(&(SYS)) -#define IS_ZEROSYS(SYS) (((SYS)->n == 0) && ((SYS)->nzero == 0) && ((SYS)->part == NULL) && ((SYS)->last == NULL) && ((SYS)->next_cc == NULL)) -#define IS_ZEROSYSs(SYS) (((SYS).n == 0) && ((SYS).nzero == 0) && ((SYS).part == NULL) && ((SYS).last == NULL) && ((SYS).next_cc == NULL)) #define LOG_CC_SPLIT(C, R) \ { \ LOG("clevel = %d s.n = %d c.n = {", clevel, s.n); \ @@ -132,14 +141,14 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { * split_cc_verify: explicit verification if connected components c and rest system r form a correct * connected components decomposition of the system. */ - //LOG("split_cc_verify ping s.n=%d r->n=%d\n", s.n, r->n); + LOG("split_cc_verify ping s.n=%d r->n=%d\n", s.n, r->n); //LOG_CC_SPLIT(c, r); UINT pcount_check = 0; for (UINT i = 0; i < s.n; i++) { pcount_check = 0; UINT particle_found = 0; - struct particle *p = &( s.part[i] ); + struct particle *p = GETPART(s, i); for (struct sys *cj = c; !IS_ZEROSYS(cj); cj = cj->next_cc) { pcount_check += cj->n; @@ -147,7 +156,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { // search for p in connected components for (UINT k = 0; k < cj->n; k++) { - struct particle * pk = &( cj->part[k] ); + struct particle * pk = GETPART( *cj,k); // is pk equal to p if (p->id == pk->id) { @@ -155,7 +164,13 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { //LOG("split_cc_verify: found in a cc\n"); } } - if (& ( cj->part[cj->n - 1] ) != cj->last) + if (cj->n-cj->nzero>0 && ( &(cj->part[cj->n - cj->nzero - 1]) != cj->last )) + { + LOG("split_cc_verify: last pointer for c is not set correctly!"); + LOG_CC_SPLIT(c, r); + ENDRUN("data structure corrupted\n"); + } + if (cj->nzero>0 && ( &(cj->part[cj->n-1]) != cj->lastzero )) { LOG("split_cc_verify: last pointer for c is not set correctly!"); LOG_CC_SPLIT(c, r); @@ -166,7 +181,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { // search for p in rest for (UINT k = 0; k < r->n; k++) { - struct particle * pk = &( r->part[k] ); + struct particle * pk = GETPART( *r, k); // is pk equal to p if (p->id == pk->id) @@ -198,7 +213,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { } else { - //LOG("split_cc_verify pong\n"); + LOG("split_cc_verify pong\n"); } //ENDRUN("Fin.\n"); } @@ -221,7 +236,7 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) } for (UINT j = 0; j < cj->n; j++) { - ts_ij = (DOUBLE) timestep_ij((*ci).part+i, (*cj).part+j, dir); + ts_ij = (DOUBLE) timestep_ij(GETPART( *ci, i), GETPART( *cj, j), dir); //LOG("comparing %d %d\n", ci->part[i].id, cj->part[j].id); //LOG("%f %f \n", ts_ij, dt); if (dt > ts_ij) @@ -240,7 +255,7 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) { for (UINT j = 0; j < r->n; j++) { - ts_ij = (DOUBLE) timestep_ij( (*ci).part+ i, (*r).part+ j,dir); + ts_ij = (DOUBLE) timestep_ij( GETPART(*ci, i), GETPART(*r, j),dir); if (ts_ij < dt) { ENDRUN("split_cc_verify_ts C-R timestep underflow\n"); @@ -255,7 +270,7 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) for (UINT j = 0; j < r->n; j++) { if (i == j) continue; - ts_ij = (DOUBLE) timestep_ij( (*r).part+ i, (*r).part+j,dir); + ts_ij = (DOUBLE) timestep_ij( GETPART(*r, i), GETPART( *r,j),dir); if (ts_ij < dt) { ENDRUN("split_cc_verify_ts R-R timestep underflow\n"); @@ -345,16 +360,16 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } #endif -#ifdef CC2_SPLIT_CONSISTENCY_CHECKS +#ifdef CONSISTENCY_CHECKS if (clevel == 0) { printf("consistency_checks: ", s.n, clevel); } #endif -#ifdef CC2_SPLIT_CONSISTENCY_CHECKS +#ifdef CONSISTENCY_CHECKS // debug: make a copy of s to verify that the split has been done properly - struct sys s_before_split; + struct sys s_before_split=zerosys; s_before_split.n = s.n; s_before_split.part = (struct particle*) malloc(s.n*sizeof(struct particle)); s_before_split.last = &( s_before_split.part[s.n - 1] ); @@ -370,7 +385,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, split_cc(clevel,s, &c, &r, dt); //if (s.n != c.n) LOG_CC_SPLIT(&c, &r); // print out non-trivial splits -#ifdef CC2_SPLIT_CONSISTENCY_CHECKS +#ifdef CONSISTENCY_CHECKS /* if (s.n != r.n) { LOG("s: "); From 2a0c814c368724e26f7e7ce0ce9c1cad00899722 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 20 Jul 2021 19:58:54 +0200 Subject: [PATCH 07/35] huayno: replace some integer by pointers, making the role of the various vars clearer --- src/amuse/community/huayno/src/evolve_cc.c | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index a6795c0a68..516874c905 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -50,17 +50,18 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) dt=fabs(dt); diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics struct sys *c_next; + if(s.n<=1) ENDRUN("This does not look right..."); if(s.nzero>0 && s.n!=s.nzero && s.zeropart!=s.last+1) ENDRUN("split_cc only works on contiguous systems"); c_next = c; *c_next = zerosys; UINT processed = 0; // increase if something is added from the stack to the cc - UINT comp_next = 0; // increase to move a particle from stack to cc; points to the first element of the stack + struct particle *comp_next = s.part; // increase to move a particle from stack to cc; points to the first element of the stack UINT comp_size = 0; // amount of particles added to the current cc - UINT stack_next = 1; // swap this with s[i] to increase the stack - UINT stack_size = 1; // first element of the stack is s[comp_next] - // last element of the stack is s[comp_next + stack_size - 1] - UINT rest_next = s.n - 1; // swap this to add to the rest-system + struct particle *stack_next = comp_next+1; // swap this with s[i] to increase the stack + UINT stack_size = 1; // first element of the stack is comp_next + // last element of the stack is comp_next + stack_size - 1 == stack_next-1 + struct particle *rest_next = GETPART(s, s.n-1); // swap this to add to the rest-system // find connected components while (processed < s.n) { @@ -69,15 +70,15 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) while (stack_size > 0) { // iterate over all unvisited elements - for (UINT i = stack_next; i <= rest_next; i++) + for (struct particle *i = stack_next; i <= rest_next; i++) { // if element is connected to the first element of the stack - DOUBLE timestep = (DOUBLE) timestep_ij(s.part+comp_next, s.part+i,dir); + DOUBLE timestep = (DOUBLE) timestep_ij(comp_next, i,dir); diag->tcount[clevel]++; if ( timestep <= dt) { // add i to the end of the stack by swapping stack_next and i - SWAP( s.part[ stack_next ], s.part[i], struct particle ); + SWAP( *stack_next , *i, struct particle ); stack_next++; stack_size++; } @@ -95,25 +96,27 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) // create new component c from u[0] to u[cc_visited - 1] // remove components from u (u.n, u.part) c_next->n = comp_size; - c_next->part = &( s.part[ comp_next - comp_size ]); - c_next->last = &( s.part[ comp_next - 1 ]); + c_next->part = comp_next - comp_size; + c_next->last = comp_next-1; c_next->next_cc = (struct sys*) malloc( sizeof(struct sys) ); c_next = c_next->next_cc; *c_next = zerosys; - comp_next = stack_next; + if(stack_next!=comp_next) ENDRUN("consistency error in split_cc") + // comp_next = stack_next; // should be unchanged comp_size = 0; - stack_next = stack_next + 1; + stack_next= comp_next+1; stack_size = 1; // new component is trivial: add to rest } else { //LOG("found trivial component; adding to rest\n"); - SWAP(s.part[ comp_next - 1 ], s.part[ rest_next ], struct particle ); + SWAP( *(comp_next - 1), *rest_next, struct particle ); rest_next--; comp_next = comp_next - 1; comp_size = 0; - stack_next = comp_next + 1; + if(stack_next!=comp_next+1) ENDRUN("consistency error in split_cc") + //stack_next = comp_next + 1; // should be unchanged stack_size = 1; } } @@ -122,11 +125,11 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) ENDRUN("split_cc particle count mismatch: processed=%u s.n=%u r->n=%u\n", processed, s.n, r->n); } // create the rest system - r->n = (s.n - 1) - rest_next; + r->n = GETPART(s, s.n-1) - rest_next; if (r->n > 0) { - r->part = &( s.part[rest_next + 1] ); - r->last = s.part+s.n-1; + r->part = rest_next+1; + r->last = GETPART(s, s.n-1); } else { From 89f195805efe30aab26e4377a2406072eb84f922 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 22 Jul 2021 22:35:05 +0200 Subject: [PATCH 08/35] fix huayno split_cc to maintain separation of mass and massless part --- src/amuse/community/huayno/src/evolve.c | 16 +- src/amuse/community/huayno/src/evolve.h | 3 + src/amuse/community/huayno/src/evolve_cc.c | 301 ++++++++++++++++-- .../community/huayno/src/evolve_kepler.c | 2 +- 4 files changed, 285 insertions(+), 37 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 1f4991af3d..a3a5679054 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -19,6 +19,8 @@ int verbosity=0; +struct sys debugsys; + FLOAT eps2; FLOAT dt_param; struct sys zerosys ={0,0,NULL,NULL,NULL,NULL,NULL}; @@ -187,7 +189,7 @@ void sum_diagnostics(struct diagnostics* total,struct diagnostics* diag) total->cl_count+=diag->cl_count; #endif #ifdef _OPENMP - printf("%d: %d %li %li %li\n",omp_get_thread_num(),tasksum,diag->taskdrift, + printf("task %d: %d %li %li %li\n",omp_get_thread_num(),tasksum,diag->taskdrift, diag->taskkick,taskcountsum); #endif } @@ -202,6 +204,7 @@ void do_evolve(struct sys s, double dt, int inttype) clevel=0; if(accel_zero_mass) split_zeromass(&s); zero_diagnostics(diag); + debugsys=s; switch (inttype) { case CONSTANT: @@ -278,6 +281,8 @@ void do_evolve(struct sys s, double dt, int inttype) { diag=(struct diagnostics *) malloc(sizeof( struct diagnostics)); zero_diagnostics(diag); +#pragma omp master + printf("Total Threads # %d\n", omp_get_num_threads()); #pragma omp single #endif evolve_cc2(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, inttype, 1); @@ -733,4 +738,13 @@ void split_zeromass(struct sys *s) if((left-s->part)+s->nzero !=s->n) ENDRUN( "split_zeromass error 2"); for(i=0;i<(s->n-s->nzero);i++) if(s->part[i].mass==0) ENDRUN ("split_zromass error 3"); for(i=s->n-s->nzero;in;i++) if(s->part[i].mass!=0) ENDRUN ("split_zeromass error 4"); +#ifdef CONSISTENCY_CHECKS + verify_split_zeromass(*s); +#endif +} + +void verify_split_zeromass(struct sys s) +{ + for(UINT i=0;imass==0) ENDRUN("massless particle in main part\n") + for(UINT i=s.n-s.nzero;imass!=0) ENDRUN("massive particle in massless part\n") } diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 11028bea07..ecd3e7e50b 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -56,6 +56,8 @@ struct sys struct sys *next_cc; // used in the CC split only }; +extern struct sys debugsys; + #define GETPART(s, i) ((i)<(s).n-(s).nzero ? (s).part+(i) : (s).zeropart+((i)-((s).n-(s).nzero))) enum intopt @@ -157,6 +159,7 @@ void potential(struct sys s1, struct sys s2); struct sys join(struct sys s1,struct sys s2); void split_zeromass(struct sys *s); +void verify_split_zeromass(struct sys s); #define SWAP(a,b,c) {c t;t=(a);(a)=(b);(b)=t;} diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 516874c905..51b7deb73b 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -36,11 +36,26 @@ printf("} r.n = %d\n", (R)->n); \ }; +#define PRINTSYS(s) \ +{ \ + LOG("sys %d %d : {",s.n,s.nzero); \ + for (UINT i=0;iid); printf(" | "); \ + for (UINT i=s.n-s.nzero;iid); printf(" }\n"); \ +}; + +#define PRINTOFFSETS(s) \ +{ \ + LOG("sysoffsets %d %d : {",s.n,s.nzero); \ + for (UINT i=0;in; i++) { printf("%u ", (SYS)->part[i].id); } printf("\n"); #define LOGSYSC_ID(SYS) for (struct sys *_ci = &(SYS); !IS_ZEROSYS(_ci); _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); -void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { +void split_cc_old(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { /* * split_cc: run a connected component search on sys s with threshold dt, * creates a singly-linked list of connected components c and a rest system r @@ -139,6 +154,198 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) //LOG("split_cc: rest system size: %d\n", r->n); } +void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { + /* + * split_cc: run a connected component search on sys s with threshold dt, + * creates a singly-linked list of connected components c and a rest system r + * c or r is set to zerosys if no connected components/rest is found + */ + int dir=SIGN(dt); + dt=fabs(dt); + diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics + struct sys *c_next; + if(s.n<=1) ENDRUN("This does not look right..."); + c_next = c; + UINT processed = 0; // increase if something is added from the stack to the cc + struct particle **active, *comp_next, *compzero_next; // current active particle, and next in the mass and massless part + UINT comp_size, compzero_size; + struct particle *stack_next=NULL, *stackzero_next=NULL; // two pointers keeping track of stack in massive and massless part + UINT stack_size, stackzero_size; // counter for total size of stack and zero + struct particle *rest_next=NULL, *restzero_next=NULL; // swap this to add to the rest-system + // find connected components + + if(s.n-s.nzero>0) stack_next=s.part; + if(s.n-s.nzero>0) rest_next=s.last; + if(s.nzero>0) stackzero_next=s.zeropart; + if(s.nzero>0) restzero_next=s.lastzero; + comp_next=stack_next; + compzero_next=stackzero_next; + *c_next=zerosys; // initialize c_next + + while (processed < s.n) + { + if(stack_next!=comp_next) ENDRUN("consistency error in split_cc\n") + if(stackzero_next!=compzero_next) ENDRUN("consistency error in split_cc\n") + //~ if(stack_next==rest_next && stackzero_next==restzero_next) ENDRUN("impossible") + + // startup stack + + comp_size=0; + compzero_size=0; + if(stack_next!=NULL && stack_next 0) + { + //~ LOG("stack_size %d\n", stack_size); + active=NULL; + if(stack_next!=NULL && + stack_next-comp_next>0) {active=&comp_next;} + else + if(stackzero_next!=NULL && + stackzero_next-compzero_next>0) {active=&compzero_next;} + + if(active==NULL) ENDRUN("no active, while stack still >0\n"); + + // iterate over all unvisited elements + if(stack_next!=NULL) + { + //~ LOG("check massive %d\n", rest_next-stack_next+1); + for (struct particle *i = stack_next; i <= rest_next; i++) + { + diag->tcount[clevel]++; + // if element is connected to the first element of the stack + if ( ((DOUBLE) timestep_ij(*active, i,dir)) <= dt) + { + // add i to the end of the stack by swapping stack_next and i + //~ LOG("stack_next add %d\n", i->id); + //~ LOG("stack offsets: %d, %d\n", stack_next-s.part, i-s.part); + SWAP( *stack_next , *i, struct particle ); + stack_next++; + stack_size++; + } + } + } + // iterate over all unvisited elements + if(stackzero_next!=NULL) + { + //~ LOG("check zero %d\n", restzero_next-stackzero_next+1); + for (struct particle *i = stackzero_next; i <= restzero_next; i++) + { + diag->tcount[clevel]++; + // if element is connected to the first element of the stack + if ( ((DOUBLE) timestep_ij(*active, i,dir)) <= dt) + { + // add i to the end of the stack by swapping stack_next and i + //~ LOG("stackzero_next add %d\n", i->id); + //~ LOG("stack offsets: %d, %d\n", stackzero_next-s.part, i-s.part); + SWAP( *stackzero_next , *i, struct particle ); + stackzero_next++; + stack_size++; + } + } + } + // pop the stack + (*active)++; + if(active==&compzero_next) compzero_size++; + comp_size++; + stack_size--; + //~ LOG("popped %d, %d, %d\n", stack_size, comp_size, compzero_size); + + } + processed += comp_size; + //~ LOG("comp finish %d, %d\n", comp_size, compzero_size); + // new component is non-trivial: create a new sys + if (comp_size > 1) + { + //~ LOG("split_cc: found component with size: %d %d\n", comp_size, compzero_size); + //~ LOG("%d %d \n", comp_next-stack_next, compzero_next-stackzero_next); + c_next->n = comp_size; + c_next->nzero = compzero_size; + if(comp_size-compzero_size>0) + { + c_next->part = comp_next - (comp_size-compzero_size); + c_next->last = comp_next-1; + } + if(compzero_size>0) + { + c_next->zeropart = compzero_next - compzero_size; + c_next->lastzero = compzero_next-1; + } + if(c_next->part==NULL) c_next->part=c_next->zeropart; + //~ PRINTSYS((*c_next)); + //~ PRINTOFFSETS((*c_next)); + + c_next->next_cc = (struct sys*) malloc( sizeof(struct sys) ); + c_next = c_next->next_cc; + *c_next=zerosys; + } + else // new component is trivial: add to rest, reset pointers + { + //~ LOG("split_cc: add to rest\n"); + //~ LOG("%d %d \n", comp_next-stack_next, compzero_next-stackzero_next); + + if(active==&comp_next) + { + comp_next--; + //~ LOG("r1 check offsets: %d, %d\n", comp_next-s.part, rest_next-s.part); + + SWAP( *comp_next, *rest_next, struct particle ); + rest_next--; + stack_next--; + } else + { + compzero_next--; + //~ LOG("r2 check offsets: %d, %d\n", compzero_next-s.part, restzero_next-s.part); + SWAP( *compzero_next, *restzero_next, struct particle ); + restzero_next--; + stackzero_next--; + } + } + } + if(stack_next!=NULL && stack_next!=rest_next+1) ENDRUN("unexpected") + if(stackzero_next!=NULL && stackzero_next!=restzero_next+1) ENDRUN("unexpected") + + // create the rest system + *r=zerosys; + if(rest_next!=NULL) r->n = s.last - rest_next; + if(restzero_next!=NULL) r->nzero = s.lastzero - restzero_next; + r->n+=r->nzero; + if (r->n-r->nzero > 0) + { + r->part = rest_next+1; + r->last = s.last; + } + if (r->nzero > 0) + { + r->zeropart = restzero_next+1; + r->lastzero = s.lastzero; + } + if(r->part==NULL) r->part=r->zeropart; + + if (processed != s.n) + { + ENDRUN("split_cc particle count mismatch: processed=%u s.n=%u r->n=%u\n", processed, s.n, r->n); + } + + //~ LOG("exit with %d %d\n", s.n, s.nzero); + +} + + + void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { /* * split_cc_verify: explicit verification if connected components c and rest system r form a correct @@ -147,6 +354,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { LOG("split_cc_verify ping s.n=%d r->n=%d\n", s.n, r->n); //LOG_CC_SPLIT(c, r); UINT pcount_check = 0; + for (UINT i = 0; i < s.n; i++) { pcount_check = 0; @@ -154,8 +362,9 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { struct particle *p = GETPART(s, i); for (struct sys *cj = c; !IS_ZEROSYS(cj); cj = cj->next_cc) { + verify_split_zeromass(*cj); pcount_check += cj->n; - //LOG("%d\n", pcount_check); + //~ //LOG("%d\n", pcount_check); // search for p in connected components for (UINT k = 0; k < cj->n; k++) { @@ -164,23 +373,25 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { if (p->id == pk->id) { particle_found += 1; - //LOG("split_cc_verify: found in a cc\n"); + //~ LOG("split_cc_verify: found %d in a cc\n",i); } } - if (cj->n-cj->nzero>0 && ( &(cj->part[cj->n - cj->nzero - 1]) != cj->last )) + if (cj->n-cj->nzero>0 && ( GETPART( *cj, cj->n - cj->nzero - 1) != cj->last )) { - LOG("split_cc_verify: last pointer for c is not set correctly!"); + LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); } - if (cj->nzero>0 && ( &(cj->part[cj->n-1]) != cj->lastzero )) + if (cj->nzero>0 && ( GETPART( *cj, cj->n-1) != cj->lastzero )) { - LOG("split_cc_verify: last pointer for c is not set correctly!"); + LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); } } + verify_split_zeromass(*r); + // search for p in rest for (UINT k = 0; k < r->n; k++) { @@ -190,12 +401,13 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { if (p->id == pk->id) { particle_found += 1; + //~ LOG("found at r\n") } } if (particle_found != 1) { - LOG("split_cc_verify: particle %d particle_found=%d ", i, particle_found); + LOG("split_cc_verify: particle %d (%d) particle_found=%d\n", i, p->id, particle_found); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); } @@ -301,7 +513,7 @@ DOUBLE sys_forces_max_timestep(struct sys s,int dir) { for (UINT j = i+1; j < s.n; j++) { - ts_ij = (DOUBLE) timestep_ij(s.part+ i, s.part+j,dir); // check symm. + ts_ij = (DOUBLE) timestep_ij(GETPART(s,i), GETPART(s,j) ,dir); // check symm. if (ts_ij >= ts) { ts = ts_ij; }; } } @@ -317,7 +529,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, struct sys c = zerosys, r = zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); - if(accel_zero_mass) split_zeromass(&s); + //~ if(accel_zero_mass) split_zeromass(&s); if ((s.n == 2 || s.n-s.nzero<=1 )&& (inttype==CCC_KEPLER || inttype==CC_KEPLER)) //~ if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) @@ -374,12 +586,16 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, // debug: make a copy of s to verify that the split has been done properly struct sys s_before_split=zerosys; s_before_split.n = s.n; + s_before_split.nzero = s.nzero; s_before_split.part = (struct particle*) malloc(s.n*sizeof(struct particle)); - s_before_split.last = &( s_before_split.part[s.n - 1] ); + if(s_before_split.n-s_before_split.nzero>0) s_before_split.last = s_before_split.part+(s_before_split.n-s_before_split.nzero)-1; + if(s_before_split.nzero>0) s_before_split.zeropart = s_before_split.last+1; + if(s_before_split.nzero>0) s_before_split.lastzero = s_before_split.part+s_before_split.n-1; s_before_split.next_cc = NULL; - memcpy(s_before_split.part, s.part, s.n*sizeof(struct particle)); + for(UINT i=0; in; + lsys.nzero=ci->nzero; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); - lsys.part=lpart;lsys.last=lpart+lsys.n-1; - for(UINT i=0;ipart[i]; + lsys.part=lpart; + if(lsys.n-lsys.nzero>0) lsys.last=lpart+lsys.n-lsys.nzero-1; + if(lsys.nzero>0) lsys.zeropart=lsys.last+1; + if(lsys.nzero>0) lsys.lastzero=lpart+lsys.n-1; + + for(UINT i=0;ipart[i]=lpart[i]; + + for(UINT i=0;i0 && accel_zero_mass) split_zeromass(&r); + //~ if(r.n>0 && accel_zero_mass) split_zeromass(&r); // kick c <-> rest (eq 24) if(r.n>0) for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) { @@ -476,26 +700,34 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) { #ifdef _OPENMP - if (TASKCONDITION) - { - diag->ntasks[clevel]++; - diag->taskcount[clevel]+=ci->n; -#pragma omp task firstprivate(clevel,ci,stime,etime,dt,recentersub) untied + if (TASKCONDITION) { - struct sys lsys=zerosys; - lsys.n=ci->n; - struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); - lsys.part=lpart;lsys.last=lpart+lsys.n-1; - for(UINT i=0;ipart[i]; - evolve_cc2(clevel+1,lsys, stime+dt/2, etime, dt/2,inttype,recentersub); - for(UINT i=0;ipart[i]=lpart[i]; - free(lpart); - } - } else + diag->ntasks[clevel]++; + diag->taskcount[clevel]+=ci->n; +#pragma omp task firstprivate(clevel,ci,stime,etime,dt,recentersub) untied + { + struct sys lsys=zerosys; + lsys.n=ci->n; + lsys.nzero=ci->nzero; + struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); + lsys.part=lpart; + if(lsys.n-lsys.nzero>0) lsys.last=lpart+lsys.n-lsys.nzero-1; + if(lsys.nzero>0) lsys.zeropart=lsys.last+1; + if(lsys.nzero>0) lsys.lastzero=lpart+lsys.n-1; + + for(UINT i=0;i Date: Fri, 23 Jul 2021 15:55:20 +0200 Subject: [PATCH 09/35] fixes for bs, some comments and diag reporting fixes, also do kepler in case of CC_BS etc --- src/amuse/community/huayno/src/evolve.c | 3 +- src/amuse/community/huayno/src/evolve.h | 10 +++---- src/amuse/community/huayno/src/evolve_bs.c | 34 +++++++++++++++------- src/amuse/community/huayno/src/evolve_cc.c | 11 +++---- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index a3a5679054..caac4564bb 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -594,7 +594,7 @@ static void report(struct sys s,DOUBLE etime, int inttype) } printf(" total, total j, mean j: %18li %18li %f\n",totalbs,totalj,totalj/(1.*totalbs)); } - if(inttype==KEPLER || inttype==CC_KEPLER || inttype==CCC_KEPLER) + if(inttype==KEPLER || inttype==CC_KEPLER || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA) { unsigned long totalcefail=0,totalcecount=0; printf("kepler solver counts:\n"); @@ -745,6 +745,7 @@ void split_zeromass(struct sys *s) void verify_split_zeromass(struct sys s) { + if(!accel_zero_mass) return; for(UINT i=0;imass==0) ENDRUN("massless particle in main part\n") for(UINT i=s.n-s.nzero;imass!=0) ENDRUN("massive particle in massless part\n") } diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index ecd3e7e50b..0037187953 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -49,14 +49,14 @@ struct jparticle struct sys { UINT n, nzero; // n=total particles, nzero=# zero mass particles - struct particle *part; // start of particles (non-zero mass) - struct particle *zeropart; // start of zero mass particles - struct particle *last; // last of non-zero mass; not needed, serves mainly to check consistency - struct particle *lastzero; // last of zero mass; not needed, serves mainly to check consistency + struct particle *part; // start of particles, NULL iff n==0 + struct particle *zeropart; // start of zero mass particles nzero>0, otherwise NULL + struct particle *last; // last of non-zero mass; not needed, serves mainly to check consistency, NULL if n-nzero==0 + struct particle *lastzero; // last of zero mass; not needed, serves mainly to check consistency, NULL if nzero==0 struct sys *next_cc; // used in the CC split only }; -extern struct sys debugsys; +extern struct sys debugsys; // for monitoring purposes #define GETPART(s, i) ((i)<(s).n-(s).nzero ? (s).part+(i) : (s).zeropart+((i)-((s).n-(s).nzero))) diff --git a/src/amuse/community/huayno/src/evolve_bs.c b/src/amuse/community/huayno/src/evolve_bs.c index 2dafe28c15..c39a5fba4c 100644 --- a/src/amuse/community/huayno/src/evolve_bs.c +++ b/src/amuse/community/huayno/src/evolve_bs.c @@ -145,10 +145,13 @@ static int BulirschStoer(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DO DOUBLE error; struct particle *ipart; - tmpsys.n=s.n; // todo: fix zero mass part, though it will work as is + tmpsys.n=s.n; + tmpsys.nzero=s.nzero; tmpsys.part=(struct particle*) malloc(s.n*sizeof(struct particle)); if(!tmpsys.part) ENDRUN("failed allocation of tmpsys\n"); - tmpsys.last=tmpsys.part+tmpsys.n-1; + if(tmpsys.n-tmpsys.nzero>0) tmpsys.last=tmpsys.part+(tmpsys.n-tmpsys.nzero)-1; + if(tmpsys.nzero>0) tmpsys.zeropart=tmpsys.part+tmpsys.n-tmpsys.nzero; + if(tmpsys.nzero>0) tmpsys.lastzero=tmpsys.part+tmpsys.n-1; ZEROBSYS_ARRAY(bsys_array) ZEROBSYS_ARRAY(bsys_array1) @@ -170,7 +173,7 @@ static int BulirschStoer(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DO } SWAP(jline,j1line, struct bsys*) - for(i=0;ipos_e[0]=0.; ipart->pos_e[1]=0.; @@ -285,22 +288,31 @@ static void n_cc_kepler(int clevel,int n,struct sys s, DOUBLE stime, DOUBLE etim { UINT id; struct sys tmpsys=zerosys; + struct particle *ipart, *tpart; FLOAT odt_param=dt_param; dt_param=dt_param/n; tmpsys.n=s.n; + tmpsys.nzero=s.nzero; tmpsys.part=(struct particle*) malloc(s.n*sizeof(struct particle)); - tmpsys.last=tmpsys.part+s.n-1; - for(UINT i=0;i0) tmpsys.last=tmpsys.part+(tmpsys.n-tmpsys.nzero)-1; + if(tmpsys.nzero>0) tmpsys.zeropart=tmpsys.part+tmpsys.n-tmpsys.nzero; + if(tmpsys.nzero>0) tmpsys.lastzero=tmpsys.part+tmpsys.n-1; + + for(UINT i=0;iid=i; } evolve_cc2(clevel,tmpsys, stime, etime, dt,CCC_KEPLER,1); for(UINT i=0;iid); + id=ipart->id; + *ipart=*tpart; + ipart->id=id; } dt_param=odt_param; free(tmpsys.part); diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 51b7deb73b..80ab99c3ca 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -351,7 +351,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { * split_cc_verify: explicit verification if connected components c and rest system r form a correct * connected components decomposition of the system. */ - LOG("split_cc_verify ping s.n=%d r->n=%d\n", s.n, r->n); + //~ LOG("split_cc_verify ping s.n=%d r->n=%d\n", s.n, r->n); //LOG_CC_SPLIT(c, r); UINT pcount_check = 0; @@ -428,7 +428,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { } else { - LOG("split_cc_verify pong\n"); + //~ LOG("split_cc_verify pong\n"); } //ENDRUN("Fin.\n"); } @@ -531,7 +531,8 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, //~ if(accel_zero_mass) split_zeromass(&s); - if ((s.n == 2 || s.n-s.nzero<=1 )&& (inttype==CCC_KEPLER || inttype==CC_KEPLER)) + if ((s.n == 2 || s.n-s.nzero<=1 )&& + (inttype==CCC_KEPLER || inttype==CC_KEPLER || inttype==CCC_BS || inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA)) //~ if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) { evolve_kepler(clevel,s, stime, etime, dt); @@ -578,7 +579,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, #ifdef CONSISTENCY_CHECKS if (clevel == 0) { - printf("consistency_checks: ", s.n, clevel); + printf("consistency_checks: %d %d \n", s.n, clevel); } #endif @@ -620,7 +621,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, split_cc_verify_ts(clevel,&c, &r, dt); free(s_before_split.part); if (clevel == 0) { - printf("ok "); + printf("ok \n"); } #endif From 78a903938ec67d19607fd578e1f8506fc568485d Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 16:36:08 +0200 Subject: [PATCH 10/35] factor out last and lastzero pointers (not needed) --- src/amuse/community/huayno/interface.c | 11 +--- src/amuse/community/huayno/src/evolve.c | 37 ++++++----- src/amuse/community/huayno/src/evolve.h | 8 +-- src/amuse/community/huayno/src/evolve_bs.c | 4 -- src/amuse/community/huayno/src/evolve_cc.c | 61 ++++++------------- .../community/huayno/src/evolve_kepler.c | 3 +- src/amuse/community/huayno/src/evolve_ok.c | 16 ++--- src/amuse/community/huayno/src/evolve_ok.h | 1 - src/amuse/community/huayno/src/evolve_sf.c | 8 +-- .../test/suite/codes_tests/test_huayno.py | 19 +++--- 10 files changed, 65 insertions(+), 103 deletions(-) diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index 64ef3c0378..f8b6c379a3 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -56,7 +56,6 @@ int cleanup_code() free(mainsys.part); mainsys.part=NULL; } - mainsys.last=NULL; dt_param=.03; accel_zero_mass=1; t_now=0.; @@ -105,7 +104,6 @@ int new_particle(int *id, double mass, mainsys.part[p].postime=0; mainsys.n++; pcounter++; - mainsys.last=&mainsys.part[p]; return 0; } @@ -117,12 +115,7 @@ int delete_particle(int id) if(err!=0) return err; LOOKUPSYMBOL(,_delete)(&lookup,id); mainsys.n--; - if(mainsys.n==0) - { - mainsys.last=NULL; - return 0; - } - mainsys.last--; + if(mainsys.n==0) return 0; mainsys.part[p]=mainsys.part[mainsys.n]; LOOKUPSYMBOL(,_update)(&lookup,mainsys.part[p].id,p); return 0; @@ -588,7 +581,6 @@ int get_gravity_at_point(double * eps, double * x, double * y, double * z, struct sys tmpsys=zerosys; tmpsys.n=n; tmpsys.part=(struct particle*) malloc(n*sizeof(struct particle)); - tmpsys.last=&tmpsys.part[n]; for(int p=0;p0 && s.last-s.part + 1 != s.n-s.nzero) ENDRUN("join error 1"); - if(s.nzero>0 && s.lastzero-s.zeropart + 1 != s.nzero) ENDRUN("join error 2"); + if(s.n-s.nzero>0 && LAST(s)-s.part + 1 != s.n-s.nzero) ENDRUN("join error 1"); + if(s.nzero>0 && LASTZERO(s)-s.zeropart + 1 != s.nzero) ENDRUN("join error 2"); return s; } @@ -708,10 +708,11 @@ void split_zeromass(struct sys *s) if(s->n==0) return; if(s->n-s->nzero==0) { - if(s->lastzero-s->zeropart+1!=s->nzero) ENDRUN( "split_zeromass malformed input sys"); + if(s->zeropart==NULL) ENDRUN("split_zeromass malformed input"); + if(LASTZERO(*s)-s->zeropart+1!=s->nzero) ENDRUN( "split_zeromass malformed input sys"); return; } - if(s->nzero!=0 && s->last+1!=s->zeropart) + if(s->nzero!=0 && LAST(*s)+1!=s->zeropart) ENDRUN("split_zeromass can only work on fully contiguous sys"); left=s->part; right=s->part+(s->n-1); @@ -731,9 +732,7 @@ void split_zeromass(struct sys *s) if(s->nzero<0) ENDRUN("split_zeromass find negative number of part"); if(s->nzero>0) { - s->last=left-1; s->zeropart=left; - s->lastzero=left+(s->nzero-1); } if((left-s->part)+s->nzero !=s->n) ENDRUN( "split_zeromass error 2"); for(i=0;i<(s->n-s->nzero);i++) if(s->part[i].mass==0) ENDRUN ("split_zromass error 3"); diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 0037187953..f107c4b61e 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -51,14 +51,14 @@ struct sys UINT n, nzero; // n=total particles, nzero=# zero mass particles struct particle *part; // start of particles, NULL iff n==0 struct particle *zeropart; // start of zero mass particles nzero>0, otherwise NULL - struct particle *last; // last of non-zero mass; not needed, serves mainly to check consistency, NULL if n-nzero==0 - struct particle *lastzero; // last of zero mass; not needed, serves mainly to check consistency, NULL if nzero==0 struct sys *next_cc; // used in the CC split only }; -extern struct sys debugsys; // for monitoring purposes - #define GETPART(s, i) ((i)<(s).n-(s).nzero ? (s).part+(i) : (s).zeropart+((i)-((s).n-(s).nzero))) +#define LAST(s) ((s).part==NULL || (s).n-(s).nzero == 0 ? NULL : (s).part+(s).n-1) +#define LASTZERO(s) ((s).zeropart==NULL ? NULL : (s).zeropart+(s).nzero-1) + +extern struct sys debugsys; // for monitoring purposes enum intopt { diff --git a/src/amuse/community/huayno/src/evolve_bs.c b/src/amuse/community/huayno/src/evolve_bs.c index c39a5fba4c..4d20d660f6 100644 --- a/src/amuse/community/huayno/src/evolve_bs.c +++ b/src/amuse/community/huayno/src/evolve_bs.c @@ -149,9 +149,7 @@ static int BulirschStoer(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DO tmpsys.nzero=s.nzero; tmpsys.part=(struct particle*) malloc(s.n*sizeof(struct particle)); if(!tmpsys.part) ENDRUN("failed allocation of tmpsys\n"); - if(tmpsys.n-tmpsys.nzero>0) tmpsys.last=tmpsys.part+(tmpsys.n-tmpsys.nzero)-1; if(tmpsys.nzero>0) tmpsys.zeropart=tmpsys.part+tmpsys.n-tmpsys.nzero; - if(tmpsys.nzero>0) tmpsys.lastzero=tmpsys.part+tmpsys.n-1; ZEROBSYS_ARRAY(bsys_array) ZEROBSYS_ARRAY(bsys_array1) @@ -294,9 +292,7 @@ static void n_cc_kepler(int clevel,int n,struct sys s, DOUBLE stime, DOUBLE etim tmpsys.n=s.n; tmpsys.nzero=s.nzero; tmpsys.part=(struct particle*) malloc(s.n*sizeof(struct particle)); - if(tmpsys.n-tmpsys.nzero>0) tmpsys.last=tmpsys.part+(tmpsys.n-tmpsys.nzero)-1; if(tmpsys.nzero>0) tmpsys.zeropart=tmpsys.part+tmpsys.n-tmpsys.nzero; - if(tmpsys.nzero>0) tmpsys.lastzero=tmpsys.part+tmpsys.n-1; for(UINT i=0;in == 0) && \ ((SYS)->nzero == 0) && \ ((SYS)->part == NULL) && \ - ((SYS)->last == NULL) && \ - ((SYS)->next_cc == NULL) && \ ((SYS)->zeropart == NULL) && \ - ((SYS)->lastzero == NULL) ) + ((SYS)->next_cc == NULL) ) #define IS_ZEROSYSs(SYS) IS_ZEROSYS(&(SYS)) @@ -66,7 +64,7 @@ void split_cc_old(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics struct sys *c_next; if(s.n<=1) ENDRUN("This does not look right..."); - if(s.nzero>0 && s.n!=s.nzero && s.zeropart!=s.last+1) + if(s.nzero>0 && s.n!=s.nzero && s.zeropart!=LAST(s)+1) ENDRUN("split_cc only works on contiguous systems"); c_next = c; *c_next = zerosys; @@ -112,7 +110,6 @@ void split_cc_old(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE // remove components from u (u.n, u.part) c_next->n = comp_size; c_next->part = comp_next - comp_size; - c_next->last = comp_next-1; c_next->next_cc = (struct sys*) malloc( sizeof(struct sys) ); c_next = c_next->next_cc; *c_next = zerosys; @@ -144,12 +141,10 @@ void split_cc_old(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE if (r->n > 0) { r->part = rest_next+1; - r->last = GETPART(s, s.n-1); } else { r->part = NULL; - r->last = NULL; } //LOG("split_cc: rest system size: %d\n", r->n); } @@ -175,9 +170,9 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) // find connected components if(s.n-s.nzero>0) stack_next=s.part; - if(s.n-s.nzero>0) rest_next=s.last; + if(s.n-s.nzero>0) rest_next=LAST(s); if(s.nzero>0) stackzero_next=s.zeropart; - if(s.nzero>0) restzero_next=s.lastzero; + if(s.nzero>0) restzero_next=LASTZERO(s); comp_next=stack_next; compzero_next=stackzero_next; *c_next=zerosys; // initialize c_next @@ -277,12 +272,10 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) if(comp_size-compzero_size>0) { c_next->part = comp_next - (comp_size-compzero_size); - c_next->last = comp_next-1; } if(compzero_size>0) { c_next->zeropart = compzero_next - compzero_size; - c_next->lastzero = compzero_next-1; } if(c_next->part==NULL) c_next->part=c_next->zeropart; //~ PRINTSYS((*c_next)); @@ -320,18 +313,16 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) // create the rest system *r=zerosys; - if(rest_next!=NULL) r->n = s.last - rest_next; - if(restzero_next!=NULL) r->nzero = s.lastzero - restzero_next; + if(rest_next!=NULL) r->n = LAST(s) - rest_next; + if(restzero_next!=NULL) r->nzero = LASTZERO(s) - restzero_next; r->n+=r->nzero; if (r->n-r->nzero > 0) { r->part = rest_next+1; - r->last = s.last; } if (r->nzero > 0) { r->zeropart = restzero_next+1; - r->lastzero = s.lastzero; } if(r->part==NULL) r->part=r->zeropart; @@ -376,13 +367,13 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { //~ LOG("split_cc_verify: found %d in a cc\n",i); } } - if (cj->n-cj->nzero>0 && ( GETPART( *cj, cj->n - cj->nzero - 1) != cj->last )) + if (cj->n-cj->nzero>0 && ( GETPART( *cj, cj->n - cj->nzero - 1) != LAST(*cj) )) { LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); } - if (cj->nzero>0 && ( GETPART( *cj, cj->n-1) != cj->lastzero )) + if (cj->nzero>0 && ( GETPART( *cj, cj->n-1) != LASTZERO(*cj) )) { LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); @@ -413,12 +404,6 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { } } - //if (& ( r->part[r->n - 1] ) != r->last) { - // LOG("split_cc_verify: last pointer for r is not set correctly! %d %d",&( r->part[r->n - 1] ), r->last); - // LOG_CC_SPLIT(c, r); - // ENDRUN("data structure corrupted\n"); - //} - if (pcount_check + r->n != s.n) { LOG("split_cc_verify: particle count mismatch (%d %d)\n", pcount_check + r->n, s.n); @@ -585,15 +570,13 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, #ifdef CONSISTENCY_CHECKS // debug: make a copy of s to verify that the split has been done properly - struct sys s_before_split=zerosys; - s_before_split.n = s.n; - s_before_split.nzero = s.nzero; - s_before_split.part = (struct particle*) malloc(s.n*sizeof(struct particle)); - if(s_before_split.n-s_before_split.nzero>0) s_before_split.last = s_before_split.part+(s_before_split.n-s_before_split.nzero)-1; - if(s_before_split.nzero>0) s_before_split.zeropart = s_before_split.last+1; - if(s_before_split.nzero>0) s_before_split.lastzero = s_before_split.part+s_before_split.n-1; - s_before_split.next_cc = NULL; - for(UINT i=0; i0) s_before.zeropart = s_before.part+(s_before.n-s_before.nzero); + s_before.next_cc = NULL; + for(UINT i=0; inzero; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart; - if(lsys.n-lsys.nzero>0) lsys.last=lpart+lsys.n-lsys.nzero-1; - if(lsys.nzero>0) lsys.zeropart=lsys.last+1; - if(lsys.nzero>0) lsys.lastzero=lpart+lsys.n-1; + if(lsys.nzero>0) lsys.zeropart=lsys.part+(lsys.n-lsys.nzero); for(UINT i=0;inzero; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart; - if(lsys.n-lsys.nzero>0) lsys.last=lpart+lsys.n-lsys.nzero-1; - if(lsys.nzero>0) lsys.zeropart=lsys.last+1; - if(lsys.nzero>0) lsys.lastzero=lpart+lsys.n-1; + if(lsys.nzero>0) lsys.zeropart=lsys.part+(lsys.n-lsys.nzero); for(UINT i=0;imass; *ipart=p[1]; diff --git a/src/amuse/community/huayno/src/evolve_ok.c b/src/amuse/community/huayno/src/evolve_ok.c index 9daa6a6ccc..00d66d5fcb 100644 --- a/src/amuse/community/huayno/src/evolve_ok.c +++ b/src/amuse/community/huayno/src/evolve_ok.c @@ -10,9 +10,9 @@ #include "evolve.h" #include "evolve_ok.h" -struct forces zeroforces = {0, NULL, NULL}; +struct forces zeroforces = {0, NULL}; -#define IS_ZEROFORCES(F) (((F).n == 0) && ((F).forc == NULL) && ((F).last == NULL)) +#define IS_ZEROFORCES(F) (((F).n == 0) && ((F).forc == NULL)) #define LOG_FORCES(F) \ { \ @@ -21,6 +21,9 @@ struct forces zeroforces = {0, NULL, NULL}; } \ }; +#define LASTFORCE(s) ((s).forc==NULL || (s).n==0 ? NULL : (s).forc+(s).n-1) + + static void ok_timestep_cpu(int clevel,struct forces f, DOUBLE dt) { int dir=SIGN(dt); @@ -42,7 +45,7 @@ static void ok_split(FLOAT dt, struct forces f, struct forces *slow, struct forc UINT i = 0; struct force *left, *right; left = f.forc; - right = f.last; + right = LASTFORCE(f); dt=fabs(dt); while (1) { @@ -60,7 +63,7 @@ static void ok_split(FLOAT dt, struct forces f, struct forces *slow, struct forc } } if (left->timestep < dt) left++; - slow->n = f.last - left + 1; + slow->n = LASTFORCE(f) - left + 1; fast->n = left - f.forc; if (fast->n == 1) { @@ -70,26 +73,23 @@ static void ok_split(FLOAT dt, struct forces f, struct forces *slow, struct forc if (slow->n > 0) { slow->forc = f.forc + fast->n; - slow->last = f.last;//slow->part+slow->n-1; } if (fast->n > 0) { fast->forc = f.forc; - fast->last = f.forc + fast->n - 1; } if (fast->n + slow->n != f.n) ENDRUN("forces split error 2: fast->n=%u slow->n=%u f.n=%u\n", fast->n, slow->n, f.n); //for (i = 0; i < f.n; i++) f.forc[i].level = clevel; } -struct forces ok_main_forces = {0, NULL, NULL}; +struct forces ok_main_forces = {0, NULL}; void evolve_ok_init(struct sys s) { UINT n_forces = s.n * s.n - s.n; if (ok_main_forces.forc != NULL) ENDRUN("OK (re)allocation error"); ok_main_forces.forc = (struct force *) malloc(n_forces * sizeof(struct force)); - ok_main_forces.last = &(ok_main_forces.forc[n_forces - 1]); ok_main_forces.n = n_forces; // initialize pointers of the forces structure diff --git a/src/amuse/community/huayno/src/evolve_ok.h b/src/amuse/community/huayno/src/evolve_ok.h index fefcdfbbba..5ca7a4baf2 100644 --- a/src/amuse/community/huayno/src/evolve_ok.h +++ b/src/amuse/community/huayno/src/evolve_ok.h @@ -7,7 +7,6 @@ struct force { struct forces { UINT n; struct force *forc; - struct force *last; }; extern struct forces zeroforces; diff --git a/src/amuse/community/huayno/src/evolve_sf.c b/src/amuse/community/huayno/src/evolve_sf.c index 1e9ad02d8a..d6b3ad1044 100644 --- a/src/amuse/community/huayno/src/evolve_sf.c +++ b/src/amuse/community/huayno/src/evolve_sf.c @@ -345,7 +345,7 @@ static void split(FLOAT dt, struct sys s, struct sys *slow, struct sys *fast) if(s.n-s.nzero>0) { left=s.part; - right=s.last; + right=LAST(s); pivot=partition(dt, left, right); slow->n=right-pivot+1; fast->n=(pivot-left); @@ -355,7 +355,7 @@ static void split(FLOAT dt, struct sys s, struct sys *slow, struct sys *fast) if(s.nzero>0) { left=s.zeropart; - right=s.lastzero; + right=LASTZERO(s); pivot=partition(dt, left, right); slow->nzero=right-pivot+1; fast->nzero=(pivot-left); @@ -371,22 +371,18 @@ static void split(FLOAT dt, struct sys s, struct sys *slow, struct sys *fast) if(slow->n>0) { slow->part=s.part+fast->n-fast->nzero; - slow->last=s.last; } if(fast->n>0) { fast->part=s.part; - fast->last=s.part+(fast->n-fast->nzero)-1; } if(slow->nzero>0) { slow->zeropart=s.zeropart+fast->nzero; - slow->lastzero=s.lastzero; } if(fast->nzero > 0) { fast->zeropart=s.zeropart; - fast->lastzero=s.zeropart+fast->nzero-1; } if(fast->n+slow->n !=s.n) ENDRUN( "split error 2"); if(fast->nzero+slow->nzero !=s.nzero) ENDRUN( "split error 3"); diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index 1865ef436a..e4f2a6b957 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -383,8 +383,8 @@ def test14(self): for itype in test_set: - print() - print(itype) + #~ print() + #~ print(itype) instance = Huayno() instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] #~ instance.parameters.accelerate_zero_mass=False @@ -394,6 +394,7 @@ def test14(self): E2=instance.kinetic_energy+instance.potential_energy if itype!="CONSTANT": self.assertLess(abs(E2-E1).number, 1.e-5) + #~ print((E2-E1).number) part_out= instance.particles.copy() position = part_out.position.number @@ -425,11 +426,13 @@ def test14b(self): "CC", "CC_KEPLER", "CC_BS", "CC_BSA", "OK", "SHAREDBS", "SHARED4", "SHARED6", "SHARED8", "SHARED10"] - + for itype in test_set: - instance = Huayno() + #~ print() + #~ print(itype) + instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] - instance.parameters.accelerate_zero_mass=False + instance.parameters.accelerate_zero_mass=True instance.particles.add_particles(particles) instance.particles.add_particles(p2) E1=instance.kinetic_energy+instance.potential_energy @@ -473,8 +476,8 @@ def test14c(self): #~ test_set=["CC_BS"] for itype in test_set: - print() - print(itype) + #~ print() + #~ print(itype) instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] instance.parameters.accelerate_zero_mass=True @@ -485,7 +488,7 @@ def test14c(self): E2=instance.kinetic_energy+instance.potential_energy #~ if itype!="CONSTANT": #~ self.assertLess(abs(E2-E1).number, 1.e-5) - print((E2-E1).number) + #~ print((E2-E1).number) part_out= instance.particles.copy() position = part_out.position.number if hasattr(position,'tobytes'): From f1cd8d0f159002c35964040070643f177adb8505 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 17:22:12 +0200 Subject: [PATCH 11/35] update test --- src/amuse/test/suite/codes_tests/test_huayno.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index e4f2a6b957..8734a28051 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -409,7 +409,7 @@ def test14(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("a43263ea713b4b944513d0597e5651a07114eefc") + print("44b8747cd181c7c05de39094d254771b4f67809c") def test14b(self): import hashlib @@ -502,7 +502,7 @@ def test14c(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("ac993b464a8efe1f56a57748259d6388ce1e1e44 ") + print("c31d5d2667189708ae8e8be593a2445204596203") def test15(self): From c8b208055609c2894ff9e8c7f7b730c6be32ce6a Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 17:46:21 +0200 Subject: [PATCH 12/35] retire old split_cc --- src/amuse/community/huayno/src/evolve_cc.c | 96 ---------------------- 1 file changed, 96 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index c5c14714b9..1dd51a87b0 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -53,102 +53,6 @@ #define LOGSYSp_ID(SYS) for (UINT i = 0; i < (SYS)->n; i++) { printf("%u ", (SYS)->part[i].id); } printf("\n"); #define LOGSYSC_ID(SYS) for (struct sys *_ci = &(SYS); !IS_ZEROSYS(_ci); _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); -void split_cc_old(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { - /* - * split_cc: run a connected component search on sys s with threshold dt, - * creates a singly-linked list of connected components c and a rest system r - * c or r is set to zerosys if no connected components/rest is found - */ - int dir=SIGN(dt); - dt=fabs(dt); - diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics - struct sys *c_next; - if(s.n<=1) ENDRUN("This does not look right..."); - if(s.nzero>0 && s.n!=s.nzero && s.zeropart!=LAST(s)+1) - ENDRUN("split_cc only works on contiguous systems"); - c_next = c; - *c_next = zerosys; - UINT processed = 0; // increase if something is added from the stack to the cc - struct particle *comp_next = s.part; // increase to move a particle from stack to cc; points to the first element of the stack - UINT comp_size = 0; // amount of particles added to the current cc - struct particle *stack_next = comp_next+1; // swap this with s[i] to increase the stack - UINT stack_size = 1; // first element of the stack is comp_next - // last element of the stack is comp_next + stack_size - 1 == stack_next-1 - struct particle *rest_next = GETPART(s, s.n-1); // swap this to add to the rest-system - // find connected components - while (processed < s.n) - { - //LOG("split_cc: searching for connected components: %d / %d\n", processed, s.n); - // search for the next connected component - while (stack_size > 0) - { - // iterate over all unvisited elements - for (struct particle *i = stack_next; i <= rest_next; i++) - { - // if element is connected to the first element of the stack - DOUBLE timestep = (DOUBLE) timestep_ij(comp_next, i,dir); - diag->tcount[clevel]++; - if ( timestep <= dt) - { - // add i to the end of the stack by swapping stack_next and i - SWAP( *stack_next , *i, struct particle ); - stack_next++; - stack_size++; - } - } - // pop the stack; add to the connected component - comp_size++; - comp_next++; - stack_size--; - } - processed += comp_size; - // new component is non-trivial: create a new sys - if (comp_size > 1) - { - //LOG("split_cc: found component with size: %d\n", comp_size); - // create new component c from u[0] to u[cc_visited - 1] - // remove components from u (u.n, u.part) - c_next->n = comp_size; - c_next->part = comp_next - comp_size; - c_next->next_cc = (struct sys*) malloc( sizeof(struct sys) ); - c_next = c_next->next_cc; - *c_next = zerosys; - if(stack_next!=comp_next) ENDRUN("consistency error in split_cc") - // comp_next = stack_next; // should be unchanged - comp_size = 0; - stack_next= comp_next+1; - stack_size = 1; - // new component is trivial: add to rest - } - else - { - //LOG("found trivial component; adding to rest\n"); - SWAP( *(comp_next - 1), *rest_next, struct particle ); - rest_next--; - comp_next = comp_next - 1; - comp_size = 0; - if(stack_next!=comp_next+1) ENDRUN("consistency error in split_cc") - //stack_next = comp_next + 1; // should be unchanged - stack_size = 1; - } - } - if (processed != s.n) - { - ENDRUN("split_cc particle count mismatch: processed=%u s.n=%u r->n=%u\n", processed, s.n, r->n); - } - // create the rest system - r->n = GETPART(s, s.n-1) - rest_next; - if (r->n > 0) - { - r->part = rest_next+1; - } - else - { - r->part = NULL; - } - //LOG("split_cc: rest system size: %d\n", r->n); -} - void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { /* * split_cc: run a connected component search on sys s with threshold dt, From 60cb3fad3098ce423e7d85b1351e5e28fc6ead12 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 18:06:30 +0200 Subject: [PATCH 13/35] fix LAST macro --- src/amuse/community/huayno/src/evolve.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index f107c4b61e..249bb3bb2c 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -55,8 +55,8 @@ struct sys }; #define GETPART(s, i) ((i)<(s).n-(s).nzero ? (s).part+(i) : (s).zeropart+((i)-((s).n-(s).nzero))) -#define LAST(s) ((s).part==NULL || (s).n-(s).nzero == 0 ? NULL : (s).part+(s).n-1) -#define LASTZERO(s) ((s).zeropart==NULL ? NULL : (s).zeropart+(s).nzero-1) +#define LAST(s) ((s).part==NULL || (s).n-(s).nzero==0 ? NULL : (s).part+((s).n-(s).nzero)-1) +#define LASTZERO(s) ((s).zeropart==NULL || (s).nzero==0 ? NULL : (s).zeropart+(s).nzero-1) extern struct sys debugsys; // for monitoring purposes From ca03c1b2fa3d68c983104be859877dbc4a37d1ae Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 18:26:34 +0200 Subject: [PATCH 14/35] actually skip all zero/zero mass timestep checks --- src/amuse/community/huayno/src/evolve.h | 2 +- src/amuse/community/huayno/src/evolve_cc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 249bb3bb2c..88898139f9 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -19,7 +19,7 @@ #define COMPENSATED_SUMMP //#define COMPENSATED_SUMMV -#define CONSISTENCY_CHECKS // perform (time-consuming, but thorough) sanity checks +//~ #define CONSISTENCY_CHECKS // perform (time-consuming, but thorough) sanity checks struct particle { diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 1dd51a87b0..f313a9dcb1 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -137,8 +137,8 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) } } } - // iterate over all unvisited elements - if(stackzero_next!=NULL) + // iterate over all unvisited elements, skip when active is zero mass + if(stackzero_next!=NULL && active!=&compzero_next) { //~ LOG("check zero %d\n", restzero_next-stackzero_next+1); for (struct particle *i = stackzero_next; i <= restzero_next; i++) From 1013db5a3c5177cb045d67a3f76b83d87a54e6b3 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 19:39:56 +0200 Subject: [PATCH 15/35] update test --- src/amuse/test/suite/codes_tests/test_huayno.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index 8734a28051..91ba7bb212 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -454,7 +454,8 @@ def test14b(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("36cf912fb27cbf8169aa6d58e592a29bca4d1991") + print("a37517f3acc23cfd39b7a53f9f47631fa8df2ae4") + #~ print("36cf912fb27cbf8169aa6d58e592a29bca4d1991") def test14c(self): import hashlib From 2a542cda3709ef89a3adf8a4b5c2722bd8fd5447 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 20:20:41 +0200 Subject: [PATCH 16/35] retire IS_ZEROSYS --- src/amuse/community/huayno/src/evolve_cc.c | 96 +++++++++---------- .../test/suite/codes_tests/test_huayno.py | 4 +- 2 files changed, 46 insertions(+), 54 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index f313a9dcb1..77fd55f670 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -18,20 +18,11 @@ #include "evolve_kepler.h" #include "evolve_bs.h" -// this is a bit weird, why not use NULL pointer? -#define IS_ZEROSYS(SYS) (((SYS)->n == 0) && \ - ((SYS)->nzero == 0) && \ - ((SYS)->part == NULL) && \ - ((SYS)->zeropart == NULL) && \ - ((SYS)->next_cc == NULL) ) - -#define IS_ZEROSYSs(SYS) IS_ZEROSYS(&(SYS)) - #define LOG_CC_SPLIT(C, R) \ { \ LOG("clevel = %d s.n = %d c.n = {", clevel, s.n); \ - for (struct sys *_ci = (C); !IS_ZEROSYS(_ci); _ci = _ci->next_cc) printf(" %d ", _ci->n ); \ - printf("} r.n = %d\n", (R)->n); \ + for (struct sys *_ci = (C); _ci!=NULL; _ci = _ci->next_cc) printf(" %d ", _ci->n ); \ + printf("} r.n = %d\n", (R).n); \ }; #define PRINTSYS(s) \ @@ -51,9 +42,9 @@ #define LOGSYS_ID(SYS) for (UINT i = 0; i < (SYS).n; i++) { printf("%u ", (SYS).part[i].id); } printf("\n"); #define LOGSYSp_ID(SYS) for (UINT i = 0; i < (SYS)->n; i++) { printf("%u ", (SYS)->part[i].id); } printf("\n"); -#define LOGSYSC_ID(SYS) for (struct sys *_ci = &(SYS); !IS_ZEROSYS(_ci); _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); +#define LOGSYSC_ID(SYS) for (struct sys *_ci = &(SYS); _ci!=NULL; _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); -void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { +void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) { /* * split_cc: run a connected component search on sys s with threshold dt, * creates a singly-linked list of connected components c and a rest system r @@ -62,9 +53,10 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) int dir=SIGN(dt); dt=fabs(dt); diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics - struct sys *c_next; + struct sys **c_next; if(s.n<=1) ENDRUN("This does not look right..."); - c_next = c; + c_next = c; + if(*c_next!=NULL) ENDRUN("should start with zero pointer"); UINT processed = 0; // increase if something is added from the stack to the cc struct particle **active, *comp_next, *compzero_next; // current active particle, and next in the mass and massless part UINT comp_size, compzero_size; @@ -79,7 +71,6 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) if(s.nzero>0) restzero_next=LASTZERO(s); comp_next=stack_next; compzero_next=stackzero_next; - *c_next=zerosys; // initialize c_next while (processed < s.n) { @@ -171,23 +162,24 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) { //~ LOG("split_cc: found component with size: %d %d\n", comp_size, compzero_size); //~ LOG("%d %d \n", comp_next-stack_next, compzero_next-stackzero_next); - c_next->n = comp_size; - c_next->nzero = compzero_size; + struct sys *new=(struct sys*) malloc( sizeof(struct sys) ); + *new=zerosys; + new->n = comp_size; + new->nzero = compzero_size; if(comp_size-compzero_size>0) { - c_next->part = comp_next - (comp_size-compzero_size); + new->part = comp_next - (comp_size-compzero_size); } if(compzero_size>0) { - c_next->zeropart = compzero_next - compzero_size; + new->zeropart = compzero_next - compzero_size; } - if(c_next->part==NULL) c_next->part=c_next->zeropart; + if(new->part==NULL) new->part=new->zeropart; //~ PRINTSYS((*c_next)); //~ PRINTOFFSETS((*c_next)); - - c_next->next_cc = (struct sys*) malloc( sizeof(struct sys) ); - c_next = c_next->next_cc; - *c_next=zerosys; + new->next_cc = NULL; + *c_next=new; + c_next = &(new->next_cc); } else // new component is trivial: add to rest, reset pointers { @@ -241,7 +233,7 @@ void split_cc(int clevel,struct sys s, struct sys *c, struct sys *r, DOUBLE dt) -void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { +void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys r) { /* * split_cc_verify: explicit verification if connected components c and rest system r form a correct * connected components decomposition of the system. @@ -255,7 +247,7 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { pcount_check = 0; UINT particle_found = 0; struct particle *p = GETPART(s, i); - for (struct sys *cj = c; !IS_ZEROSYS(cj); cj = cj->next_cc) + for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) { verify_split_zeromass(*cj); pcount_check += cj->n; @@ -285,12 +277,12 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { } } - verify_split_zeromass(*r); + verify_split_zeromass(r); // search for p in rest - for (UINT k = 0; k < r->n; k++) + for (UINT k = 0; k < r.n; k++) { - struct particle * pk = GETPART( *r, k); + struct particle * pk = GETPART( r, k); // is pk equal to p if (p->id == pk->id) @@ -308,9 +300,9 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { } } - if (pcount_check + r->n != s.n) + if (pcount_check + r.n != s.n) { - LOG("split_cc_verify: particle count mismatch (%d %d)\n", pcount_check + r->n, s.n); + LOG("split_cc_verify: particle count mismatch (%d %d)\n", pcount_check + r.n, s.n); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); //ENDRUN("split_cc_verify: particle count mismatch\n"); @@ -322,17 +314,17 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys *r) { //ENDRUN("Fin.\n"); } -void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) +void split_cc_verify_ts(int clevel,struct sys *c, struct sys r, DOUBLE dt) { DOUBLE ts_ij; int dir=SIGN(dt); dt=fabs(dt); // verify C-C interactions - for (struct sys *ci = c; !IS_ZEROSYS(ci); ci = ci->next_cc) + for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { for (UINT i = 0; i < ci->n; i++) { - for (struct sys *cj = c; !IS_ZEROSYS(cj); cj = cj->next_cc) + for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) { if (ci == cj) { @@ -353,13 +345,13 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) } // verify C-R interactions - for (struct sys *ci = c; !IS_ZEROSYS(ci); ci = ci->next_cc) + for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { for (UINT i = 0; i < ci->n; i++) { - for (UINT j = 0; j < r->n; j++) + for (UINT j = 0; j < r.n; j++) { - ts_ij = (DOUBLE) timestep_ij( GETPART(*ci, i), GETPART(*r, j),dir); + ts_ij = (DOUBLE) timestep_ij( GETPART(*ci, i), GETPART(r, j),dir); if (ts_ij < dt) { ENDRUN("split_cc_verify_ts C-R timestep underflow\n"); @@ -369,12 +361,12 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys *r, DOUBLE dt) } // verify R interactions - for (UINT i = 0; i < r->n; i++) + for (UINT i = 0; i < r.n; i++) { - for (UINT j = 0; j < r->n; j++) + for (UINT j = 0; j < r.n; j++) { if (i == j) continue; - ts_ij = (DOUBLE) timestep_ij( GETPART(*r, i), GETPART( *r,j),dir); + ts_ij = (DOUBLE) timestep_ij( GETPART(r, i), GETPART(r,j),dir); if (ts_ij < dt) { ENDRUN("split_cc_verify_ts R-R timestep underflow\n"); @@ -415,7 +407,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, { DOUBLE cmpos[3],cmvel[3]; int recentersub=0; - struct sys c = zerosys, r = zerosys; + struct sys *c = NULL, r = zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); //~ if(accel_zero_mass) split_zeromass(&s); @@ -504,25 +496,25 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } */ // verify the split - split_cc_verify(clevel,s_before, &c, &r); - split_cc_verify_ts(clevel,&c, &r, dt); + split_cc_verify(clevel,s_before, c, r); + split_cc_verify_ts(clevel, c, r, dt); free(s_before.part); if (clevel == 0) { printf("ok \n"); } #endif - if (IS_ZEROSYSs(c)) { + if (c==NULL) { diag->deepsteps++; diag->simtime+=dt; } // Independently integrate every C_i at reduced pivot time step h/2 (1st time) - int nc=0; for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) nc++; + int nc=0; for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) nc++; if(nc>1 || r.n>0) recentersub=1; - for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) + for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { #ifdef _OPENMP if( TASKCONDITION ) @@ -558,9 +550,9 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if(r.n>0) drift(clevel,r, stime+dt/2, dt/2); // drift r, 1st time // kick ci <-> cj (eq 23) - for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) + for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { - for (struct sys *cj = &c; !IS_ZEROSYS(cj); cj = cj->next_cc) + for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) { if (ci != cj) { @@ -572,7 +564,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, //~ if(r.n>0 && accel_zero_mass) split_zeromass(&r); // kick c <-> rest (eq 24) - if(r.n>0) for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) + if(r.n>0) for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { kick(clevel,r, *ci, dt); kick(clevel,*ci, r, dt); @@ -583,7 +575,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if(r.n>0) drift(clevel,r, etime, dt/2); // drift r, 2nd time // Independently integrate every C_i at reduced pivot time step h/2 (2nd time, eq 27) - for (struct sys *ci = &c; !IS_ZEROSYS(ci); ci = ci->next_cc) + for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) { #ifdef _OPENMP if (TASKCONDITION) @@ -621,7 +613,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, move_system(s,cmpos,cmvel,1); } - free_sys(c.next_cc); + free_sys(c); //~ if(accel_zero_mass) split_zeromass(&s); } diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index 91ba7bb212..698cb46916 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -428,8 +428,8 @@ def test14b(self): "SHARED10"] for itype in test_set: - #~ print() - #~ print(itype) + print() + print(itype) instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] instance.parameters.accelerate_zero_mass=True From e28175b0fd394ebbaf68f4844fe453b89a9bc77a Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 27 Jul 2021 20:52:24 +0200 Subject: [PATCH 17/35] make seperate cc linked list, simplifies particle sys --- src/amuse/community/huayno/src/evolve.c | 2 +- src/amuse/community/huayno/src/evolve.h | 1 - src/amuse/community/huayno/src/evolve_cc.c | 104 +++++++++++---------- 3 files changed, 56 insertions(+), 51 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 0ffc517606..9b938f8c87 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -23,7 +23,7 @@ struct sys debugsys; FLOAT eps2; FLOAT dt_param; -struct sys zerosys ={0,0,NULL,NULL,NULL}; +struct sys zerosys ={0,0,NULL,NULL}; int accel_zero_mass=1; struct diagnostics global_diag; diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 88898139f9..5a0fa51218 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -51,7 +51,6 @@ struct sys UINT n, nzero; // n=total particles, nzero=# zero mass particles struct particle *part; // start of particles, NULL iff n==0 struct particle *zeropart; // start of zero mass particles nzero>0, otherwise NULL - struct sys *next_cc; // used in the CC split only }; #define GETPART(s, i) ((i)<(s).n-(s).nzero ? (s).part+(i) : (s).zeropart+((i)-((s).n-(s).nzero))) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 77fd55f670..bad02cf8b1 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -18,10 +18,16 @@ #include "evolve_kepler.h" #include "evolve_bs.h" +struct ccsys +{ + struct sys s; + struct ccsys *next_cc; +}; + #define LOG_CC_SPLIT(C, R) \ { \ LOG("clevel = %d s.n = %d c.n = {", clevel, s.n); \ - for (struct sys *_ci = (C); _ci!=NULL; _ci = _ci->next_cc) printf(" %d ", _ci->n ); \ + for (struct ccsys *_ci = (C); _ci!=NULL; _ci = _ci->next_cc) printf(" %d ", _ci->s.n ); \ printf("} r.n = %d\n", (R).n); \ }; @@ -42,9 +48,9 @@ #define LOGSYS_ID(SYS) for (UINT i = 0; i < (SYS).n; i++) { printf("%u ", (SYS).part[i].id); } printf("\n"); #define LOGSYSp_ID(SYS) for (UINT i = 0; i < (SYS)->n; i++) { printf("%u ", (SYS)->part[i].id); } printf("\n"); -#define LOGSYSC_ID(SYS) for (struct sys *_ci = &(SYS); _ci!=NULL; _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); +#define LOGSYSC_ID(SYS) for (struct ccsys *_ci = &(SYS); _ci!=NULL; _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->s.n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); -void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) { +void split_cc(int clevel,struct sys s, struct ccsys **c, struct sys *r, DOUBLE dt) { /* * split_cc: run a connected component search on sys s with threshold dt, * creates a singly-linked list of connected components c and a rest system r @@ -53,7 +59,7 @@ void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) int dir=SIGN(dt); dt=fabs(dt); diag->tstep[clevel]++; // not directly comparable to corresponding SF-split statistics - struct sys **c_next; + struct ccsys **c_next; if(s.n<=1) ENDRUN("This does not look right..."); c_next = c; if(*c_next!=NULL) ENDRUN("should start with zero pointer"); @@ -162,7 +168,8 @@ void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) { //~ LOG("split_cc: found component with size: %d %d\n", comp_size, compzero_size); //~ LOG("%d %d \n", comp_next-stack_next, compzero_next-stackzero_next); - struct sys *new=(struct sys*) malloc( sizeof(struct sys) ); + *c_next=(struct ccsys*) malloc( sizeof(struct ccsys) ); + struct sys *new=&((*c_next)->s); *new=zerosys; new->n = comp_size; new->nzero = compzero_size; @@ -177,9 +184,8 @@ void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) if(new->part==NULL) new->part=new->zeropart; //~ PRINTSYS((*c_next)); //~ PRINTOFFSETS((*c_next)); - new->next_cc = NULL; - *c_next=new; - c_next = &(new->next_cc); + (*c_next)->next_cc = NULL; + c_next = &((*c_next)->next_cc); } else // new component is trivial: add to rest, reset pointers { @@ -233,7 +239,7 @@ void split_cc(int clevel,struct sys s, struct sys **c, struct sys *r, DOUBLE dt) -void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys r) { +void split_cc_verify(int clevel,struct sys s, struct ccsys *c, struct sys r) { /* * split_cc_verify: explicit verification if connected components c and rest system r form a correct * connected components decomposition of the system. @@ -247,15 +253,15 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys r) { pcount_check = 0; UINT particle_found = 0; struct particle *p = GETPART(s, i); - for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) + for (struct ccsys *cj = c; cj!=NULL; cj = cj->next_cc) { - verify_split_zeromass(*cj); - pcount_check += cj->n; + verify_split_zeromass(cj->s); + pcount_check += cj->s.n; //~ //LOG("%d\n", pcount_check); // search for p in connected components - for (UINT k = 0; k < cj->n; k++) + for (UINT k = 0; k < cj->s.n; k++) { - struct particle * pk = GETPART( *cj,k); + struct particle * pk = GETPART( cj->s,k); // is pk equal to p if (p->id == pk->id) { @@ -263,13 +269,13 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys r) { //~ LOG("split_cc_verify: found %d in a cc\n",i); } } - if (cj->n-cj->nzero>0 && ( GETPART( *cj, cj->n - cj->nzero - 1) != LAST(*cj) )) + if (cj->s.n-cj->s.nzero>0 && ( GETPART( cj->s, cj->s.n - cj->s.nzero - 1) != LAST(cj->s) )) { LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); ENDRUN("data structure corrupted\n"); } - if (cj->nzero>0 && ( GETPART( *cj, cj->n-1) != LASTZERO(*cj) )) + if (cj->s.nzero>0 && ( GETPART( cj->s, cj->s.n-1) != LASTZERO(cj->s) )) { LOG("split_cc_verify: last pointer for c is not set correctly!\n"); LOG_CC_SPLIT(c, r); @@ -314,25 +320,25 @@ void split_cc_verify(int clevel,struct sys s, struct sys *c, struct sys r) { //ENDRUN("Fin.\n"); } -void split_cc_verify_ts(int clevel,struct sys *c, struct sys r, DOUBLE dt) +void split_cc_verify_ts(int clevel,struct ccsys *c, struct sys r, DOUBLE dt) { DOUBLE ts_ij; int dir=SIGN(dt); dt=fabs(dt); // verify C-C interactions - for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { - for (UINT i = 0; i < ci->n; i++) + for (UINT i = 0; i < ci->s.n; i++) { - for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) + for (struct ccsys *cj = c; cj!=NULL; cj = cj->next_cc) { if (ci == cj) { continue; } - for (UINT j = 0; j < cj->n; j++) + for (UINT j = 0; j < cj->s.n; j++) { - ts_ij = (DOUBLE) timestep_ij(GETPART( *ci, i), GETPART( *cj, j), dir); + ts_ij = (DOUBLE) timestep_ij(GETPART( ci->s, i), GETPART( cj->s, j), dir); //LOG("comparing %d %d\n", ci->part[i].id, cj->part[j].id); //LOG("%f %f \n", ts_ij, dt); if (dt > ts_ij) @@ -345,13 +351,13 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys r, DOUBLE dt) } // verify C-R interactions - for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { - for (UINT i = 0; i < ci->n; i++) + for (UINT i = 0; i < ci->s.n; i++) { for (UINT j = 0; j < r.n; j++) { - ts_ij = (DOUBLE) timestep_ij( GETPART(*ci, i), GETPART(r, j),dir); + ts_ij = (DOUBLE) timestep_ij( GETPART(ci->s, i), GETPART(r, j),dir); if (ts_ij < dt) { ENDRUN("split_cc_verify_ts C-R timestep underflow\n"); @@ -376,7 +382,7 @@ void split_cc_verify_ts(int clevel,struct sys *c, struct sys r, DOUBLE dt) } // TODO rename to cc_free_sys? -void free_sys(struct sys * s) +void free_sys(struct ccsys * s) { if (s==NULL) return; if (s->next_cc != NULL) @@ -407,7 +413,8 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, { DOUBLE cmpos[3],cmvel[3]; int recentersub=0; - struct sys *c = NULL, r = zerosys; + struct ccsys *c = NULL; + struct sys r = zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); //~ if(accel_zero_mass) split_zeromass(&s); @@ -471,7 +478,6 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, s_before.nzero = s.nzero; s_before.part = (struct particle*) malloc(s.n*sizeof(struct particle)); if(s_before.nzero>0) s_before.zeropart = s_before.part+(s_before.n-s_before.nzero); - s_before.next_cc = NULL; for(UINT i=0; inext_cc) nc++; + int nc=0; for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) nc++; if(nc>1 || r.n>0) recentersub=1; - for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { #ifdef _OPENMP if( TASKCONDITION ) { diag->ntasks[clevel]++; - diag->taskcount[clevel]+=ci->n; + diag->taskcount[clevel]+=ci->s.n; #pragma omp task firstprivate(clevel,ci,stime,dt,recentersub) untied { struct sys lsys=zerosys; - lsys.n=ci->n; - lsys.nzero=ci->nzero; + lsys.n=ci->s.n; + lsys.nzero=ci->s.nzero; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart; if(lsys.nzero>0) lsys.zeropart=lsys.part+(lsys.n-lsys.nzero); - for(UINT i=0;is,i); evolve_cc2(clevel+1,lsys, stime, stime+dt/2, dt/2,inttype,recentersub); - for(UINT i=0;is,i)=*GETPART(lsys,i); free(lpart); } } else #endif { - evolve_cc2(clevel+1,*ci, stime, stime+dt/2, dt/2,inttype,recentersub); + evolve_cc2(clevel+1,ci->s, stime, stime+dt/2, dt/2,inttype,recentersub); } } #pragma omp taskwait @@ -550,13 +556,13 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if(r.n>0) drift(clevel,r, stime+dt/2, dt/2); // drift r, 1st time // kick ci <-> cj (eq 23) - for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { - for (struct sys *cj = c; cj!=NULL; cj = cj->next_cc) + for (struct ccsys *cj = c; cj!=NULL; cj = cj->next_cc) { if (ci != cj) { - kick(clevel,*ci, *cj, dt); + kick(clevel,ci->s, cj->s, dt); //kick(*cj, *ci, dt); } } @@ -564,10 +570,10 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, //~ if(r.n>0 && accel_zero_mass) split_zeromass(&r); // kick c <-> rest (eq 24) - if(r.n>0) for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + if(r.n>0) for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { - kick(clevel,r, *ci, dt); - kick(clevel,*ci, r, dt); + kick(clevel,r, ci->s, dt); + kick(clevel,ci->s, r, dt); } if(r.n>0) kick(clevel,r, r, dt); // kick rest (V_RR) @@ -575,34 +581,34 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if(r.n>0) drift(clevel,r, etime, dt/2); // drift r, 2nd time // Independently integrate every C_i at reduced pivot time step h/2 (2nd time, eq 27) - for (struct sys *ci = c; ci!=NULL; ci = ci->next_cc) + for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { #ifdef _OPENMP if (TASKCONDITION) { diag->ntasks[clevel]++; - diag->taskcount[clevel]+=ci->n; + diag->taskcount[clevel]+=ci->s.n; #pragma omp task firstprivate(clevel,ci,stime,etime,dt,recentersub) untied { struct sys lsys=zerosys; - lsys.n=ci->n; - lsys.nzero=ci->nzero; + lsys.n=ci->s.n; + lsys.nzero=ci->s.nzero; struct particle* lpart=(struct particle*) malloc(lsys.n*sizeof(struct particle)); lsys.part=lpart; if(lsys.nzero>0) lsys.zeropart=lsys.part+(lsys.n-lsys.nzero); - for(UINT i=0;is,i); evolve_cc2(clevel+1,lsys, stime+dt/2, etime, dt/2,inttype,recentersub); - for(UINT i=0;is,i)=*GETPART(lsys,i); free(lpart); } } else #endif { - evolve_cc2(clevel+1,*ci, stime+dt/2, etime, dt/2,inttype,recentersub); + evolve_cc2(clevel+1,ci->s, stime+dt/2, etime, dt/2,inttype,recentersub); } } #pragma omp taskwait From b40fa2cab4585af1307f8a361926121147603a5d Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 29 Jul 2021 15:34:09 +0200 Subject: [PATCH 18/35] add error controlled integrator (needs improved condition), do not recacalculate dtsys if not necessary, integrator alias for inttype_parameter --- src/amuse/community/huayno/interface.py | 7 ++ src/amuse/community/huayno/src/Makefile | 2 +- src/amuse/community/huayno/src/evolve.c | 22 +++--- src/amuse/community/huayno/src/evolve.h | 3 +- src/amuse/community/huayno/src/evolve_bs.c | 15 ++-- src/amuse/community/huayno/src/evolve_bs.h | 2 +- src/amuse/community/huayno/src/evolve_cc.c | 2 +- .../community/huayno/src/evolve_kepler.c | 1 - .../community/huayno/src/evolve_shared.c | 73 +++++++++++-------- .../community/huayno/src/evolve_shared.h | 10 +-- 10 files changed, 81 insertions(+), 56 deletions(-) diff --git a/src/amuse/community/huayno/interface.py b/src/amuse/community/huayno/interface.py index 427ec2d3b2..1716ee4a04 100644 --- a/src/amuse/community/huayno/interface.py +++ b/src/amuse/community/huayno/interface.py @@ -285,6 +285,13 @@ def define_parameters(self, handler): default_value = 8 ) + handler.add_alias_parameter( + "integrator", + "inttype_parameter", + "integrator method to use (alias), this can be one of: "+ + ",".join( ["{0}={1}".format(i, t) for i,t in inttypes]), + ) + handler.add_method_parameter( "get_begin_time", "set_begin_time", diff --git a/src/amuse/community/huayno/src/Makefile b/src/amuse/community/huayno/src/Makefile index f919850d3d..d7239a51a1 100644 --- a/src/amuse/community/huayno/src/Makefile +++ b/src/amuse/community/huayno/src/Makefile @@ -25,7 +25,7 @@ RM = rm OBJS = evolve.o evolve_shared.o evolve_sf.o evolve_cc.o \ evolve_ok.o evolve_kepler.o universal_variable_kepler.o evolve_bs.o \ - evolve_shared_collisions.o simple_map.o simple_hash.o + evolve_shared_collisions.o evolve_error_control.o simple_map.o simple_hash.o all: libhuayno.a diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 9b938f8c87..4f0ce2e339 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -12,6 +12,7 @@ #include "evolve_kepler.h" #include "evolve_ok.h" #include "evolve_bs.h" +#include "evolve_error_control.h" #ifdef EVOLVE_OPENCL #include "evolve_cl.h" @@ -224,22 +225,22 @@ void do_evolve(struct sys s, double dt, int inttype) evolve_constant10(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt); break; case SHARED2: - evolve_shared2(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_shared2(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case SHARED4: - evolve_shared4(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_shared4(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case SHARED6: - evolve_shared6(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_shared6(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case SHARED8: - evolve_shared8(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_shared8(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case SHARED10: - evolve_shared10(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_shared10(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case SHAREDBS: - evolve_bs_adaptive(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_bs_adaptive(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; case BS_CC_KEPLER: evolve_bs(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt); @@ -320,6 +321,9 @@ void do_evolve(struct sys s, double dt, int inttype) case SHARED10_COLLISIONS: evolve_shared10_collision_detection(s, (DOUBLE) dt); break; + case ERROR_CONTROL: + evolve_error_control(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + break; default: ENDRUN("unknown integrator\n"); break; @@ -672,11 +676,9 @@ struct sys join(struct sys s1,struct sys s2) FLOAT global_timestep(struct sys s) { - UINT i; - FLOAT mindt; - mindt=HUGE_VAL; + FLOAT mindt=HUGE_VAL; struct particle *ipart; - for(i=0;iipart->timestep) mindt=ipart->timestep; diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 5a0fa51218..8ff5cd9c63 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -17,7 +17,7 @@ #define MAXLEVEL 64 #define COMPENSATED_SUMMP -//#define COMPENSATED_SUMMV +//~ #define COMPENSATED_SUMMV //~ #define CONSISTENCY_CHECKS // perform (time-consuming, but thorough) sanity checks @@ -100,6 +100,7 @@ enum intopt CONSTANT6, // 36 CONSTANT8, // 37 CONSTANT10, // 38 + ERROR_CONTROL // 39 }; extern int verbosity; diff --git a/src/amuse/community/huayno/src/evolve_bs.c b/src/amuse/community/huayno/src/evolve_bs.c index 4d20d660f6..7744a632f3 100644 --- a/src/amuse/community/huayno/src/evolve_bs.c +++ b/src/amuse/community/huayno/src/evolve_bs.c @@ -35,21 +35,23 @@ static void nkdk(int clevel,int n,struct sys s1,struct sys s2, DOUBLE stime, DOU static void ndkd(int clevel,int n,struct sys s1,struct sys s2, DOUBLE stime, DOUBLE etime, DOUBLE dt); static void n_cc_kepler(int clevel,int n,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); -void evolve_bs_adaptive(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) +void evolve_bs_adaptive(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) { - FLOAT dtsys; int done=0; CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); - dtsys=global_timestep(s); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } if(dtsys > fabs(dt)) { done=BulirschStoer(clevel,s, stime, etime, dt, 0); } if(done==0) { - evolve_bs_adaptive(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_bs_adaptive(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_bs_adaptive(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_bs_adaptive(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { @@ -60,7 +62,6 @@ void evolve_bs_adaptive(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOU void evolve_bs(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { - FLOAT dtsys; int done=0; CHECK_TIMESTEP(etime,stime,dt,clevel); done=BulirschStoer(clevel,s, stime, etime, dt, 1); diff --git a/src/amuse/community/huayno/src/evolve_bs.h b/src/amuse/community/huayno/src/evolve_bs.h index 49bce04abb..2087c90272 100644 --- a/src/amuse/community/huayno/src/evolve_bs.h +++ b/src/amuse/community/huayno/src/evolve_bs.h @@ -1,2 +1,2 @@ -void evolve_bs_adaptive(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); +void evolve_bs_adaptive(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); void evolve_bs(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index bad02cf8b1..bcd0d5a7cc 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -435,7 +435,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if (s.n <= BS_SUBSYS_SIZE && (inttype==CCC_BSA ||inttype==CC_BSA)) { - evolve_bs_adaptive(clevel,s, stime, etime, dt,1); + evolve_bs_adaptive(clevel,s, stime, etime, dt, -1.); return; } diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index abf5fbd71c..527f55ea9a 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -6,7 +6,6 @@ #include #include #include "evolve.h" -#include "evolve_shared.h" #include "universal_kepler_solver.h" #ifdef _OPENMP #include diff --git a/src/amuse/community/huayno/src/evolve_shared.c b/src/amuse/community/huayno/src/evolve_shared.c index 6ccd982f8c..c1090b7ecf 100644 --- a/src/amuse/community/huayno/src/evolve_shared.c +++ b/src/amuse/community/huayno/src/evolve_shared.c @@ -8,16 +8,18 @@ #include "evolve.h" #include "integrators_shared.h" -void evolve_shared2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) +void evolve_shared2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) { - FLOAT dtsys; CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); - dtsys=global_timestep(s); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } if(dtsys < fabs(dt)) { - evolve_shared2(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_shared2(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_shared2(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_shared2(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { @@ -27,14 +29,17 @@ void evolve_shared2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE } } -void evolve_shared4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) { - FLOAT dtsys; +void evolve_shared4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) +{ CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); - dtsys = global_timestep(s); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } if(dtsys < fabs(dt)) { - evolve_shared4(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_shared4(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_shared4(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_shared4(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { diag->deepsteps++; diag->simtime+=dt; @@ -42,14 +47,17 @@ void evolve_shared4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE } } -void evolve_shared6(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) { - FLOAT dtsys; +void evolve_shared6(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) +{ CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); - dtsys = global_timestep(s); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } if(dtsys < fabs(dt)) { - evolve_shared6(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_shared6(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_shared6(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_shared6(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { diag->deepsteps++; diag->simtime+=dt; @@ -57,14 +65,17 @@ void evolve_shared6(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE } } -void evolve_shared8(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) { - FLOAT dtsys; +void evolve_shared8(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) +{ CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); - dtsys = global_timestep(s); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } if(dtsys < fabs(dt)) { - evolve_shared8(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_shared8(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_shared8(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_shared8(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { diag->deepsteps++; diag->simtime+=dt; @@ -72,14 +83,18 @@ void evolve_shared8(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE } } -void evolve_shared10(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep) { - FLOAT dtsys; +void evolve_shared10(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys) +{ CHECK_TIMESTEP(etime,stime,dt,clevel); - if(calc_timestep) timestep(clevel,s,s,SIGN(dt)); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=global_timestep(s); + } dtsys = global_timestep(s); if(dtsys < fabs(dt)) { - evolve_shared10(clevel+1,s,stime, stime+dt/2,dt/2,0); - evolve_shared10(clevel+1,s,stime+dt/2, etime,dt/2,1); + evolve_shared10(clevel+1,s,stime, stime+dt/2,dt/2, dtsys); + evolve_shared10(clevel+1,s,stime+dt/2, etime,dt/2, -1.); } else { diag->deepsteps++; diag->simtime+=dt; diff --git a/src/amuse/community/huayno/src/evolve_shared.h b/src/amuse/community/huayno/src/evolve_shared.h index be88e861f3..e5963477da 100644 --- a/src/amuse/community/huayno/src/evolve_shared.h +++ b/src/amuse/community/huayno/src/evolve_shared.h @@ -1,8 +1,8 @@ -void evolve_shared2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); -void evolve_shared4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); -void evolve_shared6(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); -void evolve_shared8(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); -void evolve_shared10(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); +void evolve_shared2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); +void evolve_shared4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); +void evolve_shared6(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); +void evolve_shared8(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); +void evolve_shared10(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); void evolve_constant2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); void evolve_constant4(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt); From 39e76a1df7f1e858f8e4fea6188722d4dd55f6f0 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 29 Jul 2021 15:35:33 +0200 Subject: [PATCH 19/35] add error control files --- .../huayno/src/evolve_error_control.c | 88 +++++++++++++++++++ .../huayno/src/evolve_error_control.h | 1 + 2 files changed, 89 insertions(+) create mode 100644 src/amuse/community/huayno/src/evolve_error_control.c create mode 100644 src/amuse/community/huayno/src/evolve_error_control.h diff --git a/src/amuse/community/huayno/src/evolve_error_control.c b/src/amuse/community/huayno/src/evolve_error_control.c new file mode 100644 index 0000000000..1f32268991 --- /dev/null +++ b/src/amuse/community/huayno/src/evolve_error_control.c @@ -0,0 +1,88 @@ +/* + * Reference integrators with single, global shared time step. + */ + + +#include +#include +#include +#include "evolve.h" +#include "evolve_shared.h" + +#define error_control_parameter 1.e-15 + +static FLOAT error_function(struct sys s1, struct sys s2) +{ + FLOAT maxdiv=0.; + UINT i; + struct particle *part1, *part2; + if(s1.n!=s2.n) ENDRUN("error_function length mismatch %d,%d\n",s1.n,s2.n); + for(i=0;ipos[0] - (FLOAT) part2->pos[0])); + maxdiv=fmax(maxdiv,fabs( (FLOAT) part1->pos[1] - (FLOAT) part2->pos[1])); + maxdiv=fmax(maxdiv,fabs( (FLOAT) part1->pos[2] - (FLOAT) part2->pos[2])); + maxdiv=fmax(maxdiv,fabs( (FLOAT) part1->vel[0] - (FLOAT) part2->vel[0])); + maxdiv=fmax(maxdiv,fabs( (FLOAT) part1->vel[1] - (FLOAT) part2->vel[1])); + maxdiv=fmax(maxdiv,fabs( (FLOAT) part1->vel[2] - (FLOAT) part2->vel[2])); + } + return maxdiv; +} + +void evolve_error_control_sub(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) +{ + struct sys s1,s2; + CHECK_TIMESTEP(etime,stime,dt,clevel); + + s1.n=s.n; + s1.nzero=s.nzero; + s1.part=(struct particle*) malloc(s.n*sizeof(struct particle)); + if(!s1.part) ENDRUN("failed allocation of sys\n"); + if(s1.nzero>0) s1.zeropart=s1.part+s1.n-s1.nzero; + for(UINT i=0;i0) s2.zeropart=s2.part+s2.n-s2.nzero; + for(UINT i=0;ierror_control_parameter) + { + //~ LOG("%d error control REJECT\n", clevel); + evolve_error_control_sub(clevel+1,s2, stime, stime+dt/2, dt/2); + evolve_error_control_sub(clevel+1,s2, stime+dt/2, etime, dt/2); + }// else LOG("%d error control ACCEPT\n", clevel); + + for(UINT i=0;i Date: Thu, 29 Jul 2021 15:48:12 +0200 Subject: [PATCH 20/35] fix prototype --- src/amuse/community/huayno/src/evolve.c | 2 +- src/amuse/community/huayno/src/evolve_error_control.c | 2 +- src/amuse/community/huayno/src/evolve_error_control.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 4f0ce2e339..9a4a4d5c01 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -322,7 +322,7 @@ void do_evolve(struct sys s, double dt, int inttype) evolve_shared10_collision_detection(s, (DOUBLE) dt); break; case ERROR_CONTROL: - evolve_error_control(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt,1); + evolve_error_control(clevel,s, (DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, -1.); break; default: ENDRUN("unknown integrator\n"); diff --git a/src/amuse/community/huayno/src/evolve_error_control.c b/src/amuse/community/huayno/src/evolve_error_control.c index 1f32268991..a6f5fc3ccb 100644 --- a/src/amuse/community/huayno/src/evolve_error_control.c +++ b/src/amuse/community/huayno/src/evolve_error_control.c @@ -9,7 +9,7 @@ #include "evolve.h" #include "evolve_shared.h" -#define error_control_parameter 1.e-15 +#define error_control_parameter 1.e-12 static FLOAT error_function(struct sys s1, struct sys s2) { diff --git a/src/amuse/community/huayno/src/evolve_error_control.h b/src/amuse/community/huayno/src/evolve_error_control.h index 2ecdfdf2b3..f1e9907712 100644 --- a/src/amuse/community/huayno/src/evolve_error_control.h +++ b/src/amuse/community/huayno/src/evolve_error_control.h @@ -1 +1 @@ -void evolve_error_control(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int calc_timestep); +void evolve_error_control(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, FLOAT dtsys); From 12096a716347c6b8564134fd54d9de727b36ea66 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 29 Jul 2021 15:55:30 +0200 Subject: [PATCH 21/35] fix comment --- src/amuse/community/huayno/src/evolve_error_control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amuse/community/huayno/src/evolve_error_control.c b/src/amuse/community/huayno/src/evolve_error_control.c index a6f5fc3ccb..3787b78965 100644 --- a/src/amuse/community/huayno/src/evolve_error_control.c +++ b/src/amuse/community/huayno/src/evolve_error_control.c @@ -1,5 +1,5 @@ /* - * Reference integrators with single, global shared time step. + * integrator which combines multiple integrators at different timestep/order to get error estimates */ From c9e8ef7df87c6864cfcb6c68cc9b27ac0293e461 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Mon, 2 Aug 2021 18:07:18 +0200 Subject: [PATCH 22/35] small fixes in logging and small bug in kick, convert some direct part accesses --- src/amuse/community/huayno/src/evolve.c | 13 ++++++----- src/amuse/community/huayno/src/evolve_cc.c | 9 ++++---- .../community/huayno/src/evolve_kepler.c | 23 ++++++++++--------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 2f0313e879..0a033ab7fd 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -367,8 +367,8 @@ static void kick_cpu(struct sys s1, struct sys s2, DOUBLE dt) for(UINT j=0;jmass==0) continue; +// if(ipart==jpart) continue; dx[0]=ipart->pos[0]-jpart->pos[0]; dx[1]=ipart->pos[1]-jpart->pos[1]; dx[2]=ipart->pos[2]-jpart->pos[2]; @@ -523,7 +523,7 @@ static void timestep_cpu(struct sys s1, struct sys s2,int dir) tau=timestep_ij(ipart,GETPART(s2,j),dir); if(tau < timestep) timestep=tau; } -// if(timesteptimestep) ipart->timestep=timestep; } } @@ -708,9 +708,10 @@ void split_zeromass(struct sys *s) UINT i=0; struct particle *left, *right; if(s->n==0) return; + if(s->part==NULL) ENDRUN("split_zeromass malformed input"); if(s->n-s->nzero==0) { - if(s->zeropart==NULL) ENDRUN("split_zeromass malformed input"); + if(s->zeropart==NULL || s->part!=s->zeropart) ENDRUN("split_zeromass malformed input"); if(LASTZERO(*s)-s->zeropart+1!=s->nzero) ENDRUN( "split_zeromass malformed input sys"); return; } @@ -737,8 +738,8 @@ void split_zeromass(struct sys *s) s->zeropart=left; } if((left-s->part)+s->nzero !=s->n) ENDRUN( "split_zeromass error 2"); - for(i=0;i<(s->n-s->nzero);i++) if(s->part[i].mass==0) ENDRUN ("split_zromass error 3"); - for(i=s->n-s->nzero;in;i++) if(s->part[i].mass!=0) ENDRUN ("split_zeromass error 4"); + for(i=0;i<(s->n-s->nzero);i++) if(GETPART(*s,i)->mass==0) ENDRUN ("split_zromass error 3"); + for(i=s->n-s->nzero;in;i++) if(GETPART(*s,i)->mass!=0) ENDRUN ("split_zeromass error 4"); #ifdef CONSISTENCY_CHECKS verify_split_zeromass(*s); #endif diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index bcd0d5a7cc..3392fcfbe3 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -46,9 +46,10 @@ struct ccsys }; -#define LOGSYS_ID(SYS) for (UINT i = 0; i < (SYS).n; i++) { printf("%u ", (SYS).part[i].id); } printf("\n"); -#define LOGSYSp_ID(SYS) for (UINT i = 0; i < (SYS)->n; i++) { printf("%u ", (SYS)->part[i].id); } printf("\n"); -#define LOGSYSC_ID(SYS) for (struct ccsys *_ci = &(SYS); _ci!=NULL; _ci = _ci->next_cc) {printf("{"); for (UINT i = 0; i < _ci->s.n; i++) {printf("%u ", _ci->part[i].id); } printf("}\t");} printf("\n"); +#define LOGSYS_ID(SYS) for (UINT i = 0; i < (SYS).n; i++) { printf("%u ", GETPART(SYS, i)->id); } printf("\n"); +#define LOGSYSp_ID(SYS) LOGSYS_ID(*SYS); +#define LOGSYSC_ID(SYS) for (struct ccsys *_ci = &(SYS); _ci!=NULL; _ci = _ci->next_cc) \ + {printf("{"); for (UINT i = 0; i < _ci->s.n; i++) {printf("%u ", GETPART(_ci->s,i)->id); } printf("}\t");} printf("\n"); void split_cc(int clevel,struct sys s, struct ccsys **c, struct sys *r, DOUBLE dt) { /* @@ -496,7 +497,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, LOG("s: "); LOGSYS_ID(s_before); LOG("c: "); - LOGSYSC_ID(c); + LOGSYSC_ID(*c); LOG("r: "); LOGSYS_ID(r); } diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 527f55ea9a..eb4682dc1c 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -58,7 +58,7 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { - struct particle *ipart; + struct particle *ipart, *spart; DOUBLE dpos[3],dpos0[3],cmpos[3]; DOUBLE dvel[3],dvel0[3]; UINT err; @@ -71,17 +71,18 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, drift(clevel,s,etime, dt); return; } - for(int k=0;k<3;k++) cmpos[k]= s.part->pos[k] + s.part->vel[k]*dt; + spart=GETPART(s,0); + for(int k=0;k<3;k++) cmpos[k]= spart->pos[k] + spart->vel[k]*dt; err=0; #pragma omp parallel for if((ULONG) s.n>omp_get_num_threads() && !omp_in_parallel()) default(none) \ - private(ipart, dpos,dvel,dpos0,dvel0) shared(etime,clevel, dt,cmpos, s, eps2) reduction(|: err) + private(ipart, dpos,dvel,dpos0,dvel0) shared(etime,clevel, dt,cmpos, s, eps2, spart) reduction(|: err) for(UINT i=1;ipos[k] - ipart->pos[k]; - for(int k=0;k<3;k++) dvel0[k] = s.part->vel[k] - ipart->vel[k]; - err|=universal_kepler_solver(dt,s.part->mass,eps2, + for(int k=0;k<3;k++) dpos0[k] = spart->pos[k] - ipart->pos[k]; + for(int k=0;k<3;k++) dvel0[k] = spart->vel[k] - ipart->vel[k]; + err|=universal_kepler_solver(dt,spart->mass,eps2, dpos0[0],dpos0[1],dpos0[2], dvel0[0],dvel0[1],dvel0[2], &dpos[0],&dpos[1],&dpos[2], @@ -89,14 +90,14 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, for(int k=0;k<3;k++) ipart->pos[k] = cmpos[k] - dpos[k]; - for(int k=0;k<3;k++) ipart->vel[k] = s.part->vel[k] - dvel[k]; + for(int k=0;k<3;k++) ipart->vel[k] = spart->vel[k] - dvel[k]; ipart->postime=etime; } if (err != 0) { ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now } - for(int k=0;k<3;k++) s.part->pos[k]=cmpos[k]; - s.part->postime=etime; + for(int k=0;k<3;k++) spart->pos[k]=cmpos[k]; + spart->postime=etime; diag->cecount[clevel]+=s.nzero; } @@ -107,7 +108,7 @@ static void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE eti struct particle *central; struct particle *ipart; struct particle p[2]; - struct sys s2; + struct sys s2=zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); if (s.n <= 1) ENDRUN("kepler test solver was called with too few massive particles sys.n=%u\n this hsouldn't happen\n", s.n); @@ -154,7 +155,7 @@ void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE d } if(s.n-s.nzero>1) // more than 1 massive particle, consider heaviest as central; { - evolve_kepler_test(clevel,s,stime,etime,dt); + ENDRUN("evolve_kepler called for a system with more than 1 massive particle"); return; } drift(clevel,s,etime, dt); // 1 massive or only zero mass From 4602d1afd706bf8e8d223be7f13bced5021b7838 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Mon, 2 Aug 2021 18:09:18 +0200 Subject: [PATCH 23/35] retire kepler test evolver --- .../community/huayno/src/evolve_kepler.c | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index eb4682dc1c..8ae81ef4cf 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -101,46 +101,6 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, diag->cecount[clevel]+=s.nzero; } - -// special solver to test kepler -static void evolve_kepler_test(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) -{ - struct particle *central; - struct particle *ipart; - struct particle p[2]; - struct sys s2=zerosys; - - CHECK_TIMESTEP(etime,stime,dt,clevel); - if (s.n <= 1) ENDRUN("kepler test solver was called with too few massive particles sys.n=%u\n this hsouldn't happen\n", s.n); - - central=GETPART(s,0); - for(UINT i=1;imassmass) central=ipart; - } - - for(UINT i=0;imass+ipart->mass; - p[1].mass=0; - - s2.n=2; - s2.part=p; - evolve_kepler_2(clevel,s2,stime,etime,dt); - p[1].mass=ipart->mass; - *ipart=p[1]; - ipart->postime=etime; - } - for(int k=0;k<3;k++) central->pos[k]+=central->vel[k]*dt; - central->postime=etime; -} - void evolve_kepler(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { if(s.n-s.nzero==2) // 2 body From a0f540c7c38b2b5eb41f4a8265601b0cde39b74b Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Mon, 2 Aug 2021 18:32:07 +0200 Subject: [PATCH 24/35] minor cleanup --- src/amuse/community/huayno/src/evolve_cc.c | 6 +----- src/amuse/test/suite/codes_tests/test_huayno.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 3392fcfbe3..33d1c32d19 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -340,7 +340,7 @@ void split_cc_verify_ts(int clevel,struct ccsys *c, struct sys r, DOUBLE dt) for (UINT j = 0; j < cj->s.n; j++) { ts_ij = (DOUBLE) timestep_ij(GETPART( ci->s, i), GETPART( cj->s, j), dir); - //LOG("comparing %d %d\n", ci->part[i].id, cj->part[j].id); + //LOG("comparing %d %d\n", GETPART( ci->s, i)-> id, GETPART( cj->s, j)->id); //LOG("%f %f \n", ts_ij, dt); if (dt > ts_ij) { @@ -418,8 +418,6 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, struct sys r = zerosys; CHECK_TIMESTEP(etime,stime,dt,clevel); - //~ if(accel_zero_mass) split_zeromass(&s); - if ((s.n == 2 || s.n-s.nzero<=1 )&& (inttype==CCC_KEPLER || inttype==CC_KEPLER || inttype==CCC_BS || inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA)) //~ if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) @@ -569,7 +567,6 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } } - //~ if(r.n>0 && accel_zero_mass) split_zeromass(&r); // kick c <-> rest (eq 24) if(r.n>0) for (struct ccsys *ci = c; ci!=NULL; ci = ci->next_cc) { @@ -622,6 +619,5 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, free_sys(c); - //~ if(accel_zero_mass) split_zeromass(&s); } #undef TASKCONDITION diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index f72c4a3656..1c517d8312 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -428,8 +428,8 @@ def test14b(self): "SHARED10"] for itype in test_set: - print() - print(itype) + #~ print() + #~ print(itype) instance = Huayno(redirection="none") instance.parameters.inttype_parameter=Huayno.all_inttypes[itype] instance.parameters.accelerate_zero_mass=True From 534ab8bd2382751d25d1e6cb232bc068cacfccb6 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 3 Aug 2021 19:48:58 +0200 Subject: [PATCH 25/35] fix huayno cl build, some cl fixes and make sure zero mass interactions are not calculated --- src/amuse/community/huayno/Makefile | 9 ++- src/amuse/community/huayno/src/Makefile | 4 +- src/amuse/community/huayno/src/evolve.c | 2 +- src/amuse/community/huayno/src/evolve_cl.c | 66 ++++++++++++++++------ 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/amuse/community/huayno/Makefile b/src/amuse/community/huayno/Makefile index a6c7cddf5e..9514d09bca 100644 --- a/src/amuse/community/huayno/Makefile +++ b/src/amuse/community/huayno/Makefile @@ -40,9 +40,8 @@ OPENMP_BUILDDIR = build_mp AM_LIBS = -L$(AMUSE_DIR)/lib/amuse_mpi -lamuse_mpi AM_CFLAGS = -I$(AMUSE_DIR)/lib/amuse_mpi -CL_LIBS?=-L$(OPENCL)/lib/x86_64 -lOpenCL -lpthread -ldl -L$(STDCL)/lib -lstdcl -CL_FLAGS?=-I$(OPENCL)/include -I$(STDCL)/include - +CL_LIBS+=-L$(STDCL)/lib64 -lstdcl +CL_FLAGS+=-I$(STDCL)/include all: huayno_worker clean: @@ -60,7 +59,7 @@ $(BUILDDIR)/Makefile: $(SRCDIR)/Makefile cp $(SRCDIR)/Makefile $(BUILDDIR)/Makefile $(BUILDDIR)/libhuayno.a: $(BUILDDIR)/Makefile $(LIBFILES) src/Makefile - make -C $(BUILDDIR) all VPATH=../src + make -C $(BUILDDIR) all VPATH=../src CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" $(OPENMP_BUILDDIR)/Makefile: $(SRCDIR)/Makefile @@ -98,7 +97,7 @@ huayno_worker_mp: __init__.py worker_code.cc worker_code.h $(OPENMP_BUILDDIR)/li huayno_worker_cl: CFLAGS += -fopenmp -DEVOLVE_OPENCL huayno_worker_cl: CXXFLAGS += -fopenmp -DEVOLVE_OPENCL huayno_worker_cl: LIBS += $(CL_LIBS) -lstdcl -huayno_worker_cl: INCLUDE += $(CL_FLAGS) +huayno_worker_cl: INCLUDE += $(CL_FLAGS) -I. huayno_worker_cl: __init__.py worker_code.cc worker_code.h $(OPENCL_BUILDDIR)/libhuayno_cl.a $(OPENCL_BUILDDIR)/interface.o $(MPICXX) $(CXXFLAGS) $(INCLUDE) $(LDFLAGS) worker_code.cc $(OPENCL_BUILDDIR)/interface.o $(OPENCL_BUILDDIR)/libhuayno_cl.a -o $@ $(LIBS) $(SC_CLIBS) diff --git a/src/amuse/community/huayno/src/Makefile b/src/amuse/community/huayno/src/Makefile index d7239a51a1..256831aa5e 100644 --- a/src/amuse/community/huayno/src/Makefile +++ b/src/amuse/community/huayno/src/Makefile @@ -15,7 +15,7 @@ else CFLAGS += -std=gnu99 endif LIBS += -lm -INCLUDE = +#~ INCLUDE = AR = ar ruv RANLIB = ranlib @@ -56,7 +56,7 @@ libhuayno_cl.a: evolve_kern.clh $(OBJS) evolve_cl.o $(MPICXX) $(CXXFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< .c.o: $< - $(MPICC) $(CFLAGS) $(SC_FLAGS) -I../build_cl $(INCLUDE) -c -o $@ $< + $(MPICC) $(CFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< %.clh: %.cl awk 'BEGIN{print "const char srcstr[]=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index 0a033ab7fd..ed9944865e 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -531,7 +531,7 @@ static void timestep_cpu(struct sys s1, struct sys s2,int dir) void timestep(int clevel,struct sys s1, struct sys s2,int dir) { #ifdef EVOLVE_OPENCL - if((ULONG) s1.n*s2.n>CLWORKLIMIT) + if((ULONG) (s1.n*s2.n-s1.nzero*s2.nzero)>CLWORKLIMIT) { #pragma omp critical timestep_cl(s1,s2,dir); diff --git a/src/amuse/community/huayno/src/evolve_cl.c b/src/amuse/community/huayno/src/evolve_cl.c index 8b07e5625b..faccff3899 100644 --- a/src/amuse/community/huayno/src/evolve_cl.c +++ b/src/amuse/community/huayno/src/evolve_cl.c @@ -32,23 +32,24 @@ void close_cl() void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) { - int i; + int i, n2; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; struct particle *ipart; - if(s1.n==0 || s2.n==0) return; + n2=s2.n-s2.nzero; + if(s1.n==0 || n2==0) return; groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; - if(s2.nmass; } for(i=s1.n;ipos[0]; @@ -69,7 +70,7 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) } for(i=0;itimestep=timestep[i]; + for(i=0;itimestep=timestep[i]; //if(timestep[i]timestep) + } clfree( ipos); clfree( ivel); @@ -171,26 +175,52 @@ void timestep_cl(struct sys s1, struct sys s2,int dir) clfree( timestep); } +void timestep_cl(struct sys s1, struct sys s2,int dir) +{ + if(accel_zero_mass) + { + struct sys s1m=zerosys, s1ml=zerosys, s2m=zerosys; + + s1m.n=s1.n-s1.nzero; + if(s1m.n>0) s1m.part=s1.part; + + s1ml.n=s1.nzero; + s1ml.nzero=s1.nzero; + if(s1ml.n>0) s1ml.part=s1.zeropart; + if(s1ml.n>0) s1ml.zeropart=s1.zeropart; + + s2m.n=s2.n-s2.nzero; + if(s2m.n>0) s2m.part=s2.part; + _timestep_cl(s1m, s2, dir); + _timestep_cl(s1ml, s2m, dir); + } else + { + _timestep_cl(s1,s2,dir); + } +} + void potential_cl(struct sys s1, struct sys s2) { - int i; + int i, n2; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; struct particle *ipart; - if(s1.n==0 || s2.n==0) return; + n2=s2.n-s2.nzero; + + if(s1.n==0 || n2==0) return; groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; - if(s2.nmass; } for(i=s1.n;ipos[0]; @@ -211,7 +241,7 @@ void potential_cl(struct sys s1, struct sys s2) } for(i=0;i Date: Tue, 3 Aug 2021 19:58:21 +0200 Subject: [PATCH 26/35] make double the default for huayno opencl --- src/amuse/community/huayno/src/evolve.h | 4 ++-- src/amuse/community/huayno/src/evolve_kern.cl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 8ff5cd9c63..1cedffa565 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -1,6 +1,6 @@ #define FLOAT double -#define CLFLOAT cl_float -#define CLFLOAT4 cl_float4 +#define CLFLOAT cl_double +#define CLFLOAT4 cl_double4 #define DOUBLE long double #define INT int #define UINT unsigned int diff --git a/src/amuse/community/huayno/src/evolve_kern.cl b/src/amuse/community/huayno/src/evolve_kern.cl index de414613a0..10b723479b 100644 --- a/src/amuse/community/huayno/src/evolve_kern.cl +++ b/src/amuse/community/huayno/src/evolve_kern.cl @@ -10,8 +10,8 @@ #define RATIMESTEP #define RARVRATIO 1. -#define FLOAT float -#define FLOAT4 float4 +#define FLOAT double +#define FLOAT4 double4 #define BIGNUM HUGE_VALF __kernel void kick_kernel( From cce4c9b3843713da8311b178f719f8fb46f4ede7 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 5 Aug 2021 18:12:22 +0200 Subject: [PATCH 27/35] refactor huayno opencl support to not need stdcl lib --- src/amuse/community/huayno/Makefile | 10 +- src/amuse/community/huayno/interface.c | 16 +- src/amuse/community/huayno/interface.py | 21 + src/amuse/community/huayno/src/Makefile | 2 +- src/amuse/community/huayno/src/evolve.c | 1 + src/amuse/community/huayno/src/evolve.h | 1 + src/amuse/community/huayno/src/evolve_cl.c | 463 ++++++++++++++++----- 7 files changed, 406 insertions(+), 108 deletions(-) diff --git a/src/amuse/community/huayno/Makefile b/src/amuse/community/huayno/Makefile index 9514d09bca..f084d6eea7 100644 --- a/src/amuse/community/huayno/Makefile +++ b/src/amuse/community/huayno/Makefile @@ -23,10 +23,6 @@ INCLUDE = CODE_GENERATOR ?= $(AMUSE_DIR)/build.py -# set OpenCL and STDCL libraries -OPENCL = /home/inti/libraries/cuda -STDCL = /home/inti/libraries/coprthr - #--------------------------------------------- OBJS = interface.o @@ -40,8 +36,6 @@ OPENMP_BUILDDIR = build_mp AM_LIBS = -L$(AMUSE_DIR)/lib/amuse_mpi -lamuse_mpi AM_CFLAGS = -I$(AMUSE_DIR)/lib/amuse_mpi -CL_LIBS+=-L$(STDCL)/lib64 -lstdcl -CL_FLAGS+=-I$(STDCL)/include all: huayno_worker clean: @@ -96,7 +90,7 @@ huayno_worker_mp: __init__.py worker_code.cc worker_code.h $(OPENMP_BUILDDIR)/li huayno_worker_cl: CFLAGS += -fopenmp -DEVOLVE_OPENCL huayno_worker_cl: CXXFLAGS += -fopenmp -DEVOLVE_OPENCL -huayno_worker_cl: LIBS += $(CL_LIBS) -lstdcl +huayno_worker_cl: LIBS += $(CL_LIBS) huayno_worker_cl: INCLUDE += $(CL_FLAGS) -I. huayno_worker_cl: __init__.py worker_code.cc worker_code.h $(OPENCL_BUILDDIR)/libhuayno_cl.a $(OPENCL_BUILDDIR)/interface.o $(MPICXX) $(CXXFLAGS) $(INCLUDE) $(LDFLAGS) worker_code.cc $(OPENCL_BUILDDIR)/interface.o $(OPENCL_BUILDDIR)/libhuayno_cl.a -o $@ $(LIBS) $(SC_CLIBS) @@ -119,4 +113,4 @@ $(OPENCL_BUILDDIR)/%.o: %.c $(MPICC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< %.clh: %.cl - awk 'BEGIN{print "const char srcstr[]=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ + awk 'BEGIN{print "const char *srcstr=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index f8b6c379a3..7ac5c5217a 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -38,9 +38,9 @@ int initialize_code() accel_zero_mass=1; eps2=0.; inttype=8; + opencl_device_type=0; t_now=0.; dtime=0.; - init_code(); // AMUSE STOPPING CONDITIONS SUPPORT set_support_for_condition(COLLISION_DETECTION); set_support_for_condition(OUT_OF_BOX_DETECTION); @@ -298,6 +298,19 @@ int set_inttype_parameter(int i) return 0; } +int get_opencl_device_type(int *i) +{ + *i=opencl_device_type; + return 0; +} + +int set_opencl_device_type(int i) +{ + opencl_device_type=i; + return 0; +} + + int set_timestep_parameter(double t) { dt_param=t; @@ -546,6 +559,7 @@ int recommit_parameters() } int commit_parameters() { + init_code(); t_now = begin_time; return 0; } diff --git a/src/amuse/community/huayno/interface.py b/src/amuse/community/huayno/interface.py index 4e5e31aa7b..2cb3c162a9 100644 --- a/src/amuse/community/huayno/interface.py +++ b/src/amuse/community/huayno/interface.py @@ -166,6 +166,20 @@ def set_accel_zero_mass_parameter(): function.result_type = 'i' return function + @legacy_function + def get_opencl_device_type(): + function = LegacyFunctionSpecification() + function.addParameter('opencl_device_type', dtype='i', direction=function.OUT) + function.result_type = 'i' + return function + + @legacy_function + def set_opencl_device_type(): + function = LegacyFunctionSpecification() + function.addParameter('opencl_device_type', dtype='i', direction=function.IN) + function.result_type = 'i' + return function + def set_eps2(self, e): return self.set_eps2_parameter(e) @@ -312,6 +326,13 @@ def define_parameters(self, handler): default_value = 0.0 | nbody_system.time ) + handler.add_method_parameter( + "get_opencl_device_type", + "set_opencl_device_type", + "opencl_device_type", + "set preferred OpenCL device type (0=default, 1=cpu, 2=gpu)", + default_value = 0 + ) def define_methods(self, handler): GravitationalDynamics.define_methods(self, handler) diff --git a/src/amuse/community/huayno/src/Makefile b/src/amuse/community/huayno/src/Makefile index 256831aa5e..d16b60d036 100644 --- a/src/amuse/community/huayno/src/Makefile +++ b/src/amuse/community/huayno/src/Makefile @@ -59,4 +59,4 @@ libhuayno_cl.a: evolve_kern.clh $(OBJS) evolve_cl.o $(MPICC) $(CFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< %.clh: %.cl - awk 'BEGIN{print "const char srcstr[]=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ + awk 'BEGIN{print "const char *srcstr=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index ed9944865e..cd19271233 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -26,6 +26,7 @@ FLOAT eps2; FLOAT dt_param; struct sys zerosys ={0,0,NULL,NULL}; int accel_zero_mass=1; +int opencl_device_type=0; struct diagnostics global_diag; struct diagnostics *diag; diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 1cedffa565..03e3a84d1c 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -113,6 +113,7 @@ extern struct sys zerosys; extern int fixed_j; extern DOUBLE bs_target_error; +extern int opencl_device_type; /* diagnostics */ struct diagnostics { diff --git a/src/amuse/community/huayno/src/evolve_cl.c b/src/amuse/community/huayno/src/evolve_cl.c index faccff3899..c21bf303ce 100644 --- a/src/amuse/community/huayno/src/evolve_cl.c +++ b/src/amuse/community/huayno/src/evolve_cl.c @@ -1,37 +1,138 @@ #include "evolve.h" + #ifdef EVOLVE_OPENCL -#include -#include +#include +#include + +#define CL_TARGET_OPENCL_VERSION 200 +#ifdef __APPLE__ + #include "OpenCL/opencl.h" +#else + #include "CL/cl.h" +#endif + #include "evolve_cl.h" #include "evolve_kern.clh" -#define cl_double4_x(f) (((cl_double*)&(f))[0]) -#define cl_double4_y(f) (((cl_double*)&(f))[1]) -#define cl_double4_z(f) (((cl_double*)&(f))[2]) -#define cl_double4_w(f) (((cl_double*)&(f))[3]) - -static void *h; -static cl_kernel kick_krn; +static cl_device_id device_id; // compute device id +static cl_context context; // compute context +static cl_command_queue queue; // compute command queue +static cl_program program; // compute program +static cl_kernel kick_krn; // kernels static cl_kernel timestep_krn; static cl_kernel potential_krn; +static cl_mem _ipos, _ivel, _jpos, _jvel, _acc, _pot, _timestep; // opencl buffer objects + +static UINT nalloc=0; + +#define SELECT(x,a,b,c) (x==0?a:(x==1?b:c)) void init_cl() { - h = clsopen(CLCONTEXT,srcstr,CLLD_NOW); - kick_krn = clsym(CLCONTEXT,h,"kick_kernel",CLLD_NOW); - timestep_krn = clsym(CLCONTEXT,h,"timestep_kernel",CLLD_NOW); - potential_krn = clsym(CLCONTEXT,h,"potential_kernel",CLLD_NOW); + cl_int err; + char *name; + + err = clGetDeviceIDs(NULL, SELECT(opencl_device_type,CL_DEVICE_TYPE_DEFAULT,CL_DEVICE_TYPE_CPU,CL_DEVICE_TYPE_GPU), 1, &device_id, NULL); + if (err != CL_SUCCESS) ENDRUN("OpenCL could not get device of requested type %s", SELECT(opencl_device_type,"default","cpu","gpu")); + { + size_t size; + clGetDeviceInfo(device_id, CL_DEVICE_NAME, 0, NULL, &size); + name=(char*) malloc(size); + clGetDeviceInfo(device_id, CL_DEVICE_NAME, size, name, NULL); + } + + + context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); + if (!context || err!=CL_SUCCESS) ENDRUN("OpenCL failed to create context"); + + queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); + queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); + if (!queue || err!=CL_SUCCESS) ENDRUN("OpenCL failed to create command queue"); + + program = clCreateProgramWithSource(context, 1, (const char **) &srcstr, NULL, &err); + if (!program || err!=CL_SUCCESS) ENDRUN("OpenCL failed to create program") + + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + size_t len; + char *buffer; + printf("OpenCL failed to build program executable!\n"); + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); + buffer=(char*) malloc(len); + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, len, buffer, NULL); + printf("%s\n", buffer); + free(buffer); + ENDRUN("exiting") + } + + kick_krn = clCreateKernel(program, "kick_kernel", &err); + if(err!= CL_SUCCESS) ENDRUN("OpenCL failed to create kick_kernel"); + timestep_krn = clCreateKernel(program, "timestep_kernel", &err); + if(err!= CL_SUCCESS) ENDRUN("OpenCL failed to create timestep_kernel"); + potential_krn = clCreateKernel(program, "potential_kernel", &err); + if(err!= CL_SUCCESS) ENDRUN("OpenCL failed to create potential_kernel"); + + printf("OpenCL initialized using device %s\n", name); + free(name); +} + +void release_cl_buffers() +{ + + clReleaseMemObject(_ipos); + clReleaseMemObject(_jpos); + clReleaseMemObject(_ivel); + clReleaseMemObject(_jvel); + clReleaseMemObject(_acc); + clReleaseMemObject(_timestep); + clReleaseMemObject(_pot); + nalloc=0; +} + + +void init_cl_buffers(UINT nthread) +{ + cl_int err; + + if(nalloc) release_cl_buffers(); + + _ipos = clCreateBuffer(context, CL_MEM_READ_ONLY, nthread*sizeof(CLFLOAT4), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + _ivel = clCreateBuffer(context, CL_MEM_READ_ONLY, nthread*sizeof(CLFLOAT4), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + _jpos = clCreateBuffer(context, CL_MEM_READ_ONLY, nthread*sizeof(CLFLOAT4), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + _jvel = clCreateBuffer(context, CL_MEM_READ_ONLY, nthread*sizeof(CLFLOAT4), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + + _acc = clCreateBuffer(context, CL_MEM_WRITE_ONLY, nthread*sizeof(CLFLOAT4), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + _pot = clCreateBuffer(context, CL_MEM_WRITE_ONLY, nthread*sizeof(CLFLOAT), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + _timestep = clCreateBuffer(context, CL_MEM_WRITE_ONLY, nthread*sizeof(CLFLOAT), NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("failure creating cl buffer") + + nalloc=nthread; } void close_cl() { - clclose(CLCONTEXT,h); + release_cl_buffers(); + clReleaseProgram(program); + clReleaseKernel(kick_krn); + clReleaseKernel(timestep_krn); + clReleaseKernel(potential_krn); + clReleaseCommandQueue(queue); + clReleaseContext(context); } void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) { + cl_int err; + size_t global, local; int i, n2; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; @@ -45,11 +146,19 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; if(n2nalloc) init_cl_buffers(nthread); + + CLFLOAT4* ipos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _ipos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + CLFLOAT4* jpos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _jpos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + + //~ clndrange_t ndr = clndrange_init1d(0,nthread,groupsize); + + //~ CLFLOAT4* ipos = (CLFLOAT4*) clmalloc(CLCONTEXT,nthread*sizeof(CLFLOAT4),0); + //~ CLFLOAT4* acc = (CLFLOAT4*) clmalloc(CLCONTEXT,nthread*sizeof(CLFLOAT4),0); + //~ CLFLOAT4* jpos = (CLFLOAT4*) clmalloc(CLCONTEXT,n2*sizeof(CLFLOAT4),0); for(i=0;ipos[2]; jpos[i].s3=ipart->mass; } - for(i=0;ivel[0]+=dt*acc[i].s0; - ipart->vel[1]+=dt*acc[i].s1; - ipart->vel[2]+=dt*acc[i].s2; + //~ ipart->vel[0]+=dt*acc[i].s0; + //~ ipart->vel[1]+=dt*acc[i].s1; + //~ ipart->vel[2]+=dt*acc[i].s2; + COMPSUMV(ipart->vel[0],ipart->vel_e[0],dt*acc[i].s0); + COMPSUMV(ipart->vel[1],ipart->vel_e[1],dt*acc[i].s1); + COMPSUMV(ipart->vel[2],ipart->vel_e[2],dt*acc[i].s2); } - clfree( ipos); - clfree( jpos); - clfree( acc); + err=clEnqueueUnmapMemObject(queue, _acc, (void*)acc, 0, NULL, NULL); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueUnmapBuffer fail") + + //~ clfree( ipos); + //~ clfree( jpos); + //~ clfree( acc); } void _timestep_cl(struct sys s1, struct sys s2,int dir) { - int i,n2; + cl_int err; + size_t global, local; + int i, n2; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; CLFLOAT cldtparam=(CLFLOAT) dt_param; struct particle *ipart; - - n2=s2.n; - if(s1.n==0 || s2.n==0) return; + + n2=s2.n; + if(s1.n==0 || n2==0) return; groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; nthread=((s1.n-1)/groupsize+1)*groupsize; - blocksize=BLOCKSIZE/2; - if(s2.nnalloc) init_cl_buffers(nthread); - CLFLOAT4* ipos = (CLFLOAT4*) clmalloc(CLCONTEXT,nthread*sizeof(CLFLOAT4),0); - CLFLOAT4* ivel = (CLFLOAT4*) clmalloc(CLCONTEXT,nthread*sizeof(CLFLOAT4),0); - CLFLOAT* timestep = (CLFLOAT*) clmalloc(CLCONTEXT,nthread*sizeof(CLFLOAT),0); - CLFLOAT4* jpos = (CLFLOAT4*) clmalloc(CLCONTEXT,s2.n*sizeof(CLFLOAT4),0); - CLFLOAT4* jvel = (CLFLOAT4*) clmalloc(CLCONTEXT,s2.n*sizeof(CLFLOAT4),0); + CLFLOAT4* ipos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _ipos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + CLFLOAT4* jpos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _jpos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + CLFLOAT4* ivel = (CLFLOAT4*) clEnqueueMapBuffer(queue, _ivel, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + CLFLOAT4* jvel = (CLFLOAT4*) clEnqueueMapBuffer(queue, _jvel, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + + + //~ int i,n2; + //~ int groupsize,nthread,blocksize; + //~ CLFLOAT cleps2=(CLFLOAT) eps2; + //~ CLFLOAT cldtparam=(CLFLOAT) dt_param; + //~ struct particle *ipart; + + //~ n2=s2.n; + //~ if(s1.n==0 || s2.n==0) return; + + //~ groupsize=NTHREAD; + //~ if(s1.n < NTHREAD) groupsize=s1.n; + //~ nthread=((s1.n-1)/groupsize+1)*groupsize; + //~ blocksize=BLOCKSIZE/2; + //~ if(s2.npos[2]; jvel[i].s2=ipart->vel[2]; jpos[i].s3= ipart->mass; jvel[i].s3=0.; } - for(i=0;itimestep=timestep[i]; //if(timestep[i]timestep) } - clfree( ipos); - clfree( ivel); - clfree( jpos); - clfree( jvel); - clfree( timestep); + err=clEnqueueUnmapMemObject(queue, _timestep, (void*)timestep, 0, NULL, NULL); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueUnmapBuffer fail") + + //~ clfree( ipos); + //~ clfree( ivel); + //~ clfree( jpos); + //~ clfree( jvel); + //~ clfree( timestep); } void timestep_cl(struct sys s1, struct sys s2,int dir) @@ -202,25 +413,49 @@ void timestep_cl(struct sys s1, struct sys s2,int dir) void potential_cl(struct sys s1, struct sys s2) { + cl_int err; + size_t global, local; int i, n2; int groupsize,nthread,blocksize; CLFLOAT cleps2=(CLFLOAT) eps2; struct particle *ipart; n2=s2.n-s2.nzero; - if(s1.n==0 || n2==0) return; groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; - if( n2nalloc) init_cl_buffers(nthread); + + CLFLOAT4* ipos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _ipos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + CLFLOAT4* jpos = (CLFLOAT4*) clEnqueueMapBuffer(queue, _jpos, CL_TRUE, CL_MAP_READ, 0, nthread * sizeof(CLFLOAT4), 0, NULL, NULL, &err); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") + + + //~ int i, n2; + //~ int groupsize,nthread,blocksize; + //~ CLFLOAT cleps2=(CLFLOAT) eps2; + //~ struct particle *ipart; + + //~ n2=s2.n-s2.nzero; + + //~ if(s1.n==0 || n2==0) return; + + //~ groupsize=NTHREAD; + //~ if(s1.n < NTHREAD) groupsize=s1.n; + //~ nthread=((s1.n-1)/groupsize+1)*groupsize; + //~ blocksize=BLOCKSIZE; + //~ if( n2pos[2]; jpos[i].s3=ipart->mass; } - for(i=0;ipot+=pot[i]; - clfree( ipos); - clfree( jpos); - clfree( pot); + err=clEnqueueUnmapMemObject(queue, _pot, (void*)pot, 0, NULL, NULL); + if(err!=CL_SUCCESS) ENDRUN("clEnqueueUnmapBuffer fail") + + //~ for(i=0;ipot+=pot[i]; + + //~ clfree( ipos); + //~ clfree( jpos); + //~ clfree( pot); } #endif From a836cf30f087c15295b864448b204fca9d9910de Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 5 Aug 2021 20:21:11 +0200 Subject: [PATCH 28/35] minor cleanup of Makefile --- src/amuse/community/huayno/Makefile | 20 +++++--------------- src/amuse/community/huayno/src/Makefile | 19 ++++++------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/src/amuse/community/huayno/Makefile b/src/amuse/community/huayno/Makefile index f084d6eea7..e6e010ad88 100644 --- a/src/amuse/community/huayno/Makefile +++ b/src/amuse/community/huayno/Makefile @@ -20,9 +20,6 @@ CXXFLAGS += LIBS += -lm INCLUDE = - -CODE_GENERATOR ?= $(AMUSE_DIR)/build.py - #--------------------------------------------- OBJS = interface.o @@ -43,7 +40,7 @@ clean: rm -Rf $(BUILDDIR) rm -Rf $(OPENCL_BUILDDIR) rm -Rf $(OPENMP_BUILDDIR) - rm -f huayno_worker huayno_worker_cl huayno_worker_mp huayno_worker_sockets + rm -f huayno_worker huayno_worker_cl huayno_worker_mp -make -C $(SRCDIR) clean distclean: clean @@ -55,7 +52,6 @@ $(BUILDDIR)/Makefile: $(SRCDIR)/Makefile $(BUILDDIR)/libhuayno.a: $(BUILDDIR)/Makefile $(LIBFILES) src/Makefile make -C $(BUILDDIR) all VPATH=../src CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" - $(OPENMP_BUILDDIR)/Makefile: $(SRCDIR)/Makefile -mkdir $(OPENMP_BUILDDIR) cp $(SRCDIR)/Makefile $(OPENMP_BUILDDIR)/Makefile @@ -69,8 +65,7 @@ $(OPENCL_BUILDDIR)/Makefile: $(SRCDIR)/Makefile $(OPENCL_BUILDDIR)/libhuayno_cl.a: $(OPENCL_BUILDDIR)/Makefile $(LIBFILES) src/Makefile make -C $(OPENCL_BUILDDIR) libhuayno_cl.a VPATH=../src CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LIBS="$(LIBS)" INCLUDE="$(INCLUDE)" - - + worker_code.cc: interface.py $(CODE_GENERATOR) --type=c interface.py HuaynoInterface -o $@ @@ -95,22 +90,17 @@ huayno_worker_cl: INCLUDE += $(CL_FLAGS) -I. huayno_worker_cl: __init__.py worker_code.cc worker_code.h $(OPENCL_BUILDDIR)/libhuayno_cl.a $(OPENCL_BUILDDIR)/interface.o $(MPICXX) $(CXXFLAGS) $(INCLUDE) $(LDFLAGS) worker_code.cc $(OPENCL_BUILDDIR)/interface.o $(OPENCL_BUILDDIR)/libhuayno_cl.a -o $@ $(LIBS) $(SC_CLIBS) - .cc.o: $< $(MPICXX) -I$(SRCDIR) $(SC_FLAGS) $(CXXFLAGS) $(INCLUDE) -c -o $@ $< $(BUILDDIR)/%.o: %.c - $(MPICC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< + $(CC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< $(OPENMP_BUILDDIR)/%.o: %.c - $(MPICC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< + $(CC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< - $(OPENCL_BUILDDIR)/%.o: %.c - $(MPICC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< + $(CC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< .c.o: $< $(MPICC) -I$(SRCDIR) $(SC_FLAGS) $(CFLAGS) $(INCLUDE) -c -o $@ $< - -%.clh: %.cl - awk 'BEGIN{print "const char *srcstr=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ diff --git a/src/amuse/community/huayno/src/Makefile b/src/amuse/community/huayno/src/Makefile index d16b60d036..6aa244b3ba 100644 --- a/src/amuse/community/huayno/src/Makefile +++ b/src/amuse/community/huayno/src/Makefile @@ -1,9 +1,10 @@ # standard amuse configuration include # config.mk will be made after ./configure has run -AMUSE_DIR?=../../../../.. +ifeq ($(origin AMUSE_DIR), undefined) + AMUSE_DIR := $(shell amusifier --get-amuse-dir) +endif -include ${AMUSE_DIR}/config.mk - MPICXX ?= mpicxx MPICC ?= mpicc @@ -30,18 +31,10 @@ OBJS = evolve.o evolve_shared.o evolve_sf.o evolve_cc.o \ all: libhuayno.a clean: - rm -f *.o *.bck *.pyc *.clh worker_code.cc worker_code.h worker_code-sockets.cc + rm -f *.o *.bck *.pyc *.clh distclean: clean - rm -f huayno_worker huayno_worker_cl huayno_worker_mp huayno_worker_sockets - -worker_code.cc: interface.py - $(CODE_GENERATOR) --type=c interface.py HuaynoInterface -o $@ -worker_code.h: interface.py - $(CODE_GENERATOR) --type=h interface.py HuaynoInterface -o $@ - - libhuayno.a: $(OBJS) $(RM) -f $@ $(AR) $@ $(OBJS) @@ -53,10 +46,10 @@ libhuayno_cl.a: evolve_kern.clh $(OBJS) evolve_cl.o $(RANLIB) $@ .cc.o: $< - $(MPICXX) $(CXXFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< + $(CXX) $(CXXFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< .c.o: $< - $(MPICC) $(CFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< + $(CC) $(CFLAGS) $(SC_FLAGS) $(INCLUDE) -c -o $@ $< %.clh: %.cl awk 'BEGIN{print "const char *srcstr=" } {if(substr($$0,length($0))=="\\"){$$0=$$0"\\"};print "\""$$0"\\n\""} END{print ";"}' $< > $@ From 83434acfc5c8f71c6385bcba9aa93030f21e1847 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Thu, 5 Aug 2021 20:36:14 +0200 Subject: [PATCH 29/35] minor change for system energies --- src/amuse/community/huayno/interface.c | 4 ++-- src/amuse/community/huayno/src/evolve.c | 8 ++++---- src/amuse/community/huayno/src/evolve.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/amuse/community/huayno/interface.c b/src/amuse/community/huayno/interface.c index 7ac5c5217a..fcf266bcf7 100644 --- a/src/amuse/community/huayno/interface.c +++ b/src/amuse/community/huayno/interface.c @@ -276,13 +276,13 @@ int get_index_of_next_particle(int id, int *nout) int get_kinetic_energy(double *kinetic_energy) { - *kinetic_energy=system_kinetic_energy(mainsys); + *kinetic_energy=(double) system_kinetic_energy(mainsys); return 0; } int get_potential_energy(double *potential_energy) { - *potential_energy=system_potential_energy(mainsys); + *potential_energy=(double) system_potential_energy(mainsys); return 0; } diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index cd19271233..aace06bf05 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -72,7 +72,7 @@ void system_center_of_mass(struct sys s, DOUBLE *cmpos, DOUBLE *cmvel) } } -FLOAT system_kinetic_energy(struct sys s) +DOUBLE system_kinetic_energy(struct sys s) { UINT i; DOUBLE e=0.; @@ -84,10 +84,10 @@ FLOAT system_kinetic_energy(struct sys s) ipart->vel[1]*ipart->vel[1]+ ipart->vel[2]*ipart->vel[2] ); } - return (FLOAT) e; + return e; } -FLOAT system_potential_energy(struct sys s) +DOUBLE system_potential_energy(struct sys s) { UINT i; DOUBLE e=0.; @@ -97,7 +97,7 @@ FLOAT system_potential_energy(struct sys s) ipart=GETPART(s, i); e+=ipart->mass*ipart->pot; } - return (FLOAT) e/2; + return e/2; } void init_code() diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index 03e3a84d1c..fe121bead8 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -143,8 +143,8 @@ void do_evolve(struct sys s, double dt, int inttype); void system_center_of_mass(struct sys s, DOUBLE *cmpos, DOUBLE *cmvel); void move_system(struct sys s, DOUBLE dpos[3],DOUBLE dvel[3],int dir); -FLOAT system_potential_energy(struct sys s); -FLOAT system_kinetic_energy(struct sys s); +DOUBLE system_potential_energy(struct sys s); +DOUBLE system_kinetic_energy(struct sys s); void drift(int clevel,struct sys s, DOUBLE etime, DOUBLE dt); /* drift sys */ void kick(int clevel,struct sys s1, struct sys s2, DOUBLE dt); /* =kick sys1 for interactions with sys2 */ From ebc9069759ce81dc3ab4e6d12337beb71acae780 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Fri, 6 Aug 2021 16:19:29 +0200 Subject: [PATCH 30/35] remove code comment and fix evolve_cl for small systems --- src/amuse/community/huayno/src/evolve_cl.c | 132 ++------------------- 1 file changed, 9 insertions(+), 123 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cl.c b/src/amuse/community/huayno/src/evolve_cl.c index c21bf303ce..a4823e522c 100644 --- a/src/amuse/community/huayno/src/evolve_cl.c +++ b/src/amuse/community/huayno/src/evolve_cl.c @@ -43,7 +43,6 @@ void init_cl() clGetDeviceInfo(device_id, CL_DEVICE_NAME, size, name, NULL); } - context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (!context || err!=CL_SUCCESS) ENDRUN("OpenCL failed to create context"); @@ -81,7 +80,6 @@ void init_cl() void release_cl_buffers() { - clReleaseMemObject(_ipos); clReleaseMemObject(_jpos); clReleaseMemObject(_ivel); @@ -143,6 +141,7 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; + if(groupsize<=8) groupsize=8; // needed for some implementations (e.g. pocl) nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; if(n2vel[0]+=dt*acc[i].s0; - //~ ipart->vel[1]+=dt*acc[i].s1; - //~ ipart->vel[2]+=dt*acc[i].s2; COMPSUMV(ipart->vel[0],ipart->vel_e[0],dt*acc[i].s0); COMPSUMV(ipart->vel[1],ipart->vel_e[1],dt*acc[i].s1); COMPSUMV(ipart->vel[2],ipart->vel_e[2],dt*acc[i].s2); @@ -235,9 +210,6 @@ void kick_cl(struct sys s1, struct sys s2, DOUBLE dt) err=clEnqueueUnmapMemObject(queue, _acc, (void*)acc, 0, NULL, NULL); if(err!=CL_SUCCESS) ENDRUN("clEnqueueUnmapBuffer fail") - //~ clfree( ipos); - //~ clfree( jpos); - //~ clfree( acc); } void _timestep_cl(struct sys s1, struct sys s2,int dir) @@ -249,12 +221,13 @@ void _timestep_cl(struct sys s1, struct sys s2,int dir) CLFLOAT cleps2=(CLFLOAT) eps2; CLFLOAT cldtparam=(CLFLOAT) dt_param; struct particle *ipart; - + n2=s2.n; if(s1.n==0 || n2==0) return; groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; + if(groupsize<=8) groupsize=8; // needed for some implementations (e.g. pocl) nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; if(n2pos[0]; jvel[i].s0=ipart->vel[0]; @@ -345,28 +295,7 @@ void _timestep_cl(struct sys s1, struct sys s2,int dir) if(err!=CL_SUCCESS) ENDRUN("kernel run fail") clFinish(queue); - - //~ clarg_set(CLCONTEXT,timestep_krn,0, n2); - //~ clarg_set(CLCONTEXT,timestep_krn,1, blocksize); - //~ clarg_set(CLCONTEXT,timestep_krn,2, cleps2); - //~ clarg_set(CLCONTEXT,timestep_krn,3, cldtparam); - //~ clarg_set_global(CLCONTEXT,timestep_krn,4, ipos); - //~ clarg_set_global(CLCONTEXT,timestep_krn,5, ivel); - //~ clarg_set_global(CLCONTEXT,timestep_krn,6, jpos); - //~ clarg_set_global(CLCONTEXT,timestep_krn,7, jvel); - //~ clarg_set_global(CLCONTEXT,timestep_krn,8, timestep); - //~ clarg_set_local(CLCONTEXT,timestep_krn,9,blocksize*sizeof(CLFLOAT4)); - //~ clarg_set_local(CLCONTEXT,timestep_krn,10,blocksize*sizeof(CLFLOAT4)); - //~ clarg_set(CLCONTEXT,timestep_krn,11, dir); - //~ clmsync(CLCONTEXT,0,ipos,CL_MEM_DEVICE|CL_EVENT_NOWAIT); - //~ clmsync(CLCONTEXT,0,ivel,CL_MEM_DEVICE|CL_EVENT_NOWAIT); - //~ clmsync(CLCONTEXT,0,jpos,CL_MEM_DEVICE|CL_EVENT_NOWAIT); - //~ clmsync(CLCONTEXT,0,jvel,CL_MEM_DEVICE|CL_EVENT_NOWAIT); - //~ clfork(CLCONTEXT,0,timestep_krn,&ndr,CL_EVENT_NOWAIT); - //~ clmsync(CLCONTEXT,0,timestep,CL_MEM_HOST|CL_EVENT_NOWAIT); - //~ clwait(CLCONTEXT,0,CL_KERNEL_EVENT|CL_MEM_EVENT); - CLFLOAT* timestep = (CLFLOAT*) clEnqueueMapBuffer(queue, _timestep, CL_TRUE, CL_MAP_WRITE, 0, nthread * sizeof(CLFLOAT), 0, NULL, NULL, &err); if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") @@ -379,16 +308,12 @@ void _timestep_cl(struct sys s1, struct sys s2,int dir) err=clEnqueueUnmapMemObject(queue, _timestep, (void*)timestep, 0, NULL, NULL); if(err!=CL_SUCCESS) ENDRUN("clEnqueueUnmapBuffer fail") - //~ clfree( ipos); - //~ clfree( ivel); - //~ clfree( jpos); - //~ clfree( jvel); - //~ clfree( timestep); } void timestep_cl(struct sys s1, struct sys s2,int dir) { - if(accel_zero_mass) + + if(accel_zero_mass && s1.nzero*s2.nzero>CLWORKLIMIT/4) { struct sys s1m=zerosys, s1ml=zerosys, s2m=zerosys; @@ -401,7 +326,7 @@ void timestep_cl(struct sys s1, struct sys s2,int dir) if(s1ml.n>0) s1ml.zeropart=s1.zeropart; s2m.n=s2.n-s2.nzero; - if(s2m.n>0) s2m.part=s2.part; + if(s2m.n>0) s2m.part=s2.part; _timestep_cl(s1m, s2, dir); _timestep_cl(s1ml, s2m, dir); } else @@ -425,6 +350,7 @@ void potential_cl(struct sys s1, struct sys s2) groupsize=NTHREAD; if(s1.n < NTHREAD) groupsize=s1.n; + if(groupsize<=8) groupsize=8; // needed for some implementations (e.g. pocl) nthread=((s1.n-1)/groupsize+1)*groupsize; blocksize=BLOCKSIZE; if(n2pot+=pot[i]; - - //~ clfree( ipos); - //~ clfree( jpos); - //~ clfree( pot); } #endif From 60b06bdc9e36651243d23dbe98d3a7217fab004e Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Sat, 7 Aug 2021 16:41:33 +0200 Subject: [PATCH 31/35] add CC_SHARED10 (connected components with shared10 termination) integrators, rework shortcuts (still not useful), add function for max timestep --- src/amuse/community/huayno/Makefile | 4 +- src/amuse/community/huayno/interface.py | 2 +- src/amuse/community/huayno/src/evolve.c | 27 +++++-- src/amuse/community/huayno/src/evolve.h | 7 +- src/amuse/community/huayno/src/evolve_cc.c | 82 ++++++++++++---------- src/amuse/community/huayno/src/evolve_cc.h | 5 ++ 6 files changed, 80 insertions(+), 47 deletions(-) diff --git a/src/amuse/community/huayno/Makefile b/src/amuse/community/huayno/Makefile index e6e010ad88..06af85a217 100644 --- a/src/amuse/community/huayno/Makefile +++ b/src/amuse/community/huayno/Makefile @@ -83,8 +83,8 @@ huayno_worker_mp: __init__.py worker_code.cc worker_code.h $(OPENMP_BUILDDIR)/li #~ huayno_worker_mp.so: CFLAGS += $(OPENMP_CFLAGS) #~ huayno_worker_mp.so: CXXFLAGS += $(OPENMP_CFLAGS) -huayno_worker_cl: CFLAGS += -fopenmp -DEVOLVE_OPENCL -huayno_worker_cl: CXXFLAGS += -fopenmp -DEVOLVE_OPENCL +huayno_worker_cl: CFLAGS += -DEVOLVE_OPENCL +huayno_worker_cl: CXXFLAGS += -DEVOLVE_OPENCL huayno_worker_cl: LIBS += $(CL_LIBS) huayno_worker_cl: INCLUDE += $(CL_FLAGS) -I. huayno_worker_cl: __init__.py worker_code.cc worker_code.h $(OPENCL_BUILDDIR)/libhuayno_cl.a $(OPENCL_BUILDDIR)/interface.o diff --git a/src/amuse/community/huayno/interface.py b/src/amuse/community/huayno/interface.py index 2cb3c162a9..c5075559ce 100644 --- a/src/amuse/community/huayno/interface.py +++ b/src/amuse/community/huayno/interface.py @@ -226,7 +226,7 @@ def _list(cls): CC_BS = 24, CCC_BS = 25, BS_CC_KEPLER = 26, CC_BSA = 27, CCC_BSA = 28, SHARED2_COLLISIONS = 29, SHARED4_COLLISIONS = 30, SHARED6_COLLISIONS = 31, SHARED8_COLLISIONS = 32, SHARED10_COLLISIONS = 33, CONSTANT2 = 34, CONSTANT4 = 35, CONSTANT6 = 36, - CONSTANT8 = 37, CONSTANT10 = 38, ) + CONSTANT8 = 37, CONSTANT10 = 38, ERROR_CONTROL=39, CC_SHARED10=40, CCC_SHARED10=41) for key, val in all_inttypes.items(): setattr(inttypes, key, val) diff --git a/src/amuse/community/huayno/src/evolve.c b/src/amuse/community/huayno/src/evolve.c index aace06bf05..3cda968631 100644 --- a/src/amuse/community/huayno/src/evolve.c +++ b/src/amuse/community/huayno/src/evolve.c @@ -278,6 +278,8 @@ void do_evolve(struct sys s, double dt, int inttype) case CCC_KEPLER: case CCC_BS: case CCC_BSA: + case CC_SHARED10: + case CCC_SHARED10: #ifdef _OPENMP #pragma omp parallel shared(global_diag,s,dt,clevel) copyin(dt_param) { @@ -287,7 +289,11 @@ void do_evolve(struct sys s, double dt, int inttype) printf("Total Threads # %d\n", omp_get_num_threads()); #pragma omp single #endif +#ifdef CC2_SPLIT_SHORTCUTS + evolve_cc2_shortcut(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, inttype, 1, -1.); +#else evolve_cc2(clevel,s,(DOUBLE) 0.,(DOUBLE) dt,(DOUBLE) dt, inttype, 1); +#endif #ifdef _OPENMP #pragma omp critical sum_diagnostics(&global_diag,diag); @@ -599,7 +605,8 @@ static void report(struct sys s,DOUBLE etime, int inttype) } printf(" total, total j, mean j: %18li %18li %f\n",totalbs,totalj,totalj/(1.*totalbs)); } - if(inttype==KEPLER || inttype==CC_KEPLER || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA) + if(inttype==KEPLER || inttype==CC_KEPLER || inttype==CCC_KEPLER || inttype==CCC_BS || + inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA || inttype==CC_SHARED10 || inttype==CC_SHARED10) { unsigned long totalcefail=0,totalcecount=0; printf("kepler solver counts:\n"); @@ -677,16 +684,26 @@ struct sys join(struct sys s1,struct sys s2) FLOAT global_timestep(struct sys s) { - FLOAT mindt=HUGE_VAL; - struct particle *ipart; + FLOAT dt,mindt=HUGE_VAL; for(UINT i=0;iipart->timestep) mindt=ipart->timestep; + dt=GETPART(s, i)->timestep; + if(dttimestep; + if(dt>maxdt) maxdt=dt; + } + return maxdt; +} + void kdk(int clevel,struct sys s1,struct sys s2, DOUBLE stime, DOUBLE etime, DOUBLE dt) { if(s2.n>0) kick(clevel,s2, s1, dt/2); diff --git a/src/amuse/community/huayno/src/evolve.h b/src/amuse/community/huayno/src/evolve.h index fe121bead8..4c4de6dd33 100644 --- a/src/amuse/community/huayno/src/evolve.h +++ b/src/amuse/community/huayno/src/evolve.h @@ -12,7 +12,7 @@ #define RARVRATIO 1. #define MPWORKLIMIT 1000 -#define CLWORKLIMIT 100000 +#define CLWORKLIMIT 40000 #define MAXLEVEL 64 @@ -100,7 +100,9 @@ enum intopt CONSTANT6, // 36 CONSTANT8, // 37 CONSTANT10, // 38 - ERROR_CONTROL // 39 + ERROR_CONTROL, // 39 + CC_SHARED10, // 40 + CCC_SHARED10 // 41 }; extern int verbosity; @@ -155,6 +157,7 @@ void dkd(int clevel,struct sys s1,struct sys s2, DOUBLE stime, DOUBLE etime, DOU void timestep(int clevel,struct sys s1, struct sys s2,int dir); FLOAT timestep_ij(struct particle *i, struct particle *j,int dir); FLOAT global_timestep(struct sys s); +FLOAT max_global_timestep(struct sys s); void potential(struct sys s1, struct sys s2); diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index 33d1c32d19..a8ec089d8f 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -17,6 +17,12 @@ #include "evolve.h" #include "evolve_kepler.h" #include "evolve_bs.h" +#include "evolve_shared.h" + +#define BS_SUBSYS_SIZE 10 +#define SHARED10_SUBSYS_SIZE 10 +#define SHARED10_MIN_DT_RATIO (1./128) // if dtmin/dtmax > this do shared10, else try to find further CC + // (ie see if there are hard binaries) struct ccsys { @@ -393,22 +399,6 @@ void free_sys(struct ccsys * s) free(s); } -DOUBLE sys_forces_max_timestep(struct sys s,int dir) -{ - DOUBLE ts = 0.0; - DOUBLE ts_ij; - for (UINT i = 0; i < s.n-1; i++) - { - for (UINT j = i+1; j < s.n; j++) - { - ts_ij = (DOUBLE) timestep_ij(GETPART(s,i), GETPART(s,j) ,dir); // check symm. - if (ts_ij >= ts) { ts = ts_ij; }; - } - } - return ts; -} - -#define BS_SUBSYS_SIZE 10 #define TASKCONDITION (nc > 1 && s.n>BS_SUBSYS_SIZE) void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int inttype, int recenter) { @@ -419,7 +409,8 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, CHECK_TIMESTEP(etime,stime,dt,clevel); if ((s.n == 2 || s.n-s.nzero<=1 )&& - (inttype==CCC_KEPLER || inttype==CC_KEPLER || inttype==CCC_BS || inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA)) + (inttype==CCC_KEPLER || inttype==CC_KEPLER || inttype==CCC_BS || + inttype==CC_BS || inttype==CCC_BSA || inttype==CC_BSA || inttype==CC_SHARED10 || inttype==CCC_SHARED10)) //~ if (s.n == 2 && (inttype==CCC_KEPLER || inttype==CC_KEPLER)) { evolve_kepler(clevel,s, stime, etime, dt); @@ -438,30 +429,23 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, return; } - if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA)) + if (s.n <= SHARED10_SUBSYS_SIZE && (inttype==CCC_SHARED10 ||inttype==CC_SHARED10)) { - system_center_of_mass(s,cmpos,cmvel); - move_system(s,cmpos,cmvel,-1); + timestep(clevel,s,s,SIGN(dt)); + FLOAT dtmax=max_global_timestep(s); + FLOAT dtmin=global_timestep(s); + if(dtmin/dtmax>SHARED10_MIN_DT_RATIO) + { + evolve_shared10(clevel,s, stime, etime, dt, -1.); + return; + } } -// not actually helpful I think; needs testing -#ifdef CC2_SPLIT_SHORTCUTS - int dir=SIGN(dt); - DOUBLE initial_timestep = sys_forces_max_timestep(s, dir); - if(fabs(dt) > initial_timestep) + if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) { - DOUBLE dt_step = dt; - while (fabs(dt_step) > initial_timestep) - { - dt_step = dt_step / 2; - clevel++; - } - LOG("CC2_SPLIT_SHORTCUTS clevel=%d dt/dt_step=%Le\n", clevel,(long double) (dt / dt_step)); - for (DOUBLE dt_now = 0; dir*dt_now < dir*(dt-dt_step/2); dt_now += dt_step) - evolve_cc2(clevel,s, dt_now, dt_now + dt_step, dt_step,inttype,0); - return; + system_center_of_mass(s,cmpos,cmvel); + move_system(s,cmpos,cmvel,-1); } -#endif #ifdef CONSISTENCY_CHECKS if (clevel == 0) @@ -611,7 +595,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } #pragma omp taskwait - if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA)) + if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) { for(int i=0;i<3;i++) cmpos[i]+=cmvel[i]*dt; move_system(s,cmpos,cmvel,1); @@ -620,4 +604,28 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, free_sys(c); } + +// not actually helpful I think; needs testing +void evolve_cc2_shortcut(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int inttype, int recenter, FLOAT dtsys) +{ + CHECK_TIMESTEP(etime,stime,dt,clevel); + if(dtsys<0) + { + timestep(clevel,s,s,SIGN(dt)); + dtsys=max_global_timestep(s); + } + if(dtsys < fabs(dt)) + { + evolve_cc2_shortcut(clevel+1,s,stime, stime+dt/2,dt/2, inttype, recenter, dtsys); + evolve_cc2_shortcut(clevel+1,s,stime+dt/2, etime,dt/2, inttype, recenter, -1.); + } + else + { + evolve_cc2(clevel,s, stime, etime, dt, inttype, recenter); + } +} + + #undef TASKCONDITION + + diff --git a/src/amuse/community/huayno/src/evolve_cc.h b/src/amuse/community/huayno/src/evolve_cc.h index 11838736db..b4fe39b7ff 100644 --- a/src/amuse/community/huayno/src/evolve_cc.h +++ b/src/amuse/community/huayno/src/evolve_cc.h @@ -1 +1,6 @@ +//~ #define CC2_SPLIT_SHORTCUTS void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int inttype,int recenter); + +#ifdef CC2_SPLIT_SHORTCUTS +void evolve_cc2_shortcut(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, int inttype, int recenter, FLOAT dtsys); +#endif From 98e9d6139c00bd79e61d2aa65303f2aabe7183a8 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Sat, 7 Aug 2021 17:03:42 +0200 Subject: [PATCH 32/35] make recenter logic clearer --- src/amuse/community/huayno/src/evolve_cc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index a8ec089d8f..c82351af1e 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -443,8 +443,12 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) { - system_center_of_mass(s,cmpos,cmvel); - move_system(s,cmpos,cmvel,-1); + system_center_of_mass(s,cmpos,cmvel); + move_system(s,cmpos,cmvel,-1); + evolve_cc2(clevel, s, stime, etime, dt, inttype, 0); + for(int i=0;i<3;i++) cmpos[i]+=cmvel[i]*dt; + move_system(s,cmpos,cmvel,1); + return; } #ifdef CONSISTENCY_CHECKS @@ -595,12 +599,6 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } #pragma omp taskwait - if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) - { - for(int i=0;i<3;i++) cmpos[i]+=cmvel[i]*dt; - move_system(s,cmpos,cmvel,1); - } - free_sys(c); } From 08e27241fec44b3af6a5cc873c2da63faebcc838 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Sat, 7 Aug 2021 20:09:15 +0200 Subject: [PATCH 33/35] minor fix and reset compsum in kepler --- src/amuse/community/huayno/src/evolve_cc.c | 20 ++++++----- .../community/huayno/src/evolve_kepler.c | 33 ++++++++++++++----- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/amuse/community/huayno/src/evolve_cc.c b/src/amuse/community/huayno/src/evolve_cc.c index c82351af1e..ccaaa2998f 100644 --- a/src/amuse/community/huayno/src/evolve_cc.c +++ b/src/amuse/community/huayno/src/evolve_cc.c @@ -417,6 +417,16 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, return; } + if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) + { + system_center_of_mass(s,cmpos,cmvel); + move_system(s,cmpos,cmvel,-1); + evolve_cc2(clevel, s, stime, etime, dt, inttype, 0); + for(int i=0;i<3;i++) cmpos[i]+=cmvel[i]*dt; + move_system(s,cmpos,cmvel,1); + return; + } + if (s.n <= BS_SUBSYS_SIZE && (inttype==CCC_BS ||inttype==CC_BS)) { evolve_bs(clevel,s, stime, etime, dt); @@ -441,15 +451,7 @@ void evolve_cc2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt, } } - if(recenter && (inttype==CCC || inttype==CCC_KEPLER || inttype==CCC_BS || inttype==CCC_BSA || inttype==CCC_SHARED10)) - { - system_center_of_mass(s,cmpos,cmvel); - move_system(s,cmpos,cmvel,-1); - evolve_cc2(clevel, s, stime, etime, dt, inttype, 0); - for(int i=0;i<3;i++) cmpos[i]+=cmvel[i]*dt; - move_system(s,cmpos,cmvel,1); - return; - } + #ifdef CONSISTENCY_CHECKS if (clevel == 0) diff --git a/src/amuse/community/huayno/src/evolve_kepler.c b/src/amuse/community/huayno/src/evolve_kepler.c index 8ae81ef4cf..d77687d1d7 100644 --- a/src/amuse/community/huayno/src/evolve_kepler.c +++ b/src/amuse/community/huayno/src/evolve_kepler.c @@ -46,9 +46,18 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, for(k=0;k<3;k++) ipart->vel[k] = vel_cm[k] + f1 * dvel[k]; for(k=0;k<3;k++) jpart->pos[k] = pos_cm[k] - f2 * dpos[k]; for(k=0;k<3;k++) jpart->vel[k] = vel_cm[k] - f2 * dvel[k]; +#ifdef COMPENSATED_SUMMP + for(k=0;k<3;k++) ipart->pos_e[k]=0.; + for(k=0;k<3;k++) jpart->pos_e[k]=0.; +#endif +#ifdef COMPENSATED_SUMMV + for(k=0;k<3;k++) ipart->vel_e[k]=0.; + for(k=0;k<3;k++) jpart->vel_e[k]=0.; +#endif + } else { - for(k=0;k<3;k++) ipart->pos[k]+=ipart->vel[k]*dt; - for(k=0;k<3;k++) jpart->pos[k]+=jpart->vel[k]*dt; + for(k=0;k<3;k++) COMPSUMP(ipart->pos[k],ipart->pos_e[k],dt*ipart->vel[k]); + for(k=0;k<3;k++) COMPSUMP(jpart->pos[k],jpart->pos_e[k],dt*jpart->vel[k]); } ipart->postime=etime; jpart->postime=etime; @@ -59,7 +68,7 @@ static void evolve_kepler_2(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, DOUBLE dt) { struct particle *ipart, *spart; - DOUBLE dpos[3],dpos0[3],cmpos[3]; + DOUBLE dpos[3],dpos0[3],spos[3]; DOUBLE dvel[3],dvel0[3]; UINT err; @@ -72,15 +81,17 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, return; } spart=GETPART(s,0); - for(int k=0;k<3;k++) cmpos[k]= spart->pos[k] + spart->vel[k]*dt; + for(int k=0;k<3;k++) spos[k]= spart->pos[k]; // save initial pos + for(int k=0;k<3;k++) COMPSUMP(spart->pos[k],spart->pos_e[k],dt*spart->vel[k]); //evolve central + spart->postime=etime; err=0; #pragma omp parallel for if((ULONG) s.n>omp_get_num_threads() && !omp_in_parallel()) default(none) \ - private(ipart, dpos,dvel,dpos0,dvel0) shared(etime,clevel, dt,cmpos, s, eps2, spart) reduction(|: err) + private(ipart, dpos,dvel,dpos0,dvel0) shared(etime,clevel, dt,spos, s, eps2, spart) reduction(|: err) for(UINT i=1;ipos[k] - ipart->pos[k]; + for(int k=0;k<3;k++) dpos0[k] = spos[k] - ipart->pos[k]; for(int k=0;k<3;k++) dvel0[k] = spart->vel[k] - ipart->vel[k]; err|=universal_kepler_solver(dt,spart->mass,eps2, dpos0[0],dpos0[1],dpos0[2], @@ -89,15 +100,19 @@ static void evolve_kepler_n(int clevel,struct sys s, DOUBLE stime, DOUBLE etime, &dvel[0],&dvel[1],&dvel[2]); - for(int k=0;k<3;k++) ipart->pos[k] = cmpos[k] - dpos[k]; + for(int k=0;k<3;k++) ipart->pos[k] = spart->pos[k] - dpos[k]; for(int k=0;k<3;k++) ipart->vel[k] = spart->vel[k] - dvel[k]; +#ifdef COMPENSATED_SUMMP + for(int k=0;k<3;k++) ipart->pos_e[k]=0.; +#endif +#ifdef COMPENSATED_SUMMV + for(int k=0;k<3;k++) ipart->vel_e[k]=0.; +#endif ipart->postime=etime; } if (err != 0) { ENDRUN("kepler solver failure"); // failure of the kepler solver should be very rare now } - for(int k=0;k<3;k++) spart->pos[k]=cmpos[k]; - spart->postime=etime; diag->cecount[clevel]+=s.nzero; } From b77caea7ffb7d0debe40288f4e776448d4173dd2 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Mon, 9 Aug 2021 11:59:06 +0200 Subject: [PATCH 34/35] minor error check --- src/amuse/community/huayno/src/evolve_cl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amuse/community/huayno/src/evolve_cl.c b/src/amuse/community/huayno/src/evolve_cl.c index a4823e522c..9c9b68c9b8 100644 --- a/src/amuse/community/huayno/src/evolve_cl.c +++ b/src/amuse/community/huayno/src/evolve_cl.c @@ -403,7 +403,8 @@ void potential_cl(struct sys s1, struct sys s2) if(err!=CL_SUCCESS) ENDRUN("clEnqueueNDRangeKernel fail") - clFinish(queue); + err=clFinish(queue); + if(err!=CL_SUCCESS) ENDRUN("clFinish fail") CLFLOAT* pot = (CLFLOAT*) clEnqueueMapBuffer(queue, _pot, CL_TRUE, CL_MAP_WRITE, 0, nthread * sizeof(CLFLOAT), 0, NULL, NULL, &err); if(err!=CL_SUCCESS) ENDRUN("clEnqueueMapBuffer fail") From b2927d1efce9a27353088c5e60f4b2236aef7722 Mon Sep 17 00:00:00 2001 From: ipelupessy Date: Tue, 10 Aug 2021 11:50:47 +0200 Subject: [PATCH 35/35] change test shasums (changed because of reset compensated summation in kepler) --- src/amuse/test/suite/codes_tests/test_huayno.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/amuse/test/suite/codes_tests/test_huayno.py b/src/amuse/test/suite/codes_tests/test_huayno.py index 1c517d8312..352dbc6fa7 100644 --- a/src/amuse/test/suite/codes_tests/test_huayno.py +++ b/src/amuse/test/suite/codes_tests/test_huayno.py @@ -409,7 +409,7 @@ def test14(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("44b8747cd181c7c05de39094d254771b4f67809c") + print("4f2aac4d8761f3b07545dcea53f1a65f84a5275b") def test14b(self): import hashlib @@ -454,8 +454,7 @@ def test14b(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("a37517f3acc23cfd39b7a53f9f47631fa8df2ae4") - #~ print("36cf912fb27cbf8169aa6d58e592a29bca4d1991") + print("f3563453fb9b959d8337c6e57a526e1ff52572a7") def test14c(self): import hashlib @@ -503,7 +502,7 @@ def test14c(self): # this result is probably dependent on system architecture hence no good for assert print() print(sha.hexdigest()) - print("c31d5d2667189708ae8e8be593a2445204596203") + print("0af7eaea472989ea4be1bb0e2b8633d6b9af41e4") def test15(self): particles = plummer.new_plummer_model(512)