sphere interaction

i-saint · Apr 9, 2012 · 337dbba · 337dbba
commit 337dbba
Show file tree

Hide file tree

Showing 6 changed files with 725 additions and 0 deletions.
diff --git a/SPH_const.h b/SPH_const.h
@@ -0,0 +1,7 @@
+#ifndef _SPH_const_h_
+#define _SPH_const_h_
+
+#define SPH_PARTICLE_SIZE 0.02f
+#define SPH_PARTICLE_STIFFNESS 0.1f
+
+#endif // _SPH_const_h_
diff --git a/SPH_core.ispc b/SPH_core.ispc
@@ -0,0 +1,124 @@
+// �R���p�C���́��݂����ȃJ�X�^���r���h�X�e�b�v�ŁB
+// ispc %(FullPath) -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx --arch=x86-64 --opt=fast-masked-vload --opt=fast-math
+// 
+// Intel SPMD �͂�����
+// http://ispc.github.com/
+
+
+#include "ispc_vectormath.h"
+#include "SPH_const.h"
+typedef unsigned int32 uint32;
+
+const uniform int32 SPH_FLUID_BLOCK_SIZE = 128;
+
+
+struct Particle
+{
+    float   posx, posy, posz;
+    float   velx, vely, velz;
+};
+
+
+
+static uniform int32 sphFluidBlockNum(uniform int32 n)
+{
+    uniform int32 b = (n / SPH_FLUID_BLOCK_SIZE);
+    b += (n % SPH_FLUID_BLOCK_SIZE > 0 ? 1 : 0);
+    return b;
+}
+
+
+static inline void ComputeParticleInteraction(
+    vec3 pos1,
+    vec3 pos2,
+    vec3 &accel )
+{
+    vec3 diff = pos2 - pos1;
+    float d = length3(diff);
+    if(d > 0.0f) { // d==0: �Փˌ����Փˑ��肪����
+        vec3 dir = diff / d;
+        accel += dir * (-min(0.0f, d-(SPH_PARTICLE_SIZE*2.0f)) * SPH_PARTICLE_STIFFNESS);
+    }
+}
+
+static inline void ComputeGravity(
+    uniform vec3 pos1,
+    uniform vec3 &vel1 )
+{
+    // x,y,z=0,0,0 �Ŕ��a CENTER_SPHERE_RADIUS �̋����������Ƃ��ďd�͂ň������違�Փ˔���
+
+    const uniform float CENTER_SPHERE_RADIUS = 1.5f;
+    const uniform float GRAVITY = -0.002f;
+
+    // ���S�̋��Ƃ̏Փ˔���
+    uniform vec3 diff = pos1;
+    uniform float d = length3(diff);
+    uniform vec3 dir = diff / d;
+    vel1 += dir * (-min(0.0f, d-CENTER_SPHERE_RADIUS) * SPH_PARTICLE_STIFFNESS);
+
+    // �d�͉���
+    vel1 += dir * GRAVITY;
+}
+
+task void UpdateVelocity(
+    uniform int32 particle_num,
+    soa<8> Particle all_particles[] )
+{
+    // particle_num �� SIMD lane �� (programCount) �̔{���łȂ��ꍇ�z���˂������ăA�N�Z�X���܂����A
+    // �K�؂Ƀ}�[�W����݂��Ă���Ƃ����O��̌��ɖ������܂��B
+    // (foreach(i=0 ... particle_num) ���Ɠ˂������Ȃ��悤�ɓK�؂ɑΏ����Ă���܂����A�኱�R�X�g�𔺂����߁A�����Ă������܂�)
+
+    uniform int32 beg = SPH_FLUID_BLOCK_SIZE * taskIndex;
+    uniform int32 end = min(SPH_FLUID_BLOCK_SIZE, particle_num-beg);
+    soa<8> Particle * uniform particles = &all_particles[beg];
+
+    for(uniform int32 i=0; i<end; ++i) {
+        uniform vec3 pos1 = {particles[i].posx, particles[i].posy, particles[i].posz};
+        uniform vec3 vel1 = {particles[i].velx, particles[i].vely, particles[i].velz};
+        vec3 accel = {0.0f, 0.0f, 0.0f};
+        for(uniform int32 ti=0; ti<particle_num; ti+=programCount) {
+            int t = ti+programIndex;
+            vec3 pos2 = {all_particles[t].posx, all_particles[t].posy, all_particles[t].posz};
+            ComputeParticleInteraction(pos1, pos2, accel);
+        }
+
+        vel1.x  += reduce_add(accel.x);
+        vel1.y  += reduce_add(accel.y);
+        vel1.z  += reduce_add(accel.z);
+        ComputeGravity(pos1, vel1);
+
+        particles[i].velx = vel1.x;
+        particles[i].vely = vel1.y;
+        particles[i].velz = vel1.z;
+    }
+}
+
+task void Integrate(
+    uniform int32 particle_num,
+    soa<8> Particle all_particles[] )
+{
+    uniform int32 beg = SPH_FLUID_BLOCK_SIZE * taskIndex;
+    uniform int32 end = min(SPH_FLUID_BLOCK_SIZE, particle_num-beg);
+    soa<8> Particle * uniform particles = &all_particles[beg];
+
+    for(uniform int pi=0; pi<end; pi+=programCount) {
+        int i = pi+programIndex;
+        vec3 pos = {particles[i].posx, particles[i].posy, particles[i].posz};
+        vec3 vel = {particles[i].velx, particles[i].vely, particles[i].velz};
+        pos += vel;
+        particles[i].posx = pos.x;
+        particles[i].posy = pos.y;
+        particles[i].posz = pos.z;
+    }
+}
+
+export void sphUpdate(
+    uniform int32 particle_num,
+    soa<8> Particle particles[] )
+{
+    const uniform int32 block_num = sphFluidBlockNum(particle_num);
+    launch[block_num] UpdateVelocity(particle_num, particles);
+    sync;
+    launch[block_num] Integrate(particle_num, particles);
+    sync;
+}
diff --git a/SPH_types.cpp b/SPH_types.cpp
@@ -0,0 +1,78 @@
+//#include "stdafx.h"
+#include "SPH_types.h"
+#include "SPH_types.h"
+
+void SoAnize( uint32 num, const sphParticle *particles, ispc::Particle_SOA8 *out )
+{
+    uint32 blocks = num/8 + (num%8!=0 ? 1 : 0);
+    for(uint32 bi=0; bi<blocks; ++bi) {
+        uint32 i = 8*bi;
+        ist::soavec34 soa;
+        soa = ist::soa_transpose34(particles[i+0].position, particles[i+1].position, particles[i+2].position, particles[i+3].position);
+        _mm_store_ps(out[bi].posx+0, soa.x());
+        _mm_store_ps(out[bi].posy+0, soa.y());
+        _mm_store_ps(out[bi].posz+0, soa.z());
+        soa = ist::soa_transpose34(particles[i+0].velocity, particles[i+1].velocity, particles[i+2].velocity, particles[i+3].velocity);
+        _mm_store_ps(out[bi].velx+0, soa.x());
+        _mm_store_ps(out[bi].vely+0, soa.y());
+        _mm_store_ps(out[bi].velz+0, soa.z());
+
+        soa = ist::soa_transpose34(particles[i+4].position, particles[i+5].position, particles[i+6].position, particles[i+7].position);
+        _mm_store_ps(out[bi].posx+4, soa.x());
+        _mm_store_ps(out[bi].posy+4, soa.y());
+        _mm_store_ps(out[bi].posz+4, soa.z());
+        soa = ist::soa_transpose34(particles[i+4].velocity, particles[i+5].velocity, particles[i+6].velocity, particles[i+7].velocity);
+        _mm_store_ps(out[bi].velx+4, soa.x());
+        _mm_store_ps(out[bi].vely+4, soa.y());
+        _mm_store_ps(out[bi].velz+4, soa.z());
+    }
+}
+
+void AoSnize( uint32 num, const ispc::Particle_SOA8 *particles, sphParticle *out )
+{
+    uint32 blocks = num/8 + (num%8!=0 ? 1 : 0);
+    for(uint32 bi=0; bi<blocks; ++bi) {
+        uint32 i = 8*bi;
+        ist::soavec44 aos_pos[2] = {
+            ist::soa_transpose44(
+                _mm_load_ps(particles[bi].posx + 0),
+                _mm_load_ps(particles[bi].posy + 0),
+                _mm_load_ps(particles[bi].posz + 0),
+                _mm_set1_ps(1.0f) ),
+            ist::soa_transpose44(
+                _mm_load_ps(particles[bi].posx + 4),
+                _mm_load_ps(particles[bi].posy + 4),
+                _mm_load_ps(particles[bi].posz + 4),
+                _mm_set1_ps(1.0f) ),
+        };
+        out[i+0].position = aos_pos[0][0];
+        out[i+1].position = aos_pos[0][1];
+        out[i+2].position = aos_pos[0][2];
+        out[i+3].position = aos_pos[0][3];
+        out[i+4].position = aos_pos[1][0];
+        out[i+5].position = aos_pos[1][1];
+        out[i+6].position = aos_pos[1][2];
+        out[i+7].position = aos_pos[1][3];
+
+        ist::soavec44 aos_vel[2] = {
+            ist::soa_transpose44(
+                _mm_load_ps(particles[bi].velx + 0),
+                _mm_load_ps(particles[bi].vely + 0),
+                _mm_load_ps(particles[bi].velz + 0),
+                _mm_set1_ps(0.0f) ),
+            ist::soa_transpose44(
+                _mm_load_ps(particles[bi].velx + 4),
+                _mm_load_ps(particles[bi].vely + 4),
+                _mm_load_ps(particles[bi].velz + 4),
+                _mm_set1_ps(0.0f) ),
+        };
+        out[i+0].velocity = aos_vel[0][0];
+        out[i+1].velocity = aos_vel[0][1];
+        out[i+2].velocity = aos_vel[0][2];
+        out[i+3].velocity = aos_vel[0][3];
+        out[i+4].velocity = aos_vel[1][0];
+        out[i+5].velocity = aos_vel[1][1];
+        out[i+6].velocity = aos_vel[1][2];
+        out[i+7].velocity = aos_vel[1][3];
+    }
+}
diff --git a/SPH_types.h b/SPH_types.h
@@ -0,0 +1,35 @@
+#ifndef _SPH_types_h_
+#define _SPH_types_h_
+
+#include "SPH_const.h"
+#include "SPH_core_ispc.h"
+#include "SoA.h"
+typedef int int32;
+typedef unsigned int uint32;
+
+using ist::simdvec4;
+using ist::simdvec8;
+using ist::soavec24;
+using ist::soavec34;
+using ist::soavec44;
+
+struct sphParticle
+{
+    simdvec4 position;
+    simdvec4 velocity;
+    union {
+        struct {
+            float energy;
+            float density;
+            uint32 hash;
+            uint32 dummy[1];
+        } params;
+        simdvec4 paramsv;
+    };
+};
+
+void SoAnize(uint32 num, const sphParticle *particles, ispc::Particle_SOA8 *out);
+void AoSnize(uint32 num, const ispc::Particle_SOA8 *particles, sphParticle *out);
+
+
+#endif // _SPH_types_h_