Skip to content

Commit

Permalink
sphere interaction
Browse files Browse the repository at this point in the history
  • Loading branch information
i-saint committed Apr 9, 2012
0 parents commit 337dbba
Show file tree
Hide file tree
Showing 6 changed files with 725 additions and 0 deletions.
7 changes: 7 additions & 0 deletions SPH_const.h
@@ -0,0 +1,7 @@
#ifndef _SPH_const_h_
#define _SPH_const_h_

#define SPH_PARTICLE_SIZE 0.02f
#define SPH_PARTICLE_STIFFNESS 0.1f

#endif // _SPH_const_h_
124 changes: 124 additions & 0 deletions SPH_core.ispc
@@ -0,0 +1,124 @@
// �R���p�C���́��݂����ȃJ�X�^���r���h�X�e�b�v�ŁB
// ispc %(FullPath) -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4,avx --arch=x86-64 --opt=fast-masked-vload --opt=fast-math
//
// Intel SPMD �͂�����
// http://ispc.github.com/


#include "ispc_vectormath.h"
#include "SPH_const.h"
typedef unsigned int32 uint32;

const uniform int32 SPH_FLUID_BLOCK_SIZE = 128;


struct Particle
{
float posx, posy, posz;
float velx, vely, velz;
};



static uniform int32 sphFluidBlockNum(uniform int32 n)
{
uniform int32 b = (n / SPH_FLUID_BLOCK_SIZE);
b += (n % SPH_FLUID_BLOCK_SIZE > 0 ? 1 : 0);
return b;
}


static inline void ComputeParticleInteraction(
vec3 pos1,
vec3 pos2,
vec3 &accel )
{
vec3 diff = pos2 - pos1;
float d = length3(diff);
if(d > 0.0f) { // d==0: �Փˌ����Փˑ��肪����
vec3 dir = diff / d;
accel += dir * (-min(0.0f, d-(SPH_PARTICLE_SIZE*2.0f)) * SPH_PARTICLE_STIFFNESS);
}
}

static inline void ComputeGravity(
uniform vec3 pos1,
uniform vec3 &vel1 )
{
// x,y,z=0,0,0 �Ŕ��a CENTER_SPHERE_RADIUS �̋����������Ƃ��ďd�͂ň������違�Փ˔���

const uniform float CENTER_SPHERE_RADIUS = 1.5f;
const uniform float GRAVITY = -0.002f;

// ���S�̋��Ƃ̏Փ˔���
uniform vec3 diff = pos1;
uniform float d = length3(diff);
uniform vec3 dir = diff / d;
vel1 += dir * (-min(0.0f, d-CENTER_SPHERE_RADIUS) * SPH_PARTICLE_STIFFNESS);

// �d�͉���
vel1 += dir * GRAVITY;
}

task void UpdateVelocity(
uniform int32 particle_num,
soa<8> Particle all_particles[] )
{
// particle_num �� SIMD lane �� (programCount) �̔{���łȂ��ꍇ�z���˂������ăA�N�Z�X���܂����A
// �K�؂Ƀ}�[�W����݂��Ă���Ƃ����O��̌��ɖ������܂��B
// (foreach(i=0 ... particle_num) ���Ɠ˂������Ȃ��悤�ɓK�؂ɑΏ����Ă���܂����A�኱�R�X�g�𔺂����߁A�����Ă������܂�)

uniform int32 beg = SPH_FLUID_BLOCK_SIZE * taskIndex;
uniform int32 end = min(SPH_FLUID_BLOCK_SIZE, particle_num-beg);
soa<8> Particle * uniform particles = &all_particles[beg];

for(uniform int32 i=0; i<end; ++i) {
uniform vec3 pos1 = {particles[i].posx, particles[i].posy, particles[i].posz};
uniform vec3 vel1 = {particles[i].velx, particles[i].vely, particles[i].velz};
vec3 accel = {0.0f, 0.0f, 0.0f};
for(uniform int32 ti=0; ti<particle_num; ti+=programCount) {
int t = ti+programIndex;
vec3 pos2 = {all_particles[t].posx, all_particles[t].posy, all_particles[t].posz};
ComputeParticleInteraction(pos1, pos2, accel);
}

vel1.x += reduce_add(accel.x);
vel1.y += reduce_add(accel.y);
vel1.z += reduce_add(accel.z);
ComputeGravity(pos1, vel1);

particles[i].velx = vel1.x;
particles[i].vely = vel1.y;
particles[i].velz = vel1.z;
}
}

task void Integrate(
uniform int32 particle_num,
soa<8> Particle all_particles[] )
{
uniform int32 beg = SPH_FLUID_BLOCK_SIZE * taskIndex;
uniform int32 end = min(SPH_FLUID_BLOCK_SIZE, particle_num-beg);
soa<8> Particle * uniform particles = &all_particles[beg];

for(uniform int pi=0; pi<end; pi+=programCount) {
int i = pi+programIndex;
vec3 pos = {particles[i].posx, particles[i].posy, particles[i].posz};
vec3 vel = {particles[i].velx, particles[i].vely, particles[i].velz};
pos += vel;
particles[i].posx = pos.x;
particles[i].posy = pos.y;
particles[i].posz = pos.z;
}
}

export void sphUpdate(
uniform int32 particle_num,
soa<8> Particle particles[] )
{
const uniform int32 block_num = sphFluidBlockNum(particle_num);
launch[block_num] UpdateVelocity(particle_num, particles);
sync;
launch[block_num] Integrate(particle_num, particles);
sync;
}
78 changes: 78 additions & 0 deletions SPH_types.cpp
@@ -0,0 +1,78 @@
//#include "stdafx.h"
#include "SPH_types.h"
#include "SPH_types.h"

void SoAnize( uint32 num, const sphParticle *particles, ispc::Particle_SOA8 *out )
{
uint32 blocks = num/8 + (num%8!=0 ? 1 : 0);
for(uint32 bi=0; bi<blocks; ++bi) {
uint32 i = 8*bi;
ist::soavec34 soa;
soa = ist::soa_transpose34(particles[i+0].position, particles[i+1].position, particles[i+2].position, particles[i+3].position);
_mm_store_ps(out[bi].posx+0, soa.x());
_mm_store_ps(out[bi].posy+0, soa.y());
_mm_store_ps(out[bi].posz+0, soa.z());
soa = ist::soa_transpose34(particles[i+0].velocity, particles[i+1].velocity, particles[i+2].velocity, particles[i+3].velocity);
_mm_store_ps(out[bi].velx+0, soa.x());
_mm_store_ps(out[bi].vely+0, soa.y());
_mm_store_ps(out[bi].velz+0, soa.z());

soa = ist::soa_transpose34(particles[i+4].position, particles[i+5].position, particles[i+6].position, particles[i+7].position);
_mm_store_ps(out[bi].posx+4, soa.x());
_mm_store_ps(out[bi].posy+4, soa.y());
_mm_store_ps(out[bi].posz+4, soa.z());
soa = ist::soa_transpose34(particles[i+4].velocity, particles[i+5].velocity, particles[i+6].velocity, particles[i+7].velocity);
_mm_store_ps(out[bi].velx+4, soa.x());
_mm_store_ps(out[bi].vely+4, soa.y());
_mm_store_ps(out[bi].velz+4, soa.z());
}
}

void AoSnize( uint32 num, const ispc::Particle_SOA8 *particles, sphParticle *out )
{
uint32 blocks = num/8 + (num%8!=0 ? 1 : 0);
for(uint32 bi=0; bi<blocks; ++bi) {
uint32 i = 8*bi;
ist::soavec44 aos_pos[2] = {
ist::soa_transpose44(
_mm_load_ps(particles[bi].posx + 0),
_mm_load_ps(particles[bi].posy + 0),
_mm_load_ps(particles[bi].posz + 0),
_mm_set1_ps(1.0f) ),
ist::soa_transpose44(
_mm_load_ps(particles[bi].posx + 4),
_mm_load_ps(particles[bi].posy + 4),
_mm_load_ps(particles[bi].posz + 4),
_mm_set1_ps(1.0f) ),
};
out[i+0].position = aos_pos[0][0];
out[i+1].position = aos_pos[0][1];
out[i+2].position = aos_pos[0][2];
out[i+3].position = aos_pos[0][3];
out[i+4].position = aos_pos[1][0];
out[i+5].position = aos_pos[1][1];
out[i+6].position = aos_pos[1][2];
out[i+7].position = aos_pos[1][3];

ist::soavec44 aos_vel[2] = {
ist::soa_transpose44(
_mm_load_ps(particles[bi].velx + 0),
_mm_load_ps(particles[bi].vely + 0),
_mm_load_ps(particles[bi].velz + 0),
_mm_set1_ps(0.0f) ),
ist::soa_transpose44(
_mm_load_ps(particles[bi].velx + 4),
_mm_load_ps(particles[bi].vely + 4),
_mm_load_ps(particles[bi].velz + 4),
_mm_set1_ps(0.0f) ),
};
out[i+0].velocity = aos_vel[0][0];
out[i+1].velocity = aos_vel[0][1];
out[i+2].velocity = aos_vel[0][2];
out[i+3].velocity = aos_vel[0][3];
out[i+4].velocity = aos_vel[1][0];
out[i+5].velocity = aos_vel[1][1];
out[i+6].velocity = aos_vel[1][2];
out[i+7].velocity = aos_vel[1][3];
}
}
35 changes: 35 additions & 0 deletions SPH_types.h
@@ -0,0 +1,35 @@
#ifndef _SPH_types_h_
#define _SPH_types_h_

#include "SPH_const.h"
#include "SPH_core_ispc.h"
#include "SoA.h"
typedef int int32;
typedef unsigned int uint32;

using ist::simdvec4;
using ist::simdvec8;
using ist::soavec24;
using ist::soavec34;
using ist::soavec44;

struct sphParticle
{
simdvec4 position;
simdvec4 velocity;
union {
struct {
float energy;
float density;
uint32 hash;
uint32 dummy[1];
} params;
simdvec4 paramsv;
};
};

void SoAnize(uint32 num, const sphParticle *particles, ispc::Particle_SOA8 *out);
void AoSnize(uint32 num, const ispc::Particle_SOA8 *particles, sphParticle *out);


#endif // _SPH_types_h_

0 comments on commit 337dbba

Please sign in to comment.