// opencl/broadphase_benchmark/broadphaseKernel.h

//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
// broadphaseKernelCL: OpenCL C source text stored as one C string, built from
// adjacent string literals (one literal per kernel source line, each ending in \n).
// NOTE(review): presumably handed to the OpenCL runtime compiler by host code
// outside this file -- confirm against the caller.
// SHAPE_CONVEX_HULL is only referenced from a commented-out line in this string.
// Quaternion is a float4; the quaternion helpers in this string use xyz as the
// vector part and w as the scalar part.
static const char* broadphaseKernelCL= \
"\n"
"//keep this enum in sync with the CPU version (in AdlCollisionShape.h)\n"
"#define SHAPE_CONVEX_HULL 3\n"
"\n"
"typedef float4 Quaternion;\n"
"\n"
// cross3: thin wrapper that forwards both float4 arguments to the OpenCL
// built-in cross() and returns its result unchanged.
"__inline\n"
"float4 cross3(float4 a, float4 b)\n"
"{\n"
"	return cross(a,b);\n"
"}\n"
"\n"
// dot3F4: 3-component dot product of two float4 values. Copies of a and b with
// w forced to 0 are passed to dot(), so the w lanes never contribute.
"__inline\n"
"float dot3F4(float4 a, float4 b)\n"
"{\n"
"	float4 a1 = (float4)(a.xyz,0.f);\n"
"	float4 b1 = (float4)(b.xyz,0.f);\n"
"	return dot(a1, b1);\n"
"}\n"
"\n"
"\n"
// qtMul: quaternion product a*b.
//   xyz = cross(a,b) + a.w*b + b.w*a
//   w   = a.w*b.w - dot3(a,b)
// Whatever the vector operations leave in ans.w is discarded, because ans.w is
// assigned last.
"__inline\n"
"Quaternion qtMul(Quaternion a, Quaternion b)\n"
"{\n"
"	Quaternion ans;\n"
"	ans = cross3( a, b );\n"
"	ans += a.w*b+b.w*a;\n"
"	ans.w = a.w*b.w - dot3F4(a, b);\n"
"	return ans;\n"
"}\n"
"\n"
// qtInvert: returns q with xyz negated and w kept (the conjugate). That is the
// inverse only when q has unit length; no normalisation is performed here.
"__inline\n"
"Quaternion qtInvert(Quaternion q)\n"
"{\n"
"	return (Quaternion)(-q.xyz, q.w);\n"
"}\n"
"\n"
// qtRotate: rotates vec by q, computed as q * (vec.xyz, 0) * qtInvert(q).
// vec.w is zeroed on a local copy before the two quaternion products.
"__inline\n"
"float4 qtRotate(Quaternion q, float4 vec)\n"
"{\n"
"	Quaternion qInv = qtInvert( q );\n"
"	float4 vcpy = vec;\n"
"	vcpy.w = 0.f;\n"
"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n"
"	return out;\n"
"}\n"
"\n"
// transform: returns qtRotate(*orientation, *p) + *translation.
// The pointer parameters carry no address-space qualifier; the only call in
// this string passes the addresses of kernel-local variables.
"__inline\n"
"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n"
"{\n"
"	return qtRotate( *orientation, *p ) + (*translation);\n"
"}\n"
"\n"
// Matrix3x3: 3x3 matrix stored as three float4 rows. The matrix helpers in this
// string write 0 into the w lane of every row they produce.
// u32: shorthand for unsigned int.
"typedef struct\n"
"{\n"
"	float4	m_row[3];\n"
"} Matrix3x3;\n"
"\n"
"typedef unsigned int u32;\n"
"\n"
"\n"
// Body: per-body state accessed through the __global Body* kernel arguments.
//   m_pos, m_quat      position and orientation
//   m_linVel, m_angVel linear and angular velocity
//   m_collidableIdx    index into the Collidable array (read by initializeGpuAabbsFull)
//   m_invMass          the kernels treat m_invMass == 0 as a body whose velocities are zeroed
//   m_restituitionCoeff, m_frictionCoeff  not read by any kernel in this string
// NOTE(review): the field layout presumably has to match a host-side struct -- confirm.
"typedef struct\n"
"{\n"
"	float4 m_pos;\n"
"	float4 m_quat;\n"
"	float4 m_linVel;\n"
"	float4 m_angVel;\n"
"\n"
"	u32 m_collidableIdx;\n"
"	float m_invMass;\n"
"	float m_restituitionCoeff;\n"
"	float m_frictionCoeff;\n"
"} Body;\n"
"\n"
// Collidable: links a body to a shape.
//   m_shapeType   not read by any kernel in this string
//   m_shapeIndex  initializeGpuAabbsFull uses it to pick the local AABB pair at
//                 [m_shapeIndex*2] / [m_shapeIndex*2+1]; negative values are skipped there
"typedef struct Collidable\n"
"{\n"
"	int m_shapeType;\n"
"	int m_shapeIndex;\n"
"} Collidable;\n"
"\n"
"\n"
// Shape: per-body inverse inertia tensors.
//   m_initInvInertia  read by setupBodiesKernel as the local (unrotated) tensor
//   m_invInertia      written by setupBodiesKernel with the rotated tensor
"typedef struct\n"
"{\n"
"	Matrix3x3 m_invInertia;\n"
"	Matrix3x3 m_initInvInertia;\n"
"} Shape;\n"
"\n"
"\n"
// qtGetRotationMatrix: builds a 3x3 matrix from quaternion quat using the usual
// quaternion-to-rotation-matrix terms, but EVERY entry is passed through fabs().
// Despite the name, the result is therefore the element-wise absolute value of
// the rotation matrix, not the rotation matrix itself. The AABB kernels in this
// string store it in a variable named abs_b and dot its rows with half extents.
// NOTE(review): a caller that needs a signed rotation must not use this result.
// The w lane of every row is set to 0. quat is not normalised here.
"__inline\n"
"Matrix3x3 qtGetRotationMatrix(float4 quat)\n"
"{\n"
"	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
"	Matrix3x3 out;\n"
"\n"
"	out.m_row[0].x=fabs(1-2*quat2.y-2*quat2.z);\n"
"	out.m_row[0].y=fabs(2*quat.x*quat.y-2*quat.w*quat.z);\n"
"	out.m_row[0].z=fabs(2*quat.x*quat.z+2*quat.w*quat.y);\n"
"	out.m_row[0].w = 0.f;\n"
"\n"
"	out.m_row[1].x=fabs(2*quat.x*quat.y+2*quat.w*quat.z);\n"
"	out.m_row[1].y=fabs(1-2*quat2.x-2*quat2.z);\n"
"	out.m_row[1].z=fabs(2*quat.y*quat.z-2*quat.w*quat.x);\n"
"	out.m_row[1].w = 0.f;\n"
"\n"
"	out.m_row[2].x=fabs(2*quat.x*quat.z-2*quat.w*quat.y);\n"
"	out.m_row[2].y=fabs(2*quat.y*quat.z+2*quat.w*quat.x);\n"
"	out.m_row[2].z=fabs(1-2*quat2.x-2*quat2.y);\n"
"	out.m_row[2].w = 0.f;\n"
"\n"
"	return out;\n"
"}\n"
"\n"
"\n"
// btAABBCL: one corner of an axis-aligned box: three float coordinates plus an
// int payload. The AABB kernels store each box as two consecutive entries:
// index 2*i holds the minimum corner, 2*i+1 the maximum corner.
// uw: initializeGpuAabbsSimple writes nodeID into both entries;
//     initializeGpuAabbsFull writes nodeID into the minimum entry and
//     (m_invMass==0 ? 0 : 1) into the maximum entry.
"typedef struct \n"
"{\n"
"	float			fx;\n"
"	float			fy;\n"
"	float			fz;\n"
"	int	uw;\n"
"} btAABBCL;\n"
"\n"
// mtTranspose: returns the transpose of m. Row i of the result is column i of m
// (x, y, z lanes of the three input rows); the w lane of each row is set to 0.
"__inline\n"
"Matrix3x3 mtTranspose(Matrix3x3 m)\n"
"{\n"
"	Matrix3x3 out;\n"
"	out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n"
"	out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n"
"	out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n"
"	return out;\n"
"}\n"
"\n"
"\n"
"\n"
// mtMul: returns the matrix product a*b.
// b is transposed first, so element (i,j) of the result is the 3-component dot
// product of row i of a with row j of transB, i.e. with column j of b.
// The w lanes of a's rows are zeroed on the by-value copy before the loop;
// dot3F4 ignores w in any case. The w lane of every result row is set to 0.
"__inline\n"
"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n"
"{\n"
"	Matrix3x3 transB;\n"
"	transB = mtTranspose( b );\n"
"	Matrix3x3 ans;\n"
"	//	why this doesn't run when 0ing in the for{}\n"
"	a.m_row[0].w = 0.f;\n"
"	a.m_row[1].w = 0.f;\n"
"	a.m_row[2].w = 0.f;\n"
"	for(int i=0; i<3; i++)\n"
"	{\n"
"//	a.m_row[i].w = 0.f;\n"
"		ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n"
"		ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n"
"		ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n"
"		ans.m_row[i].w = 0.f;\n"
"	}\n"
"	return ans;\n"
"}\n"
"\n"
"\n"
// setupBodiesKernel: one work-item per body (nodeID = get_global_id(0), guarded
// by nodeID < numNodes).
// For bodies with m_invMass != 0 it
//   - adds gravity (0,-9.8,0) times a fixed time step of 0.0166666 to linVel[nodeID],
//   - copies position and orientation from g_vertexBuffer, and the xyz of
//     linVel/pAngVel, into gBodies[nodeID],
//   - writes bodyInertias[nodeID].m_invInertia = R * m_initInvInertia * R^T.
// Bodies with m_invMass == 0 only get m_linVel and m_angVel zeroed.
// g_vertexBuffer is indexed as [nodeID + startOffset/4] for the position and
// [nodeID + startOffset/4 + numNodes] for the orientation.
//
// Fix: R was previously taken from qtGetRotationMatrix(), whose entries all go
// through fabs(). R * I * R^T needs the signed rotation matrix, so the rotated
// inverse inertia was wrong for any rotated body. R is now built by the helper
// qtGetSignedRotationMatrix() defined just before the kernel.
// Also removed the unused local BT_GPU_ANGULAR_MOTION_THRESHOLD, whose
// initialiser was a double-precision literal.
// This file is marked as autogenerated, so the same change presumably has to be
// made in the kernel source it is generated from.
"//apply gravity\n"
"//update world inverse inertia tensor\n"
"//copy velocity from arrays to bodies\n"
"//copy transforms from buffer to bodies\n"
"\n"
"//signed rotation matrix of a unit quaternion\n"
"//(qtGetRotationMatrix returns element-wise absolute values, which only suits AABB extents)\n"
"__inline\n"
"Matrix3x3 qtGetSignedRotationMatrix(float4 quat)\n"
"{\n"
"	float4 quat2 = (float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n"
"	Matrix3x3 out;\n"
"\n"
"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n"
"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n"
"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n"
"	out.m_row[0].w = 0.f;\n"
"\n"
"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n"
"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n"
"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n"
"	out.m_row[1].w = 0.f;\n"
"\n"
"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n"
"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n"
"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n"
"	out.m_row[2].w = 0.f;\n"
"\n"
"	return out;\n"
"}\n"
"\n"
"__kernel void \n"
"  setupBodiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,\n"
"		   __global float4 *linVel,\n"
"		   __global float4 *pAngVel,\n"
"		   __global Body* gBodies, __global Shape* bodyInertias\n"
"		   )\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"		\n"
"	float timeStep = 0.0166666f;\n"
"\n"
"	if( nodeID < numNodes )\n"
"	{\n"
"		float inverseMass = gBodies[nodeID].m_invMass;\n"
"		if (inverseMass != 0.f)\n"
"		{\n"
"			float4 position = g_vertexBuffer[nodeID + startOffset/4];\n"
"			float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];\n"
"\n"
"			float4 gravityAcceleration = (float4)(0.f,-9.8f,0.f,0.f);\n"
"			linVel[nodeID] += gravityAcceleration * timeStep;\n"
"		\n"
"			gBodies[nodeID].m_pos = position;\n"
"			gBodies[nodeID].m_quat = orientation;\n"
"\n"
"			gBodies[nodeID].m_linVel = (float4)(linVel[nodeID].xyz,0.f);\n"
"			gBodies[nodeID].m_angVel = (float4)(pAngVel[nodeID].xyz,0.f);\n"
"\n"
"			//world inverse inertia = R * localInvInertia * R^T, with R the signed rotation matrix\n"
"			Matrix3x3 m = qtGetSignedRotationMatrix( orientation);\n"
"			Matrix3x3 mT = mtTranspose( m );\n"
"            Matrix3x3 localInvInertia = bodyInertias[nodeID].m_initInvInertia;\n"
"\n"
"			Matrix3x3 tmp = mtMul(m, localInvInertia);\n"
"\n"
"			Matrix3x3 tmp2 = mtMul(tmp, mT);\n"
"			bodyInertias[nodeID].m_invInertia = tmp2;\n"
"\n"
"		} else\n"
"		{\n"
"			gBodies[nodeID].m_linVel = (float4)(0.f,0.f,0.f,0.f);\n"
"			gBodies[nodeID].m_angVel = (float4)(0.f,0.f,0.f,0.f);\n"
"		}\n"
"\n"
"\n"
"	}\n"
"}\n"
"\n"
"\n"
// copyVelocitiesKernel: one work-item per body (nodeID = get_global_id(0),
// guarded by nodeID < numNodes). Copies the xyz of gBodies[nodeID].m_linVel and
// m_angVel into linVel[nodeID] and pAngVel[nodeID] with w = 0. Bodies with
// m_invMass == 0 get both array entries set to zero instead.
// startOffset, g_vertexBuffer and bodyInertias are not used by the body.
"__kernel void \n"
"  copyVelocitiesKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer,\n"
"		   __global float4 *linVel,\n"
"		   __global float4 *pAngVel,\n"
"		   __global Body* gBodies, __global Shape* bodyInertias\n"
"		   )\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"	\n"
"	if( nodeID < numNodes )\n"
"	{\n"
"		float inverseMass = gBodies[nodeID].m_invMass;\n"
"		if (inverseMass != 0.f)\n"
"		{\n"
"			linVel[nodeID] = (float4)(gBodies[nodeID].m_linVel.xyz,0.f);\n"
"			pAngVel[nodeID] = (float4)(gBodies[nodeID].m_angVel.xyz,0.f);\n"
"		} else\n"
"		{\n"
"			linVel[nodeID] = (float4)(0,0,0,0);\n"
"			pAngVel[nodeID] = (float4)(0,0,0,0);\n"
"		}\n"
"	}\n"
"}\n"
"\n"
"\n"
"\n"
// initializeGpuAabbsSimple: one work-item per body (guarded by nodeID < numNodes).
// Reads position, orientation and color from g_vertexBuffer at
// [nodeID + startOffset/4], [+ numNodes] and [+ 2*numNodes]; the loaded color is
// not used and the color slot is overwritten with green (.4, 1, .4, 1).
// NOTE(review): the /4 suggests startOffset is counted in floats -- confirm with the caller.
// Every body is given fixed half extents of 1.01 per axis. The world-space
// extent is the dot product of each row of the absolute-value matrix returned by
// qtGetRotationMatrix with those half extents.
// Output: pAABB[nodeID*2] = position - extent (min corner),
//         pAABB[nodeID*2+1] = position + extent (max corner); uw = nodeID in both.
"__kernel void \n"
"  initializeGpuAabbsSimple( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global btAABBCL* pAABB)\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"		\n"
"	if( nodeID < numNodes )\n"
"	{\n"
"		float4 position = g_vertexBuffer[nodeID + startOffset/4];\n"
"		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];\n"
"		float4 color = g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes];\n"
"		\n"
"		float4 green = (float4)(.4f,1.f,.4f,1.f);\n"
"		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;\n"
"		\n"
"\n"
"		float4 halfExtents = (float4)(1.01f,1.01f,1.01f,0.f);\n"
"		//float4 extent=(float4)(1.f,1.f,1.f,0.f);\n"
"\n"
"		Matrix3x3 abs_b = qtGetRotationMatrix(orientation);\n"
"\n"
"		float4 extent = (float4) (\n"
"			dot(abs_b.m_row[0],halfExtents),\n"
"			dot(abs_b.m_row[1],halfExtents),\n"
"			dot(abs_b.m_row[2],halfExtents),\n"
"			0.f);\n"
"		\n"
"\n"
"		pAABB[nodeID*2].fx = position.x-extent.x;\n"
"		pAABB[nodeID*2].fy = position.y-extent.y;\n"
"		pAABB[nodeID*2].fz = position.z-extent.z;\n"
"		pAABB[nodeID*2].uw = nodeID;\n"
"\n"
"		pAABB[nodeID*2+1].fx = position.x+extent.x;\n"
"		pAABB[nodeID*2+1].fy = position.y+extent.y;\n"
"		pAABB[nodeID*2+1].fz = position.z+extent.z;\n"
"		pAABB[nodeID*2+1].uw = nodeID;		\n"
"	}\n"
"}\n"
"\n"
"\n"
"\n"
// initializeGpuAabbsFull: one work-item per body (guarded by nodeID < numNodes).
// Reads position, orientation and color from g_vertexBuffer at
// [nodeID + startOffset/4], [+ numNodes] and [+ 2*numNodes]; the loaded color is
// not used and the color slot is overwritten with green (.4, 1, .4, 1).
// The body's shape is found via gBodies[nodeID].m_collidableIdx and
// collidables[...].m_shapeIndex. When m_shapeIndex >= 0:
//   - the local box is read from plocalShapeAABB[shapeIndex*2] (min) and
//     [shapeIndex*2+1] (max), and converted to half extents and a local center,
//   - the center is moved to world space with transform(),
//   - the world extent is the dot product of each row of the absolute-value
//     matrix from qtGetRotationMatrix with the half extents,
//   - pAABB[nodeID*2] = worldCenter - extent with uw = nodeID,
//     pAABB[nodeID*2+1] = worldCenter + extent with uw = (m_invMass==0 ? 0 : 1).
// When m_shapeIndex < 0 nothing is written to pAABB for this body.
"__kernel void \n"
"  initializeGpuAabbsFull( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global Body* gBodies,__global Collidable* collidables, __global btAABBCL* plocalShapeAABB, __global btAABBCL* pAABB)\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"		\n"
"	if( nodeID < numNodes )\n"
"	{\n"
"		float4 position = g_vertexBuffer[nodeID + startOffset/4];\n"
"		float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];\n"
"		float4 color = g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes];\n"
"		\n"
"		float4 green = (float4)(.4f,1.f,.4f,1.f);\n"
"		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;\n"
"		\n"
"	\n"
"		\n"
"	//	int shapeType = gBodies[nodeID].m_shapeType;\n"
"		//if (shapeType==SHAPE_CONVEX_HULL)\n"
"		{\n"
"		\n"
"			\n"
"			int collidableIndex = gBodies[nodeID].m_collidableIdx;\n"
"			int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n"
"			\n"
"			if (shapeIndex>=0)\n"
"			{\n"
"				btAABBCL minAabb = plocalShapeAABB[shapeIndex*2];\n"
"				btAABBCL maxAabb = plocalShapeAABB[shapeIndex*2+1];\n"
"				\n"
"				float4 halfExtents = ((float4)(maxAabb.fx - minAabb.fx,maxAabb.fy - minAabb.fy,maxAabb.fz - minAabb.fz,0.f))*0.5f;\n"
"				float4 localCenter = ((float4)(maxAabb.fx + minAabb.fx,maxAabb.fy + minAabb.fy,maxAabb.fz + minAabb.fz,0.f))*0.5f;\n"
"				\n"
"				float4 worldCenter = transform(&localCenter,&position,&orientation);\n"
"				\n"
"				Matrix3x3 abs_b = qtGetRotationMatrix(orientation);\n"
"				float4 extent = (float4) (	dot(abs_b.m_row[0],halfExtents),dot(abs_b.m_row[1],halfExtents),dot(abs_b.m_row[2],halfExtents),0.f);\n"
"			\n"
"	\n"
"				pAABB[nodeID*2].fx = worldCenter.x-extent.x;\n"
"				pAABB[nodeID*2].fy = worldCenter.y-extent.y;\n"
"				pAABB[nodeID*2].fz = worldCenter.z-extent.z;\n"
"				pAABB[nodeID*2].uw = nodeID;\n"
"	\n"
"				pAABB[nodeID*2+1].fx = worldCenter.x+extent.x;\n"
"				pAABB[nodeID*2+1].fy = worldCenter.y+extent.y;\n"
"				pAABB[nodeID*2+1].fz = worldCenter.z+extent.z;\n"
"				pAABB[nodeID*2+1].uw = gBodies[nodeID].m_invMass==0.f? 0 : 1;\n"
"			}\n"
"		} \n"
"	}\n"
"}\n"
"\n"
"\n"
// broadphaseColorKernel: one work-item per overlapping pair (guarded by
// nodeID < numOverlap). Reads pOverlappingPairs[nodeID] and writes red
// (1, .4, .4, 1) into the g_vertexBuffer color slot
// [index + startOffset/4 + 2*numNodes] of both pair members (pair.x, pair.y).
// Several work-items may write the same slot; they all write the same value.
"__kernel void \n"
"  broadphaseColorKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer, __global int2* pOverlappingPairs, const int numOverlap)\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"	if( nodeID < numOverlap )\n"
"	{\n"
"		int2 pair = pOverlappingPairs[nodeID];\n"
"		float4 red = (float4)(1.f,0.4f,0.4f,1.f);\n"
"		\n"
"		g_vertexBuffer[pair.x + startOffset/4+numNodes+numNodes] = red;\n"
"		g_vertexBuffer[pair.y + startOffset/4+numNodes+numNodes] = red;\n"
"	}\n"
"}\n"
"\n"
"\n"
"\n"
// broadphaseKernel: brute-force proximity coloring, one work-item per body
// (guarded by nodeID < numNodes). Each work-item loops over all numNodes
// positions stored at g_vertexBuffer[i + startOffset/4] and compares them with
// its own position:
//   - equal is set when another body has exactly the same x, y and z,
//   - overlap grows by 0.25 for every other body whose squared distance is < 7.
// The body's color slot [nodeID + startOffset/4 + 2*numNodes] is first set to
// green, then finally written as blue if equal, else red*overlap if overlap > 0,
// else green. These three colors are defined with w = 0.
// The color value loaded at the top is not used.
// The lone ';' after the last literal terminates the broadphaseKernelCL definition.
"__kernel void \n"
"  broadphaseKernel( const int startOffset, const int numNodes, __global float4 *g_vertexBuffer)\n"
"{\n"
"	int nodeID = get_global_id(0);\n"
"	\n"
"//	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254);\n"
"	\n"
"	if( nodeID < numNodes )\n"
"	{\n"
"		float4 position = g_vertexBuffer[nodeID + startOffset/4];\n"
"		//float4 orientation = g_vertexBuffer[nodeID + startOffset/4+numNodes];\n"
"		float4 color = g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes];\n"
"		\n"
"		float4 red = (float4)(1.f,0.f,0.f,0.f);\n"
"		float4 green = (float4)(0.f,1.f,0.f,0.f);\n"
"		float4 blue = (float4)(0.f,0.f,1.f,0.f);\n"
"		float  overlap=0;\n"
"		int equal = 0;\n"
"		\n"
"		g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes] = green;\n"
"		\n"
"		for (int i=0;i<numNodes;i++)\n"
"		{\n"
"			if (i!=nodeID)\n"
"			{\n"
"				float4 otherPosition = g_vertexBuffer[i + startOffset/4];\n"
"				if ((otherPosition.x == position.x)&&\n"
"					(otherPosition.y == position.y)&&\n"
"					(otherPosition.z == position.z))\n"
"						equal=1;\n"
"				\n"
"				\n"
"				float distsqr = \n"
"						((otherPosition.x - position.x)* (otherPosition.x - position.x))+\n"
"						((otherPosition.y - position.y)* (otherPosition.y - position.y))+\n"
"						((otherPosition.z - position.z)* (otherPosition.z - position.z));\n"
"				\n"
"				if (distsqr<7.f)\n"
"					overlap+=0.25f;\n"
"			}\n"
"		}\n"
"		\n"
"		\n"
"		if (equal)\n"
"		{\n"
"				g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]=blue;\n"
"		} else\n"
"		{\n"
"			if (overlap>0.f)\n"
"				g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]=red*overlap;\n"
"			else\n"
"				g_vertexBuffer[nodeID + startOffset/4+numNodes+numNodes]=green;\n"
"		}\n"
"	}\n"
"}\n"
;