This repository has been archived by the owner on Jan 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
shapematching.cuh
99 lines (85 loc) · 2.9 KB
/
shapematching.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include "kernel/cubrasstower.cuh"
// Meshless Deformations Based on Shape Matching
// by Muller et al.
// one block per one shape
__global__ void
shapeMatchingAlphaOne(quaternion * __restrict__ rotations,
float3 * __restrict__ CMs,
float3 * __restrict__ positions,
const int * __restrict__ newIds,
const float3 * __restrict__ initialPositions,
const int2 * __restrict__ rigidBodyParticleIdRange)
{
// typename stuffs
typedef cub::BlockReduce<float3, NUM_MAX_PARTICLE_PER_RIGID_BODY> BlockReduceFloat3;
__shared__ typename BlockReduceFloat3::TempStorage TempStorageFloat3;
// init rigidBodyId, particleRange and numParticles
int rigidBodyId = blockIdx.x;
int2 particleRange = rigidBodyParticleIdRange[rigidBodyId];
int numParticles = particleRange.y - particleRange.x;
int id = particleRange.x + threadIdx.x;
int particleId = newIds[id];
__shared__ float3 CM;
__shared__ matrix3 extractedR;
// find center of mass using block reduce
float3 position = make_float3(0.f);
if (threadIdx.x < numParticles) { position = positions[particleId]; }
float3 sumCM = BlockReduceFloat3(TempStorageFloat3).Sum(position);
if (threadIdx.x == 0)
{
CM = sumCM / (float)numParticles;
CMs[rigidBodyId] = CM;
}
__syncthreads();
float3 qi;
float3 AiCol0 = make_float3(0.f);
float3 AiCol1 = make_float3(0.f);
float3 AiCol2 = make_float3(0.f);
if (threadIdx.x < numParticles)
{
// compute matrix Apq
float3 initialPosition = initialPositions[id];
float3 pi = position - CM;
qi = initialPosition;// do not needed to subtract from initialCM since initialCM = float3(0);
// Matrix Ai refers to p * q
AiCol0 = pi * qi.x;
AiCol1 = pi * qi.y;
AiCol2 = pi * qi.z;
}
// Matrix A refers to Apq
float3 ACol0 = BlockReduceFloat3(TempStorageFloat3).Sum(AiCol0);
float3 ACol1 = BlockReduceFloat3(TempStorageFloat3).Sum(AiCol1);
float3 ACol2 = BlockReduceFloat3(TempStorageFloat3).Sum(AiCol2);
if (threadIdx.x == 0)
{
// extract rotation matrix using method
// using A Robust Method to Extract the Rotational Part of Deformations
// by Muller et al.
quaternion q = rotations[rigidBodyId];
matrix3 R = extract_rotation_matrix3(q);
for (int i = 0; i < 20; i++)
{
matrix3 R = extract_rotation_matrix3(q);
float3 omegaNumerator = (cross(R.col[0], ACol0) +
cross(R.col[1], ACol1) +
cross(R.col[2], ACol2));
float omegaDenominator = 1.0f / fabs(dot(R.col[0], ACol0) +
dot(R.col[1], ACol1) +
dot(R.col[2], ACol2)) + 1e-9f;
float3 omega = omegaNumerator * omegaDenominator;
float w2 = length2(omega);
if (w2 <= 1e-9f) { break; }
float w = sqrtf(w2);
q = mul(angleAxis(omega / w, w), q);
q = normalize(q);
}
extractedR = extract_rotation_matrix3(q);
rotations[rigidBodyId] = q;
}
__syncthreads();
if (threadIdx.x < numParticles)
{
float3 newPosition = extractedR * qi + CM;
positions[particleId] = newPosition;
}
}