Permalink
Cannot retrieve contributors at this time
//////////////////////////////////////////////////////////////////////////////////// | |
// Flags | |
//////////////////////////////////////////////////////////////////////////////////// | |
#define PROFILE 1 | |
#define PRINT_SIMULATED_FRAMES 0 | |
#define PRINT_SCORE_UPDATES 1 | |
#define PRINT_BEST_PARAMETERS 0 | |
#define PRINT_BEST_PARAMETERS_PERIODICALLY 0 | |
#define PRINT_TRIALS 0 | |
#define ENABLE_RESTART 1 | |
#define ENABLE_SHAKE 1 | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Constant values | |
//////////////////////////////////////////////////////////////////////////////////// | |
float MAX_RUNNING_TIME = 9.6; | |
const int MAX_N = 512; | |
const float INF = 1e100; | |
const float G = 9.8; | |
const float E = 0.0; | |
const float DECAY_PER_FRAME = 0.002; | |
const float TIME_PER_FRAME = 0.0005; | |
const float BOUNCE_MARGIN = 0.0001; | |
const float ANTI_PENETRATION_COEFFICIENT = 0.8; | |
const float ANTI_PENETRATION_MILD_COEFFICIENT = 0.03; | |
const float ANTI_PENETRATION_MILD_FRAMES = 300; | |
const int FRAMES_PER_PERIOD = 2500; | |
const int MIN_PERIODS = 2; | |
const int MAX_PERIODS = 11; | |
const float RESTART_THRESHOLD_VS_PERIOD = 1.001; | |
const float RESTART_THRESHOLD_VS_ITERATION = 1.01; | |
const int MAX_RETRY_COUNT = 2; | |
float SHAKE_ACCEL = 2500; | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Headers | |
//////////////////////////////////////////////////////////////////////////////////// | |
#include <iostream> | |
#include <vector> | |
#include <ctime> | |
#include <cmath> | |
#include <sys/time.h> | |
#include <xmmintrin.h> | |
using namespace std; | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Auxiliary classes | |
//////////////////////////////////////////////////////////////////////////////////// | |
#if PROFILE | |
class Profiler { | |
public: | |
int last; | |
long long acc[100]; | |
Profiler() { | |
memset(acc, 0, sizeof(acc)); | |
} | |
void start() { | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
last = tv.tv_usec; | |
} | |
void end(int id) { | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
int elapsed = (tv.tv_usec - last + 1000000) % 1000000; | |
acc[id] += elapsed; | |
} | |
void report() { | |
for (int i = 0; i < 100; i++) if (acc[i] > 0) { | |
cerr << i << ": " << acc[i] / 1000.0 << "(ms), "; | |
} | |
cerr << endl; | |
memset(acc, 0, sizeof(acc)); | |
} | |
}; | |
Profiler prof; | |
#define PROF_START() prof.start(); | |
#define PROF_END(id) prof.end(id); | |
#define PROF_REPORT() prof.report(); | |
#else | |
#define PROF_START() | |
#define PROF_END(id) | |
#define PROF_REPORT() | |
#endif | |
struct Circle | |
{ | |
int index; | |
float m; | |
}; | |
struct Candidate { | |
int a, b; | |
Candidate *prev, *next; | |
}; | |
struct InputStats { | |
float total_area; | |
float max_r; | |
InputStats(): total_area(0.0), max_r(0.0) {} | |
}; | |
struct Parameter { | |
float initial_fill_limit; | |
int first_shake_dir; | |
}; | |
struct Trial { | |
Parameter param; | |
float score; | |
}; | |
bool lessScore(const Trial& lhs, const Trial& rhs) { | |
return lhs.score < rhs.score; | |
} | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Global variables | |
//////////////////////////////////////////////////////////////////////////////////// | |
int N; | |
Circle c[MAX_N]; | |
float best_cost; | |
double best_result[MAX_N*2]; | |
double input_r[MAX_N]; | |
double input_m[MAX_N]; | |
float ball_x[MAX_N]__attribute__((aligned(16))); | |
float ball_y[MAX_N]__attribute__((aligned(16))); | |
float ball_r[MAX_N]__attribute__((aligned(16))); | |
float ball_m[MAX_N]__attribute__((aligned(16))); | |
float ball_vx[MAX_N]__attribute__((aligned(16))); | |
float ball_vy[MAX_N]__attribute__((aligned(16))); | |
float ball_ox[MAX_N]__attribute__((aligned(16))); | |
float ball_oy[MAX_N]__attribute__((aligned(16))); | |
int ball_index[MAX_N]__attribute__((aligned(16))); | |
float ball_gravity[MAX_N]__attribute__((aligned(16))); | |
float ball_inv_m[MAX_N]__attribute__((aligned(16))); | |
float best_x[MAX_N]__attribute__((aligned(16))); | |
float best_y[MAX_N]__attribute__((aligned(16))); | |
float simd_res[4]__attribute__((aligned(16))); | |
unsigned int bounds_x[MAX_N*2]; | |
unsigned int bounds_y[MAX_N*2]; | |
unsigned int BOUNDS_ISLEFT_BIT = (1<<16); | |
unsigned int BOUNDS_NODEID_MASK = (1<<16) - 1; | |
int collisions[MAX_N * 8]; | |
int num_collisions; | |
int overlapping_count[MAX_N][MAX_N]; | |
Candidate candidates[MAX_N][MAX_N]; | |
Candidate* candidates_list_head; | |
float m_mul_div_sum[MAX_N][MAX_N]; | |
float period_best[MAX_N]; | |
const int NUM_TRIALS = 8; | |
Trial trials[NUM_TRIALS]; | |
Trial trials_buf[NUM_TRIALS]; | |
float memo_ball_x[MAX_N]; | |
float memo_ball_y[MAX_N]; | |
float memo_ball_vx[MAX_N]; | |
float memo_ball_vy[MAX_N]; | |
float memo_bounds_x[MAX_N*2]; | |
float memo_bounds_y[MAX_N*2]; | |
int memo_overlapping_count[MAX_N][MAX_N]; | |
Candidate memo_candidates[MAX_N][MAX_N]; | |
Candidate* memo_andidates_list_head; | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Auxiilary functions | |
//////////////////////////////////////////////////////////////////////////////////// | |
static inline unsigned int randxor() { | |
static unsigned int x=123456789,y=362436069,z=521288629,w=88675123; | |
unsigned int t; | |
t=(x^(x<<11));x=y;y=z;z=w; return( w=(w^(w>>19))^(t^(t>>8)) ); | |
} | |
static inline int myrandom (int i) { | |
return randxor() % i; | |
} | |
static inline bool greaterMass(const Circle& lhs, const Circle& rhs) { | |
return lhs.m > rhs.m; | |
} | |
static inline float dot(float* v1, float* v2) { | |
return v1[0] * v2[0] + v1[1] * v2[1]; | |
} | |
static inline void normalize(float* v) { | |
float len = (float)sqrt(v[0] * v[0] + v[1] * v[1]); | |
if (len != 0.0f) { | |
v[0] /= len; | |
v[1] /= len; | |
} | |
} | |
static inline float overlapping_area(float x1, float y1, float r1, float x2, float y2, float r2) { | |
float dx = x2 - x1; | |
float dy = y2 - y1; | |
float rsum = r1 + r2; | |
float dist_squared = dx*dx + dy*dy; | |
if (dist_squared >= rsum*rsum) { | |
return 0; // no overlap | |
} | |
float a = r2, b = r1, c = sqrt(dist_squared); | |
if (c + min(r1, r2) <= max(r1, r2)) { | |
return (float)M_PI * min(r1, r2) * min(r1, r2); // contained | |
} | |
float cosA = (b*b + c*c - a*a) / (2.0f * b * c); | |
float cosB = (a*a + c*c - b*b) / (2.0f * a * c); | |
float fanA = r1 * r1 * acos(cosA); | |
float fanB = r2 * r2 * acos(cosB); | |
float triangle = c * b * (float)sqrt(1.0 - cosA*cosA); | |
return fanA + fanB - triangle; | |
} | |
bool lessBoundX(int i, int j) { | |
int a = (i & BOUNDS_NODEID_MASK); | |
int b = (j & BOUNDS_NODEID_MASK); | |
float x0 = ball_x[a] + (ball_r[a] + BOUNCE_MARGIN) * ((i & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
float x1 = ball_x[b] + (ball_r[b] + BOUNCE_MARGIN) * ((j & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
return x0 < x1; | |
} | |
bool lessBoundY(int i, int j) { | |
int a = (i & BOUNDS_NODEID_MASK); | |
int b = (j & BOUNDS_NODEID_MASK); | |
float y0 = ball_y[a] + (ball_r[a] + BOUNCE_MARGIN) * ((i & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
float y1 = ball_y[b] + (ball_r[b] + BOUNCE_MARGIN) * ((j & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
return y0 < y1; | |
} | |
//////////////////////////////////////////////////////////////////////////////////// | |
// Main class | |
//////////////////////////////////////////////////////////////////////////////////// | |
class CirclesSeparation { | |
public: | |
CirclesSeparation(): hover_circle_(-1), frames_(0), total_frames_(0), periods_(0), | |
frames_in_period_(0), iteration_(0), iteration_score_(INF), paused_(false), progress_(0.0) {} | |
vector<double> minimumWork(vector<double> x, vector<double> y, vector<double> r, vector<double> m) { | |
clock_t start = clock(); | |
setup(x, y, r, m); | |
while (progress_ < 1.0f) { | |
for (int i=0; i<100; i++) update(); | |
clock_t now = clock(); | |
progress_ = (float)(now - start) / (MAX_RUNNING_TIME * CLOCKS_PER_SEC); | |
} | |
#if PRINT_SIMULATED_FRAMES | |
cerr << "total_frames: " << total_frames_ << endl; | |
#endif | |
printBestParams(); | |
vector<double> res(2*N); | |
for (int i=0; i<N; i++) { | |
res[ball_index[i] * 2] = best_x[i]; | |
res[ball_index[i] * 2 + 1] = best_y[i]; | |
} | |
return res; | |
} | |
void setup(const vector<double>& x, const vector<double>& y, const vector<double>& r, const vector<double>& m) { | |
initializeOnce(); | |
readInput(x, y, r, m); | |
analyzeInput(); | |
determineParameter(); | |
precomputeValues(); | |
setupTrials(); | |
start(); | |
} | |
void setupTrials() { | |
for (int i = 0; i < 8; i++) { | |
trials[i].param.first_shake_dir = i % 4; | |
trials[i].param.initial_fill_limit = fill_limit_lb_ + ((fill_limit_ub_ - fill_limit_lb_) / 7 * i); | |
trials[i].score = INF; | |
} | |
random_shuffle(trials, trials + NUM_TRIALS, myrandom); | |
} | |
void update() { | |
if (paused_) return; | |
/////////////////////////////////////////////////////// | |
// apply force | |
PROF_START(); | |
applyForceToCenter(); | |
applyExternalForce(frames_); | |
PROF_END(0); | |
/////////////////////////////////////////////////////// | |
// broad phase (find pairs of balls which may collide) | |
PROF_START(); | |
sortBounds(bounds_x, ball_x, 2*N); | |
sortBounds(bounds_y, ball_y, 2*N); | |
PROF_END(1); | |
/////////////////////////////////////////////////////// | |
// detect collisions strictly | |
PROF_START(); | |
detectCollisions(); | |
PROF_END(2); | |
PROF_START(); | |
if (frames_ % 100 == 99) updateBest(); | |
PROF_END(6); | |
/////////////////////////////////////////////////////// | |
// change velocity | |
PROF_START(); | |
solveConstraints(frames_ < ANTI_PENETRATION_MILD_FRAMES ? | |
ANTI_PENETRATION_MILD_COEFFICIENT : ANTI_PENETRATION_COEFFICIENT); | |
decelerateVelocity(1 - DECAY_PER_FRAME); | |
PROF_END(3); | |
/////////////////////////////////////////////////////// | |
// change position | |
PROF_START(); | |
integrateVelocity(); | |
PROF_END(4); | |
/////////////////////////////////////////////////////// | |
// others | |
frames_++; | |
frames_in_period_++; | |
total_frames_++; | |
PROF_START(); | |
if (frames_in_period_ == FRAMES_PER_PERIOD) mayRestart(); | |
PROF_END(5); | |
if (total_frames_ % 10000 == 0) { | |
PROF_REPORT(); | |
#if PRINT_BEST_PARAMETERS_PERIODICALLY | |
printBestParams(); | |
#endif | |
} | |
} | |
float currentCost() { | |
PROF_START(); | |
float cost = 0; | |
/* | |
for (int i = 0; i < N; i++) { | |
float d[2]; | |
d[0] = ball_x[i] - ball_ox[i]; | |
d[1] = ball_y[i] - ball_oy[i]; | |
cost += (float)sqrt(d[0]*d[0]+d[1]*d[1]) * ball_m[i]; | |
} | |
*/ | |
__m128 reg[8]; | |
for (int i = 0; i < N; i+=4) { | |
reg[0] = _mm_load_ps(&ball_x[i]); | |
reg[1] = _mm_load_ps(&ball_y[i]); | |
reg[2] = _mm_load_ps(&ball_ox[i]); | |
reg[3] = _mm_load_ps(&ball_oy[i]); | |
reg[4] = _mm_load_ps(&ball_m[i]); | |
reg[0] = _mm_sub_ps(reg[0], reg[2]); | |
reg[1] = _mm_sub_ps(reg[1], reg[3]); | |
reg[0] = _mm_mul_ps(reg[0], reg[0]); | |
reg[1] = _mm_mul_ps(reg[1], reg[1]); | |
reg[0] = _mm_add_ps(reg[0], reg[1]); | |
reg[0] = _mm_sqrt_ps(reg[0]); | |
reg[0] = _mm_mul_ps(reg[0], reg[4]); | |
_mm_store_ps(simd_res, reg[0]); | |
cost += simd_res[0]; | |
if (i+1 < N) cost += simd_res[1]; | |
if (i+2 < N) cost += simd_res[2]; | |
if (i+3 < N) cost += simd_res[3]; | |
} | |
PROF_END(7); | |
return cost; | |
} | |
int currentFrame() { | |
return frames_; | |
} | |
int currentIteration() { | |
return iteration_; | |
} | |
int currentPeriod() { | |
return periods_; | |
} | |
void catch_circle(float x, float y) { | |
for (int i = 0; i < N; i++) { | |
float d[2]; | |
d[0] = x - ball_x[i]; | |
d[1] = y - ball_y[i]; | |
if (d[0]*d[0] + d[1]*d[1] < ball_r[i]*ball_r[i]) { | |
ball_vx[i] = 0; | |
ball_vy[i] = 0; | |
hover_circle_ = i; | |
break; | |
} | |
} | |
} | |
void move_circle(float x, float y) { | |
if (hover_circle_ >= 0) { | |
ball_x[hover_circle_] = x; | |
ball_y[hover_circle_] = y; | |
} | |
} | |
void release_circle() { | |
if (hover_circle_ >= 0) { | |
hover_circle_ = -1; | |
} | |
} | |
void ctrlSetPositions(const vector<double>& xs, const vector<double>& ys) { | |
for (int i = 0; i < N; i++) { | |
cerr << "(" << xs[i] << "," << ys[i] << "), "; | |
ball_x[i] = xs[ball_index[i]]; | |
ball_y[i] = ys[ball_index[i]]; | |
} | |
} | |
void ctrlTogglePause() { | |
paused_ = !paused_; | |
} | |
InputStats input_stats_; | |
Parameter param_, best_param_; | |
private: | |
int frames_; | |
int total_frames_; | |
int periods_; | |
int frames_in_period_; | |
int iteration_; | |
float iteration_score_; | |
float period_best_cost, prev_period_best_cost; | |
int hover_circle_; | |
int shake_dir_; | |
bool paused_; | |
float fill_limit_lb_, fill_limit_ub_; | |
float progress_; | |
void initializeOnce() { | |
srand(10); | |
best_cost = INF; | |
for (int i = 0; i < MAX_PERIODS; i++) period_best[i] = INF; | |
} | |
void readInput(const vector<double>& x, const vector<double>& y, const vector<double>& r, const vector<double>& m) { | |
// read and analyze input data | |
N = x.size(); | |
for (int i = 0; i < N; i++) { | |
// save acculate data | |
input_r[i] = r[i]; | |
input_m[i] = m[i]; | |
// properties | |
c[i].index = i; | |
c[i].m = m[i]; | |
} | |
// order circles. (heavier is prior) | |
sort(c, c + N, greaterMass); | |
for (int i = 0; i < N; i++) { | |
ball_x[i] = ball_ox[i] = x[c[i].index]; | |
ball_y[i] = ball_oy[i] = y[c[i].index]; | |
ball_m[i] = m[c[i].index]; | |
ball_r[i] = r[c[i].index]; | |
ball_index[i] = c[i].index; | |
} | |
} | |
void analyzeInput() { | |
for (int i = 0; i < N; i++) { | |
input_stats_.max_r = max(input_stats_.max_r, ball_r[i]); | |
input_stats_.total_area += M_PI * ball_r[i] * ball_r[i]; | |
} | |
} | |
void determineParameter() { | |
if (N >= 200) { | |
fill_limit_lb_ = 0.75; | |
fill_limit_ub_ = 1.05; | |
} else if (N >= 70) { | |
fill_limit_lb_ = 0.65; | |
fill_limit_ub_ = 1.15; | |
} else { | |
fill_limit_lb_ = 0.4; | |
fill_limit_ub_ = 1.4; | |
} | |
fill_limit_ub_ = min(fill_limit_ub_, input_stats_.total_area); | |
} | |
void precomputeValues() { | |
// precompute for balls | |
float avg_dens = N * 0.5 / input_stats_.total_area; | |
for (int i = 0; i < N; i++) { | |
ball_inv_m[i] = 1.0f / ball_m[i]; | |
ball_gravity[i] = G * TIME_PER_FRAME * min(2.0f, 1 + 0.2f * (ball_m[i] / ball_r[i] / ball_r[i]) / avg_dens); | |
} | |
// precompute for pairs | |
for (int i = 0; i < N; i++) for (int j = i; j < N; j++) { | |
m_mul_div_sum[i][j] = m_mul_div_sum[j][i] = c[i].m * c[j].m / (c[i].m + c[j].m); | |
} | |
} | |
void adjustParameter(int iteration) { | |
if (iteration != 0) { | |
trials[(iteration-1)%NUM_TRIALS].score = iteration_score_; | |
} | |
if (iteration != 0 && iteration % NUM_TRIALS == 0) { | |
// reset trials | |
sort(trials, trials + NUM_TRIALS, lessScore); | |
#if PRINT_TRIALS | |
cerr << "============RESULT===========" << endl; | |
for (int i = 0; i < NUM_TRIALS; i++) { | |
cerr << trials[i].param.initial_fill_limit << "/" << trials[i].param.first_shake_dir << ": " | |
<< trials[i].score << endl; | |
} | |
#endif | |
for (int i = 0; i < NUM_TRIALS - 2; i++) { | |
int a = randxor() % (NUM_TRIALS / 2); | |
int b = randxor() % (NUM_TRIALS / 2); | |
float minl = min(trials[a].param.initial_fill_limit, trials[b].param.initial_fill_limit) - 0.03; | |
float maxl = max(trials[a].param.initial_fill_limit, trials[b].param.initial_fill_limit) + 0.03; | |
if (minl < 0) minl = 0; | |
float newlimit = minl + (maxl - minl) * randxor() / UINT_MAX; | |
float newdir = randxor() % 2 == 0 ? trials[a].param.first_shake_dir : trials[b].param.first_shake_dir; | |
trials_buf[i].param.initial_fill_limit = newlimit; | |
trials_buf[i].param.first_shake_dir = newdir; | |
} | |
trials_buf[NUM_TRIALS-2] = trials[0]; | |
trials_buf[NUM_TRIALS-1].param.initial_fill_limit = | |
fill_limit_lb_ + (fill_limit_ub_ - fill_limit_lb_) * randxor() / UINT_MAX; | |
trials_buf[NUM_TRIALS-1].param.first_shake_dir = randxor() % 4; | |
memcpy(trials, trials_buf, sizeof(trials)); | |
#if PRINT_TRIALS | |
cerr << "=============NEW=============" << endl; | |
for (int i = 0; i < NUM_TRIALS; i++) { | |
cerr << trials[i].param.initial_fill_limit << "/" << trials[i].param.first_shake_dir << endl; | |
} | |
#endif | |
random_shuffle(trials, trials + NUM_TRIALS, myrandom); | |
} | |
param_.initial_fill_limit = trials[iteration % NUM_TRIALS].param.initial_fill_limit; | |
param_.first_shake_dir = trials[iteration % NUM_TRIALS].param.first_shake_dir; | |
SHAKE_ACCEL = 2000.0f + 1000.0f * randxor() / UINT_MAX; | |
} | |
void start() { | |
adjustParameter(iteration_); | |
initIteration(); | |
} | |
void initIteration() { | |
frames_ = 0; | |
periods_ = 0; | |
period_best_cost = prev_period_best_cost = INF; | |
iteration_score_ = INF; | |
shake_dir_ = param_.first_shake_dir; | |
// clear state of balls and collisions | |
for (int i = 0; i < N; i++) { | |
ball_vx[i] = ball_vy[i] = 0.0f; | |
ball_x[i] = ball_ox[i]; | |
ball_y[i] = ball_oy[i]; | |
} | |
memset(overlapping_count, 0, sizeof(overlapping_count)); | |
memset(candidates, 0, sizeof(candidates)); | |
for (int i = 0; i < N; i++) for (int j = i; j < N; j++) { | |
candidates[i][j].a = i; | |
candidates[i][j].b = j; | |
} | |
candidates_list_head = NULL; | |
for (int i = 0; i < N; i++) { | |
bounds_x[2*i] = i | BOUNDS_ISLEFT_BIT; | |
bounds_x[2*i+1] = i; | |
bounds_y[2*i] = i | BOUNDS_ISLEFT_BIT; | |
bounds_y[2*i+1] = i; | |
} | |
// set initial position for balls | |
vector<pair<float, int> > priorities(N); | |
for (int i = 0; i < N; i++) { | |
float priority = ball_m[i] / ball_r[i] / ball_r[i]; | |
priorities[i].first = priority; | |
priorities[i].second = i; | |
} | |
sort(priorities.rbegin(), priorities.rend()); | |
float filledArea = 0.0; | |
vector<int> outer; | |
vector<int> inner; | |
for (int i = 0; i < N; i++) { | |
int index = priorities[i].second; | |
if (filledArea > param_.initial_fill_limit) { | |
outer.push_back(index); | |
} else { | |
inner.push_back(index); | |
filledArea += ball_r[index] * ball_r[index] * 3.14; | |
} | |
} | |
// find weighted center | |
float xsum = 0, ysum = 0, msum = 0; | |
for (int i = 0; i < N; i++) { | |
msum += ball_m[i]; | |
xsum += ball_m[i] * ball_x[i]; | |
ysum += ball_m[i] * ball_y[i]; | |
} | |
float center_x = xsum / msum; | |
float center_y = ysum / msum; | |
for (int i = 0; i < outer.size(); i++) { | |
int index = outer[i]; | |
bool canInsert = false; | |
if (i < outer.size() / 3) { | |
double area_sum = ball_r[index] * ball_r[index] * 3.14f;; | |
double search_r = input_stats_.max_r; | |
for (int j = 0; j < inner.size(); j++) { | |
int id2 = inner[j]; | |
area_sum += overlapping_area(ball_x[index], ball_y[index], search_r, | |
ball_x[id2], ball_y[id2], ball_r[id2]); | |
} | |
if (area_sum < search_r * search_r * 3.14f * 0.9f) { | |
canInsert = true; | |
inner.push_back(index); | |
} | |
} | |
if (!canInsert) { | |
float d[2]; | |
d[0] = ball_x[index] - center_x; | |
d[1] = ball_y[index] - center_y; | |
normalize(d); | |
ball_x[index] += d[0] * (2 + ((float)i / outer.size()) * 2.8); | |
ball_y[index] += d[1] * (2 + ((float)i / outer.size()) * 2.8); | |
} | |
} | |
PROF_START(); | |
sort(bounds_x, bounds_x + 2*N, lessBoundX); | |
sort(bounds_y, bounds_y + 2*N, lessBoundY); | |
for (int i = 0; i < 2*N; i++) { | |
if (bounds_x[i] & BOUNDS_ISLEFT_BIT) { | |
int a = bounds_x[i] & BOUNDS_NODEID_MASK; | |
for (int j = i+1; j < 2*N; j++) { | |
int b = bounds_x[j] & BOUNDS_NODEID_MASK; | |
if (b == a) break; | |
if (bounds_x[j] & BOUNDS_ISLEFT_BIT) { | |
overlapping_count[min(a,b)][max(a,b)]++; | |
} | |
} | |
} | |
} | |
for (int i = 0; i < 2*N; i++) { | |
if (bounds_y[i] & BOUNDS_ISLEFT_BIT) { | |
int a = bounds_y[i] & BOUNDS_NODEID_MASK; | |
for (int j = i+1; j < 2*N; j++) { | |
int b = bounds_y[j] & BOUNDS_NODEID_MASK; | |
if (b == a) break; | |
if (bounds_y[j] & BOUNDS_ISLEFT_BIT) { | |
overlapping_count[min(a,b)][max(a,b)]++; | |
if (overlapping_count[min(a,b)][max(a,b)] == 2) { | |
pushPairToList(min(a,b), max(a,b)); | |
} | |
} | |
} | |
} | |
} | |
// sortBounds(bounds_x, ball_x, 2*N); | |
// sortBounds(bounds_y, ball_y, 2*N); | |
PROF_END(7); | |
initPeriod(); | |
} | |
void initPeriod() { | |
frames_in_period_ = 0; | |
prev_period_best_cost = period_best_cost; | |
period_best_cost = INF; | |
} | |
bool shouldRestart() { | |
if (periods_ >= MAX_PERIODS || (iteration_ < NUM_TRIALS && periods_ >= 3)) return true; | |
if (periods_ < MIN_PERIODS) return false; | |
bool res = false; | |
if (period_best_cost == INF || period_best_cost > period_best[periods_-1] * RESTART_THRESHOLD_VS_ITERATION) { | |
res = true; | |
} | |
if (periods_ >= 2 && period_best_cost > prev_period_best_cost * RESTART_THRESHOLD_VS_PERIOD) { | |
res = true; | |
} | |
period_best[periods_-1] = min(period_best[periods_-1], period_best_cost); | |
return res; | |
} | |
void mayRestart() { | |
static int retry_count = 0; | |
if (period_best_cost == best_cost) saveCurrentState(); | |
if (iteration_ > 1 && prev_period_best_cost == best_cost && period_best_cost > best_cost | |
&& retry_count < MAX_RETRY_COUNT) { | |
// back to the end of previous period and retry | |
retry_count++; | |
loadCurrentState(); | |
period_best_cost = prev_period_best_cost; | |
initPeriod(); | |
} else { | |
retry_count = 0; | |
periods_++; | |
if (shouldRestart()) | |
restart(); | |
else | |
initPeriod(); | |
} | |
} | |
void restart() { | |
#if ENABLE_RESTART | |
iteration_++; | |
start(); | |
#endif | |
} | |
void applyForceToCenter() { | |
__m128 reg[8]; | |
for (int i = 0; i < N; i+=4) { | |
// SAME AS FOLLOWING CODE | |
// | |
// float d[2]; | |
// d[0] = ball_ox[i] - ball_x[i]; | |
// d[1] = ball_oy[i] - ball_y[i]; | |
// normalize(d); | |
// ball_vx[i] += d[0] * c[i].gravity; | |
// ball_vy[i] += d[1] * c[i].gravity; | |
reg[0] = _mm_load_ps(&ball_ox[i]); | |
reg[1] = _mm_load_ps(&ball_x[i]); | |
reg[2] = _mm_load_ps(&ball_oy[i]); | |
reg[3] = _mm_load_ps(&ball_y[i]); | |
reg[4] = _mm_load_ps(&ball_vx[i]); | |
reg[5] = _mm_load_ps(&ball_vy[i]); | |
reg[6] = _mm_load_ps(&ball_gravity[i]); | |
reg[0] = _mm_sub_ps(reg[0], reg[1]); | |
reg[2] = _mm_sub_ps(reg[2], reg[3]); | |
reg[1] = _mm_mul_ps(reg[0], reg[0]); | |
reg[3] = _mm_mul_ps(reg[2], reg[2]); | |
reg[1] = _mm_add_ps(reg[1], reg[3]); | |
reg[1] = _mm_add_ps(reg[1], _mm_set1_ps(1e-10f)); | |
reg[1] = _mm_rsqrt_ps(reg[1]); | |
reg[0] = _mm_mul_ps(reg[0], reg[1]); | |
reg[0] = _mm_mul_ps(reg[0], reg[6]); | |
reg[2] = _mm_mul_ps(reg[2], reg[1]); | |
reg[2] = _mm_mul_ps(reg[2], reg[6]); | |
reg[4] = _mm_add_ps(reg[4], reg[0]); | |
reg[5] = _mm_add_ps(reg[5], reg[2]); | |
_mm_store_ps(&ball_vx[i], reg[4]); | |
_mm_store_ps(&ball_vy[i], reg[5]); | |
} | |
} | |
void applyExternalForce(int frame) { | |
if (frame == 0) return; | |
#if ENABLE_SHAKE | |
if (frame != 0 && frame % FRAMES_PER_PERIOD == 0) { | |
if (hasOverlap()) { | |
expandBalls(); | |
} else { | |
shakeBalls(); | |
} | |
} | |
if (periods_ == 0 && frame % 500 == 0) | |
shakeBalls(); | |
#endif | |
} | |
void detectCollisions() { | |
num_collisions = 0; | |
Candidate* node = candidates_list_head; | |
while (node) { | |
int a = node->a; | |
int b = node->b; | |
node = node->next; | |
float dx = ball_x[a] - ball_x[b]; | |
float dy = ball_y[a] - ball_y[b]; | |
float sum = ball_r[a] + ball_r[b] + BOUNCE_MARGIN*2; | |
if (dx * dx + dy * dy < sum * sum) { | |
collisions[num_collisions++] = (a << 16) | b; | |
} | |
} | |
random_shuffle(collisions, collisions + num_collisions, myrandom); | |
} | |
void solveConstraints(float coefficient) { | |
float CD = coefficient / TIME_PER_FRAME; | |
for (int k = 0; k < num_collisions; k++) { | |
int i = collisions[k] >> 16; | |
int j = collisions[k] & ((1<<16)-1); | |
float norm[2]; | |
norm[0] = ball_x[j] - ball_x[i]; | |
norm[1] = ball_y[j] - ball_y[i]; | |
float norm_dist2 = norm[0] * norm[0] + norm[1] * norm[1]; | |
float minimum_distance = ball_r[i] + ball_r[j] + BOUNCE_MARGIN*2; | |
float dv[2]; | |
dv[0] = ball_vx[j] - ball_vx[i]; | |
dv[1] = ball_vy[j] - ball_vy[i]; | |
float len = (float)sqrt(norm_dist2); | |
float len_inv = 1.0f / len; | |
norm[0] *= len_inv; | |
norm[1] *= len_inv; | |
float CDD = CD * (minimum_distance - len); | |
float C = m_mul_div_sum[i][j] * ((1 + E) * dot(dv, norm) - CDD); | |
float ci = C * ball_inv_m[i]; | |
float cj = C * ball_inv_m[j]; | |
ball_vx[i] += norm[0] * ci; | |
ball_vy[i] += norm[1] * ci; | |
ball_vx[j] -= norm[0] * cj; | |
ball_vy[j] -= norm[1] * cj; | |
} | |
} | |
void decelerateVelocity(float rate) { | |
__m128 reg[8]; | |
for (int i = 0; i < N; i += 16) { | |
// SAME AS FOLLOWING CODE | |
// | |
// ball_vx[i] *= 1 - DECAY_PER_FRAME; | |
// ball_vy[i] *= 1 - DECAY_PER_FRAME; | |
int j = i+4, k = i+8, l = i+12; | |
reg[0] = _mm_load_ps(&ball_vx[i]); | |
reg[1] = _mm_load_ps(&ball_vy[i]); | |
reg[2] = _mm_load_ps(&ball_vx[j]); | |
reg[3] = _mm_load_ps(&ball_vy[j]); | |
reg[4] = _mm_load_ps(&ball_vx[k]); | |
reg[5] = _mm_load_ps(&ball_vy[k]); | |
reg[6] = _mm_load_ps(&ball_vx[l]); | |
reg[7] = _mm_load_ps(&ball_vy[l]); | |
reg[0] = _mm_mul_ps(reg[0], _mm_set1_ps(rate)); | |
reg[1] = _mm_mul_ps(reg[1], _mm_set1_ps(rate)); | |
reg[2] = _mm_mul_ps(reg[2], _mm_set1_ps(rate)); | |
reg[3] = _mm_mul_ps(reg[3], _mm_set1_ps(rate)); | |
reg[4] = _mm_mul_ps(reg[4], _mm_set1_ps(rate)); | |
reg[5] = _mm_mul_ps(reg[5], _mm_set1_ps(rate)); | |
reg[6] = _mm_mul_ps(reg[6], _mm_set1_ps(rate)); | |
reg[7] = _mm_mul_ps(reg[7], _mm_set1_ps(rate)); | |
_mm_store_ps(&ball_vx[i], reg[0]); | |
_mm_store_ps(&ball_vy[i], reg[1]); | |
_mm_store_ps(&ball_vx[j], reg[2]); | |
_mm_store_ps(&ball_vy[j], reg[3]); | |
_mm_store_ps(&ball_vx[k], reg[4]); | |
_mm_store_ps(&ball_vy[k], reg[5]); | |
_mm_store_ps(&ball_vx[l], reg[6]); | |
_mm_store_ps(&ball_vy[l], reg[7]); | |
} | |
} | |
void integrateVelocity() { | |
__m128 reg[8]; | |
for (int i = 0; i < N; i+=8) { | |
// SAME AS FOLLOWING CODE | |
// | |
// ball_x[i] += ball_vx[i] * TIME_PER_FRAME; | |
// ball_y[i] += ball_vy[i] * TIME_PER_FRAME; | |
int j = i+4; | |
reg[0] = _mm_load_ps(&ball_vx[i]); | |
reg[1] = _mm_load_ps(&ball_vy[i]); | |
reg[2] = _mm_load_ps(&ball_x[i]); | |
reg[3] = _mm_load_ps(&ball_y[i]); | |
reg[4] = _mm_load_ps(&ball_vx[j]); | |
reg[5] = _mm_load_ps(&ball_vy[j]); | |
reg[6] = _mm_load_ps(&ball_x[j]); | |
reg[7] = _mm_load_ps(&ball_y[j]); | |
reg[0] = _mm_mul_ps(reg[0], _mm_set1_ps(TIME_PER_FRAME)); | |
reg[1] = _mm_mul_ps(reg[1], _mm_set1_ps(TIME_PER_FRAME)); | |
reg[2] = _mm_add_ps(reg[2], reg[0]); | |
reg[3] = _mm_add_ps(reg[3], reg[1]); | |
reg[4] = _mm_mul_ps(reg[4], _mm_set1_ps(TIME_PER_FRAME)); | |
reg[5] = _mm_mul_ps(reg[5], _mm_set1_ps(TIME_PER_FRAME)); | |
reg[6] = _mm_add_ps(reg[6], reg[4]); | |
reg[7] = _mm_add_ps(reg[7], reg[5]); | |
_mm_store_ps(&ball_x[i], reg[2]); | |
_mm_store_ps(&ball_y[i], reg[3]); | |
_mm_store_ps(&ball_x[j], reg[6]); | |
_mm_store_ps(&ball_y[j], reg[7]); | |
} | |
} | |
bool isOverlap(int i, int j) { | |
if (i == j) return false; | |
double dx = (double)ball_x[i]; | |
dx -= (double)ball_x[j]; | |
double dy = (double)ball_y[i]; | |
dy -= (double)ball_y[j]; | |
double rsum = input_r[ball_index[i]] + input_r[ball_index[j]]; | |
return dx*dx + dy*dy < rsum*rsum; | |
} | |
bool hasOverlap() { | |
for (int k = 0; k < num_collisions; k++) { | |
int i = collisions[k] >> 16; | |
int j = collisions[k] & ((1<<16)-1); | |
if (isOverlap(i, j)) | |
return true; | |
} | |
return false; | |
} | |
void expandBalls() { | |
for (int i = 0; i < N; i++) { | |
ball_x[i] *= 1.1; | |
ball_y[i] *= 1.1; | |
} | |
} | |
void shakeBalls() { | |
static const float DX[] = {-1, 1, 0, 0}; | |
static const float DY[] = {0 ,0, -1, 1}; | |
float DV = SHAKE_ACCEL * TIME_PER_FRAME; | |
for (int i = 0; i < N; i++) { | |
ball_vx[i] += DX[shake_dir_] * DV; | |
ball_vy[i] += DY[shake_dir_] * DV; | |
} | |
shake_dir_ = (shake_dir_ + 1) % 4; | |
} | |
void updateBest() { | |
if (hasOverlap()) return; | |
float cost = currentCost(); | |
period_best_cost = min(period_best_cost, cost); | |
iteration_score_ = min(iteration_score_, cost); | |
if (best_cost > cost) { | |
#if PRINT_SCORE_UPDATES | |
cerr << "cost: " << cost << endl; | |
#endif | |
best_cost = cost; | |
memcpy(best_x, ball_x, sizeof(float)*N); | |
memcpy(best_y, ball_y, sizeof(float)*N); | |
best_param_ = param_; | |
} | |
} | |
void printBestParams() { | |
#if PRINT_BEST_PARAMETERS | |
cerr << N << '\t' | |
<< input_stats_.total_area << '\t' << input_stats_.max_r << '\t' | |
<< best_param_.initial_fill_limit << endl; | |
#endif | |
} | |
void saveCurrentState() { | |
memcpy(memo_ball_x, ball_x, sizeof(float) * N); | |
memcpy(memo_ball_y, ball_y, sizeof(float) * N); | |
memcpy(memo_ball_vx, ball_vx, sizeof(float) * N); | |
memcpy(memo_ball_vy, ball_vy, sizeof(float) * N); | |
memcpy(memo_bounds_x, bounds_x, sizeof(float) * N * 2); | |
memcpy(memo_bounds_y, bounds_y, sizeof(float) * N * 2); | |
memcpy(memo_overlapping_count, overlapping_count, sizeof(overlapping_count)); | |
memcpy(memo_candidates, candidates, sizeof(candidates)); | |
memo_andidates_list_head = candidates_list_head; | |
} | |
void loadCurrentState() { | |
memcpy(ball_x, memo_ball_x, sizeof(float) * N); | |
memcpy(ball_y, memo_ball_y, sizeof(float) * N); | |
memcpy(ball_vx, memo_ball_vx, sizeof(float) * N); | |
memcpy(ball_vy, memo_ball_vy, sizeof(float) * N); | |
memcpy(bounds_x, memo_bounds_x, sizeof(float) * N * 2); | |
memcpy(bounds_y, memo_bounds_y, sizeof(float) * N * 2); | |
memcpy(overlapping_count, memo_overlapping_count, sizeof(overlapping_count)); | |
memcpy(candidates, memo_candidates, sizeof(candidates)); | |
candidates_list_head = memo_andidates_list_head; | |
} | |
void pushPairToList(int a, int b) { | |
Candidate* node = &candidates[a][b]; | |
if (candidates_list_head) { | |
candidates_list_head->prev = node; | |
} | |
node->next = candidates_list_head; | |
node->prev = NULL; | |
candidates_list_head = node; | |
} | |
void removePairFromList(int a, int b) { | |
Candidate* node = &candidates[a][b]; | |
if (node->prev) node->prev->next = node->next; | |
if (node->next) node->next->prev = node->prev; | |
if (node == candidates_list_head) candidates_list_head = node->next; | |
} | |
void sortBounds(unsigned int* bounds, float* pos, int size) { | |
for (int i = 1; i < size; i++) { | |
int j = i-1; | |
while (j >= 0) { | |
int a = (bounds[j] & BOUNDS_NODEID_MASK); | |
int b = (bounds[j+1] & BOUNDS_NODEID_MASK); | |
float pos0 = pos[a] + (ball_r[a] + BOUNCE_MARGIN) * ((bounds[j] & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
float pos1 = pos[b] + (ball_r[b] + BOUNCE_MARGIN) * ((bounds[j+1] & BOUNDS_ISLEFT_BIT) ? -1 : 1); | |
if (pos0 > pos1) { | |
if (a > b) swap(a, b); | |
if (!(bounds[j] & BOUNDS_ISLEFT_BIT) && (bounds[j+1] & BOUNDS_ISLEFT_BIT)) { | |
overlapping_count[a][b]++; | |
if (overlapping_count[a][b] == 2) pushPairToList(a, b); | |
} else if ((bounds[j] & BOUNDS_ISLEFT_BIT) && !(bounds[j+1] & BOUNDS_ISLEFT_BIT)) { | |
overlapping_count[a][b]--; | |
if (overlapping_count[a][b] == 1) removePairFromList(a, b); | |
} | |
swap(bounds[j], bounds[j+1]); | |
} else { | |
break; | |
} | |
j--; | |
} | |
} | |
} | |
}; | |