Skip to content

Commit

Permalink
#372: Parallel implementation of update centers procedure.
Browse files Browse the repository at this point in the history
  • Loading branch information
annoviko committed Oct 30, 2017
1 parent ae13c54 commit d5866fb
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 18 deletions.
57 changes: 39 additions & 18 deletions ccore/src/cluster/xmeans.cpp
Expand Up @@ -21,6 +21,7 @@


#include <cmath>
#include <future>
#include <iostream>
#include <limits>
#include <numeric>
Expand All @@ -33,6 +34,9 @@
namespace cluster_analysis {


/* Out-of-class definition of the static constant declared in the xmeans class. */
/* NOTE(review): presumably the default number of worker threads; no code visible here reads it - confirm. */
const std::size_t xmeans::DEFAULT_AMOUNT_THREADS { 10 };


xmeans::xmeans(const dataset & p_centers, const std::size_t p_kmax, const double p_tolerance, const splitting_type p_criterion) :
m_centers(p_centers),
m_maximum_clusters(p_kmax),
Expand Down Expand Up @@ -67,6 +71,7 @@ void xmeans::process(const dataset & data, cluster_data & output_result) {
}
}


void xmeans::improve_parameters(cluster_sequence & improved_clusters, dataset & improved_centers, const index_sequence & available_indexes) {
double current_change = std::numeric_limits<double>::max();

Expand Down Expand Up @@ -185,40 +190,56 @@ std::size_t xmeans::find_proper_cluster(const dataset & analysed_centers, const
return index_optimum;
}

/**
 *
 * @brief   Stub method: ignores its argument and always returns 0.0.
 *
 * @param[in] p: cluster argument that is not read by the body.
 *
 * @return  Always 0.0.
 *
 */
double xmeans::foo(cluster & p) {
    (void) p;   /* the argument is not used */

    const double result = 0.0;
    return result;
}

double xmeans::update_centers(const cluster_sequence & analysed_clusters, dataset & analysed_centers) {
double maximum_change = 0;

/* for each cluster */
for (unsigned int index_cluster = 0; index_cluster < analysed_clusters.size(); index_cluster++) {
std::vector<double> total(analysed_centers[index_cluster].size(), 0);

/* for each object in cluster */
for (cluster::const_iterator object_index_iterator = analysed_clusters[index_cluster].begin(); object_index_iterator < analysed_clusters[index_cluster].end(); object_index_iterator++) {
/* for each dimension */
for (std::size_t dimension = 0; dimension < total.size(); dimension++) {
total[dimension] += (*m_ptr_data)[*object_index_iterator][dimension];
}
}
std::vector<std::future<double>> pool_update_futures;

/* average for each dimension */
for (auto & dimension : total) {
dimension = dimension / analysed_clusters[index_cluster].size();
}

double distance = euclidean_distance_sqrt( &(analysed_centers[index_cluster]), &total );
for (std::size_t index_cluster = 0; index_cluster < analysed_clusters.size(); index_cluster++) {
auto update_functor = std::bind(&xmeans::update_center, this, std::cref(analysed_clusters[index_cluster]), std::ref(analysed_centers[index_cluster]));
pool_update_futures.emplace_back(std::async(std::launch::async, update_functor));
}

for (auto & update_future : pool_update_futures) {
double distance = update_future.get();
if (distance > maximum_change) {
maximum_change = distance;
}

std::copy(total.begin(), total.end(), analysed_centers[index_cluster].begin());
}

return maximum_change;
}


/**
 *
 * @brief   Recalculates a single center as the per-dimension mean of the objects of its cluster.
 *
 * @param[in]     p_cluster: indexes of the cluster objects inside the data referenced by 'm_ptr_data'.
 * @param[in,out] p_center: current center; overwritten with the calculated mean unless the cluster is empty.
 *
 * @return  Value of euclidean_distance_sqrt() between the previous center and the new one, or 0.0
 *          when the cluster is empty (the center is left unchanged in that case).
 *
 */
double xmeans::update_center(const cluster & p_cluster, point & p_center) {
    /* an empty cluster has no mean: dividing the sums by zero would fill the center with NaN */
    if (p_cluster.size() == 0) {
        return 0.0;
    }

    std::vector<double> total(p_center.size(), 0.0);

    /* for each object in cluster */
    for (const auto & object_index : p_cluster) {
        const auto & object = (*m_ptr_data)[object_index];

        /* for each dimension */
        for (std::size_t dimension = 0; dimension < total.size(); dimension++) {
            total[dimension] += object[dimension];
        }
    }

    /* average for each dimension */
    const double cluster_size = static_cast<double>(p_cluster.size());
    for (auto & coordinate : total) {
        coordinate /= cluster_size;
    }

    /* the change must be measured before the previous center is overwritten */
    const double distance = euclidean_distance_sqrt( &p_center, &total );

    std::copy(total.begin(), total.end(), p_center.begin());

    return distance;
}


double xmeans::bayesian_information_criterion(const cluster_sequence & analysed_clusters, const dataset & analysed_centers) const {
std::vector<double> scores(analysed_centers.size(), 0.0);

Expand Down
12 changes: 12 additions & 0 deletions ccore/src/cluster/xmeans.hpp
Expand Up @@ -22,6 +22,7 @@
#pragma once


#include <mutex>
#include <vector>

#include "cluster/cluster_algorithm.hpp"
Expand All @@ -38,6 +39,9 @@ enum class splitting_type {


class xmeans : public cluster_algorithm {
private:
const static std::size_t DEFAULT_AMOUNT_THREADS;

private:
dataset m_centers;

Expand All @@ -51,6 +55,8 @@ class xmeans : public cluster_algorithm {

splitting_type m_criterion;

std::mutex m_mutex;

public:
/**
*
Expand Down Expand Up @@ -89,8 +95,14 @@ class xmeans : public cluster_algorithm {

double update_centers(const cluster_sequence & clusters, dataset & centers);

double update_center(const cluster & p_cluster, point & p_center);

double foo(cluster & p);

void improve_structure(void);

void improve_region_structure(void);

void improve_parameters(cluster_sequence & clusters, dataset & centers, const index_sequence & available_indexes);

double splitting_criterion(const cluster_sequence & clusters, const dataset & centers) const;
Expand Down
Binary file modified pyclustering/core/x64/win/ccore.dll
Binary file not shown.

0 comments on commit d5866fb

Please sign in to comment.