Skip to content

Commit

Permalink
google_research: Update copybara config to prevent replacement of uin…
Browse files Browse the repository at this point in the history
…t64_t

int64_t, uint64_t, etc. are valid types defined in <cstdint>. The existing copybara rule replaces 'uint64_t' with 'unsigned long long_t', which isn't a valid type. (This is happening in the npy_array repo under google_research/.) Update the regex to exclude this case.

The current config doesn't look safe (uint64 could appear inside another identifier, such as a function name). Alternatively, we could consider retiring the rule, or changing it to list the specific files it targets instead of relying on an exclusion rule.

PiperOrigin-RevId: 356861974
  • Loading branch information
fbleibel-g authored and Copybara-Service committed Feb 11, 2021
1 parent 5fbebaf commit 4c7bac5
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 47 deletions.
4 changes: 2 additions & 2 deletions fast_k_means_2020/fast_k_means_algo.cc
Expand Up @@ -25,11 +25,11 @@ void FastKMeansAlgo::RunAlgorithm(const vector<vector<double>>& input, int k,
int number_greedy_rounds) {
multi_trees_.InitializeTree(input, number_of_trees, scaling_factor);
for (int i = 0; i < k; i++) {
pair<int, unsigned long long_t> best_center_and_improvement (0, 0);
pair<int, uint64_t> best_center_and_improvement (0, 0);
for (int j = 0; j < number_greedy_rounds; j++) {
int next_center = multi_trees_.SampleAPoint();
if (next_center == -1) break;
unsigned long long_t improvement =
uint64_t improvement =
multi_trees_.ComputeCostAndOpen(next_center, false);
// For the case of i = 0, it is important to have equality here.
if (improvement >= best_center_and_improvement.second) {
Expand Down
4 changes: 2 additions & 2 deletions fast_k_means_2020/kmeanspp_seeding.cc
Expand Up @@ -27,8 +27,8 @@ using std::vector;
int KMeansPPSeeding::ReturnD2Sample(const vector<vector<double>>& input) {
double total_prob = 0;
for (int i = 0; i < input.size(); i++) total_prob += distance[i];
unsigned long long_t rand_ll_int =
RandomHandler::eng() % static_cast<unsigned long long_t>(total_prob);
uint64_t rand_ll_int =
RandomHandler::eng() % static_cast<uint64_t>(total_prob);
double rand_ll = static_cast<double>(rand_ll_int);
int picked_center = input.size() - 1;
for (int i = 0; i < input.size(); i++) {
Expand Down
28 changes: 14 additions & 14 deletions fast_k_means_2020/multi_tree_clustering.cc
Expand Up @@ -31,28 +31,28 @@ void MultiTreeClustering::InitializeTree(const vector<vector<double>>& input,
single_trees_[i].InitializeTree(input, scaling_factor);
closets_open_center = vector<int>(input.size());
distance_to_center =
vector<unsigned long long_t>(input.size(), std::numeric_limits<unsigned long long_t>::max());
vector<uint64_t>(input.size(), std::numeric_limits<uint64_t>::max());
number_of_points_ = input.size();
// Finding the boundaries of the tree.
binary_tree_boundery_ = 1;
while (binary_tree_boundery_ < input.size()) binary_tree_boundery_ *= 2;
// To ensure that it fits the binary tree
binary_tree_value_ = vector<unsigned long long_t>(2 * binary_tree_boundery_,
std::numeric_limits<unsigned long long_t>::max());
binary_tree_value_ = vector<uint64_t>(2 * binary_tree_boundery_,
std::numeric_limits<uint64_t>::max());
}

unsigned long long_t MultiTreeClustering::ComputeCostAndOpen(int center, bool open_center) {
uint64_t MultiTreeClustering::ComputeCostAndOpen(int center, bool open_center) {
// To keep the previous costs in case open_center is false.
map<int, unsigned long long_t> old_costs;
map<int, uint64_t> old_costs;
// The amount of improvement that opening this center gives.
unsigned long long_t improvement = 0;
uint64_t improvement = 0;
// This is the first time, so there is no previous cost and the improvement
// is not defined.
if (!open_center &&
distance_to_center[0] == std::numeric_limits<unsigned long long_t>::max())
distance_to_center[0] == std::numeric_limits<uint64_t>::max())
return improvement;
for (int i = 0; i < single_trees_.size(); i++)
for (pair<int, unsigned long long_t>& update :
for (pair<int, uint64_t>& update :
single_trees_[i].ComputeCostAndOpen(center, open_center))
if (update.second < distance_to_center[update.first]) {
if (old_costs.find(update.first) == old_costs.end())
Expand All @@ -70,7 +70,7 @@ unsigned long long_t MultiTreeClustering::ComputeCostAndOpen(int center, bool op
return improvement;
}

void MultiTreeClustering::UpdateDistance(pair<int, unsigned long long_t> update, int left,
void MultiTreeClustering::UpdateDistance(pair<int, uint64_t> update, int left,
int right, int binary_tree_id) {
// Finishing Condition
if (left + 1 >= right) {
Expand All @@ -83,9 +83,9 @@ void MultiTreeClustering::UpdateDistance(pair<int, unsigned long long_t> update,
else
UpdateDistance(update, middle, right, binary_tree_id * 2 + 1);
if (binary_tree_value_[binary_tree_id * 2] !=
std::numeric_limits<unsigned long long_t>::max() &&
std::numeric_limits<uint64_t>::max() &&
binary_tree_value_[binary_tree_id * 2 + 1] !=
std::numeric_limits<unsigned long long_t>::max())
std::numeric_limits<uint64_t>::max())
binary_tree_value_[binary_tree_id] =
binary_tree_value_[binary_tree_id * 2] +
binary_tree_value_[binary_tree_id * 2 + 1];
Expand All @@ -97,14 +97,14 @@ void MultiTreeClustering::UpdateDistance(pair<int, unsigned long long_t> update,

int MultiTreeClustering::SampleAPoint() {
// The first point sampled is based on a uniform distribution.
if (binary_tree_value_[1] == std::numeric_limits<unsigned long long_t>::max()) {
if (binary_tree_value_[1] == std::numeric_limits<uint64_t>::max()) {
return RandomHandler::eng() % number_of_points_;
}
unsigned long long_t chosen_prob = RandomHandler::eng() % binary_tree_value_[1];
uint64_t chosen_prob = RandomHandler::eng() % binary_tree_value_[1];
return SampleAPointRecurse(chosen_prob, 0, binary_tree_boundery_, 1);
}

int MultiTreeClustering::SampleAPointRecurse(unsigned long long_t chosen_prob, int left,
int MultiTreeClustering::SampleAPointRecurse(uint64_t chosen_prob, int left,
int right, int binary_tree_id) {
if (left + 1 >= right) return left;
int middle = (left + right) / 2;
Expand Down
10 changes: 5 additions & 5 deletions fast_k_means_2020/multi_tree_clustering.h
Expand Up @@ -35,7 +35,7 @@ class MultiTreeClustering {
double scaling_factor);

// Returns the benefit of opening center with optional flag to open.
unsigned long long_t ComputeCostAndOpen(int center, bool open_center);
uint64_t ComputeCostAndOpen(int center, bool open_center);

// Samples a point according to distances in the tree.
// The correct distances are D^2.
Expand All @@ -46,22 +46,22 @@ class MultiTreeClustering {
vector<int> closets_open_center;

// The distance of each point to the closest open center.
vector<unsigned long long_t> distance_to_center;
vector<uint64_t> distance_to_center;

private:
void UpdateDistance(pair<int, unsigned long long_t> update, int left, int right,
void UpdateDistance(pair<int, uint64_t> update, int left, int right,
int binary_tree_id);

// Recursive function to sample points based on binary trees.
// Improves the runtime to log n.
int SampleAPointRecurse(unsigned long long_t chosen_prob, int left, int right,
int SampleAPointRecurse(uint64_t chosen_prob, int left, int right,
int binary_tree_id);

// Single trees that we use for computing the distances.
vector<SingleTreeClustering> single_trees_;

// The distances, kept in a binary tree so they can be easily sampled.
vector<unsigned long long_t> binary_tree_value_;
vector<uint64_t> binary_tree_value_;

// The size of the binary tree, e.g., the number of points that we sample from.
// Some points might be empty since the input size might not be a power of two.
Expand Down
2 changes: 1 addition & 1 deletion fast_k_means_2020/preprocess_input_points.cc
Expand Up @@ -44,7 +44,7 @@ void PreProcessInputPoints::RandomShiftSpace(
int max_coordinate = 0;
for (int i = 0; i < input_points->size(); i++)
max_coordinate = max(max_coordinate, (*input_points)[i][j]);
unsigned long long_t shift =
uint64_t shift =
fast_k_means::RandomHandler::eng() % max(1, max_coordinate);
for (int i = 0; i < input_points->size(); i++)
(*input_points)[i][j] += shift;
Expand Down
6 changes: 3 additions & 3 deletions fast_k_means_2020/rejection_sampling_lsh.cc
Expand Up @@ -34,7 +34,7 @@ void RejectionSamplingLSH::RunAlgorithm(const vector<vector<double>>& input,
multi_trees_.InitializeTree(input, number_of_trees, scaling_factor);
double max_prob = 0.0;
while (centers.size() < k) {
pair<int, unsigned long long_t> best_center_and_improvement(0, 0);
pair<int, uint64_t> best_center_and_improvement(0, 0);
// Number of the times that we successfully sample.
int number_sampled = 0;
while (number_sampled < number_greedy_rounds) {
Expand All @@ -49,9 +49,9 @@ void RejectionSamplingLSH::RunAlgorithm(const vector<vector<double>>& input,
max_prob = std::max(prob, max_prob);
}
if (static_cast<double>(RandomHandler::eng() /
std::numeric_limits<unsigned long long_t>::max()) > prob)
std::numeric_limits<uint64_t>::max()) > prob)
continue;
unsigned long long_t improvement =
uint64_t improvement =
multi_trees_.ComputeCostAndOpen(next_center, false);
if (improvement >= best_center_and_improvement.second) {
best_center_and_improvement.first = next_center;
Expand Down
6 changes: 3 additions & 3 deletions fast_k_means_2020/single_tree_clustering.cc
Expand Up @@ -38,10 +38,10 @@ void SingleTreeClustering::InitializeTree(const vector<vector<double>>& input,
closets_open_center = vector<int>(input_.size(), -1);
}

vector<pair<int, unsigned long long_t>> SingleTreeClustering::ComputeCostAndOpen(
vector<pair<int, uint64_t>> SingleTreeClustering::ComputeCostAndOpen(
int center, bool open_center) {
// The new distances if this center gets opened.
vector<pair<int, unsigned long long_t>> updated_distances;
vector<pair<int, uint64_t>> updated_distances;
// The nodes that their distance is updated
set<int> updated_nodes;
vector<int> center_coordinate = input_[center];
Expand All @@ -54,7 +54,7 @@ vector<pair<int, unsigned long long_t>> SingleTreeClustering::ComputeCostAndOpen
for (auto point : tree_.points_in_node[node]) {
if (updated_nodes.find(point) == updated_nodes.end()) {
updated_distances.push_back(
pair<int, unsigned long long_t>(point, static_cast<unsigned long long_t>(1) << (2 * i)));
pair<int, uint64_t>(point, static_cast<uint64_t>(1) << (2 * i)));
if (open_center) closets_open_center[point] = center;
updated_nodes.insert(point);
}
Expand Down
2 changes: 1 addition & 1 deletion fast_k_means_2020/single_tree_clustering.h
Expand Up @@ -37,7 +37,7 @@ class SingleTreeClustering {

// Returns the benefit of opening center with optional flag to open.
// It returns the points and their new cost.
vector<pair<int, unsigned long long_t>> ComputeCostAndOpen(int center, bool open_center);
vector<pair<int, uint64_t>> ComputeCostAndOpen(int center, bool open_center);

// Keeps the id of the closest center of each point.
vector<int> closets_open_center;
Expand Down
2 changes: 1 addition & 1 deletion npy_array/npy_array/npy_array.cc
Expand Up @@ -111,7 +111,7 @@ std::string NpyDataTypeString<uint32_t>() {
}

template <>
std::string NpyDataTypeString<unsigned long long_t>() {
std::string NpyDataTypeString<uint64_t>() {
return "u";
}

Expand Down
2 changes: 1 addition & 1 deletion sketching/utils.h
Expand Up @@ -24,7 +24,7 @@
namespace sketch {

typedef unsigned int uint;
typedef unsigned long long ULONG;
typedef uint64 ULONG;
typedef std::pair<uint, float> IntFloatPair;

inline constexpr int HL = 31;
Expand Down
2 changes: 1 addition & 1 deletion tf3d/ops/gpu_utils.h
Expand Up @@ -60,7 +60,7 @@ __host__ __device__ __forceinline__ uint32 FillLowerBits<uint32>(uint32 n) {
}

template <>
__host__ __device__ __forceinline__ unsigned long long FillLowerBits<unsigned long long>(unsigned long long n) {
__host__ __device__ __forceinline__ uint64 FillLowerBits<uint64>(uint64 n) {
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
Expand Down
4 changes: 2 additions & 2 deletions truss_decomposition/graph.h
Expand Up @@ -197,7 +197,7 @@ struct GraphT {
const std::string &backing_file,
mmapped_vector<std::pair<node_t, node_t>> *adj_pairs) {
// Fingerprint.
unsigned long long_t fingerprint = ReadBinaryOrDie<unsigned long long_t>(f);
uint64_t fingerprint = ReadBinaryOrDie<uint64_t>(f);
CHECK(fingerprint == kFingerprint);
// Number of nodes.
size_t N = ReadBinaryOrDie<node_t>(f);
Expand Down Expand Up @@ -228,7 +228,7 @@ struct GraphT {
FILE *f, bool forward_only, const std::string &backing_file,
mmapped_vector<std::pair<node_t, node_t>> *adj_pairs) {
// Fingerprint.
unsigned long long_t fingerprint = ReadBinaryOrDie<unsigned long long_t>(f);
uint64_t fingerprint = ReadBinaryOrDie<uint64_t>(f);
CHECK(fingerprint == kFingerprintStreamed);
// Number of nodes.
size_t N = ReadBinaryOrDie<node_t>(f);
Expand Down
20 changes: 10 additions & 10 deletions truss_decomposition/intersect_edges.h
Expand Up @@ -20,8 +20,8 @@
#include <cstdlib>

template <typename Graph, typename CB>
void IntersectEdgesSmaller(Graph *__restrict__ g, unsigned long long_t start1,
unsigned long long_t end1, unsigned long long_t start2, unsigned long long_t end2,
void IntersectEdgesSmaller(Graph *__restrict__ g, uint64_t start1,
uint64_t end1, uint64_t start2, uint64_t end2,
const CB &cb) {
size_t k2 = start2;
for (size_t k1 = start1; k1 < end1; k1++) {
Expand Down Expand Up @@ -70,19 +70,19 @@ void IntersectEdgesSmaller(Graph *__restrict__ g, unsigned long long_t start1,
// significantly different, calls IntersectEdgesSmaller. Otherwise, uses SIMD to
// quickly compute the intersection of the lists.
template <typename Graph, typename CB>
void IntersectEdges(Graph *__restrict__ g, unsigned long long_t start1, unsigned long long_t end1,
unsigned long long_t start2, unsigned long long_t end2, const CB &cb) {
void IntersectEdges(Graph *__restrict__ g, uint64_t start1, uint64_t end1,
uint64_t start2, uint64_t end2, const CB &cb) {
size_t factor = 2;
if (factor * (end1 - start1) < end2 - start2) {
return IntersectEdgesSmaller(g, start1, end1, start2, end2, cb);
}
if (end1 - start1 > factor * (end2 - start2)) {
return IntersectEdgesSmaller(
g, start2, end2, start1, end1,
[&cb](unsigned long long_t k2, unsigned long long_t k1) { return cb(k1, k2); });
[&cb](uint64_t k2, uint64_t k1) { return cb(k1, k2); });
}
unsigned long long_t k1 = start1;
unsigned long long_t k2 = start2;
uint64_t k1 = start1;
uint64_t k2 = start2;
// Execute SSE-accelerated version if SSE4.1 is available. If not, run the
// fall-back code for the last N % 4 elements of the list on the full list.
#ifdef __SSE4_1__
Expand Down Expand Up @@ -148,11 +148,11 @@ void IntersectEdges(Graph *__restrict__ g, unsigned long long_t start1, unsigned
if (end1 - k1 > factor * (end2 - k2)) {
return IntersectEdgesSmaller(
g, k2, end2, k1, end1,
[&cb](unsigned long long_t k2, unsigned long long_t k1) { return cb(k1, k2); });
[&cb](uint64_t k2, uint64_t k1) { return cb(k1, k2); });
}
while (k1 < end1 && k2 < end2) {
unsigned long long_t a = g->adj[k1];
unsigned long long_t b = g->adj[k2];
uint64_t a = g->adj[k1];
uint64_t b = g->adj[k2];
if (a < b) {
k1++;
} else if (a > b) {
Expand Down
2 changes: 1 addition & 1 deletion truss_decomposition/td_approx_external.cc
Expand Up @@ -25,7 +25,7 @@
#include "intersect_edges.h" // NOLINT
#include "mmapped_vector.h" // NOLINT

using edge_t = unsigned long long_t;
using edge_t = uint64_t;
using node_t = uint32_t;

using Graph = GraphT<node_t, edge_t>;
Expand Down

0 comments on commit 4c7bac5

Please sign in to comment.