Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ModelCalcerWrapper::CalcFlatTransposed #2413

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 27 additions & 0 deletions catboost/libs/model/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,33 @@ class TFullModel {
CalcFlat(featureRefs, results, featureInfo);
}

/**
 * Evaluate the model on a transposed flat dataset using every tree of the model.
 * Thin convenience overload: forwards to the tree-range overload of
 * CalcFlatTransposed with the range [0, GetTreeCount()).
 * @param[in] features transposed flat features; judging by the C API wrapper the first
 *                     dimension is the feature index and the second is the object index
 *                     (TODO confirm against the tree-range overload)
 * @param[out] results buffer handed to the tree-range overload to receive predictions;
 *                     its required size is defined by that overload
 * @param[in] featureInfo optional feature layout, forwarded unchanged (nullptr by default)
 */
void CalcFlatTransposed(
    TConstArrayRef<TConstArrayRef<float>> features,
    TArrayRef<double> results,
    const TFeatureLayout* featureInfo = nullptr
) const {
    // Use the whole ensemble: trees [0, totalTreeCount).
    const auto totalTreeCount = GetTreeCount();
    CalcFlatTransposed(features, 0, totalTreeCount, results, featureInfo);
}

/**
 * Evaluate the model on a transposed flat dataset stored as owning vectors.
 * Builds a non-owning TConstArrayRef<float> view for every element of `features`
 * and delegates to the TConstArrayRef-based CalcFlatTransposed overload, so the
 * float data itself is not copied.
 * @param[in] features transposed flat features, one TVector<float> per outer element
 *                     (presumably one vector per feature - confirm against the callee)
 * @param[out] results buffer forwarded to the delegate overload to receive predictions
 * @param[in] featureInfo optional feature layout, forwarded unchanged (nullptr by default)
 */
void CalcFlatTransposed(
    TConstArrayRef<TVector<float>> features,
    TArrayRef<double> results,
    const TFeatureLayout* featureInfo = nullptr
) const {
    // One lightweight view per input vector; the views must not outlive `features`.
    TVector<TConstArrayRef<float>> columnViews(features.begin(), features.end());
    CalcFlatTransposed(columnViews, results, featureInfo);
}

/**
* Same as CalcFlat method but for one object
* @param[in] features flat features array reference. First dimension is object index, second dimension is
Expand Down
14 changes: 14 additions & 0 deletions catboost/libs/model_interface/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,20 @@ CATBOOST_API bool CalcModelPredictionFlat(ModelCalcerHandle* modelHandle, size_t
return true;
}

// C API entry point: evaluate the model on a transposed flat dataset.
// Wraps each of the `floatFeaturesSize` input pointers as a view of `docCount` floats
// and calls TFullModel::CalcFlatTransposed with `result`/`resultSize` as the output span.
// Any exception is caught, its message is stored in TErrorMessageHolder, and false is returned.
CATBOOST_API bool CalcModelPredictionFlatTransposed(
    ModelCalcerHandle* modelHandle,
    size_t docCount,
    const float** floatFeatures, size_t floatFeaturesSize,
    double* result, size_t resultSize
) {
    try {
        TVector<TConstArrayRef<float>> columnViews;
        columnViews.reserve(floatFeaturesSize);
        for (size_t featureIdx = 0; featureIdx < floatFeaturesSize; ++featureIdx) {
            // Non-owning view over docCount values of the featureIdx-th input array.
            columnViews.emplace_back(floatFeatures[featureIdx], docCount);
        }
        FULL_MODEL_PTR(modelHandle)->CalcFlatTransposed(columnViews, TArrayRef<double>(result, resultSize));
        return true;
    } catch (...) {
        Singleton<TErrorMessageHolder>()->Message = CurrentExceptionMessage();
        return false;
    }
}

CATBOOST_API bool CalcModelPrediction(
ModelCalcerHandle* modelHandle,
size_t docCount,
Expand Down
21 changes: 20 additions & 1 deletion catboost/libs/model_interface/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ CATBOOST_API bool SetPredictionTypeString(ModelCalcerHandle* modelHandle, const
* Flat here means that float features and categorical feature are in the same float array.
* @param calcer model handle
* @param docCount number of objects
* @param floatFeatures array of array of float (first dimension is object index, second if feature index)
* @param floatFeatures array of array of float (first dimension is object index, second is feature index)
* @param floatFeaturesSize float values array size
* @param result pointer to user allocated results vector
* @param resultSize Result size should be equal to modelApproxDimension * docCount
Expand All @@ -151,6 +151,25 @@ CATBOOST_API bool CalcModelPredictionFlat(
const float** floatFeatures, size_t floatFeaturesSize,
double* result, size_t resultSize);


/**
 * **Use this method only if you really understand what you want.**
 * Calculate raw model predictions on transposed dataset layout
 * @param modelHandle model handle
 * @param docCount number of objects; every floatFeatures[i] is read as an array of docCount floats
 * @param floatFeatures array of array of float (first dimension is feature index, second is object index)
 * @param floatFeaturesSize number of arrays in floatFeatures (size of the first, per-feature dimension)
 * @param result pointer to user allocated results vector
 * @param resultSize Result size should be equal to modelApproxDimension * docCount
 * (e.g. for non multiclass models should be equal to docCount)
 * @return false if error occurred
 */
CATBOOST_API bool CalcModelPredictionFlatTransposed(
ModelCalcerHandle* modelHandle,
size_t docCount,
const float** floatFeatures, size_t floatFeaturesSize,
double* result, size_t resultSize);

/**
* Calculate raw model predictions on float features and string categorical feature values
* @param calcer model handle
Expand Down
1 change: 1 addition & 0 deletions catboost/libs/model_interface/calcer.exports
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ C CalcModelPredictionText
C CalcModelPredictionTextAndEmbeddings
C CalcModelPredictionSingle
C CalcModelPredictionFlat
C CalcModelPredictionFlatTransposed
C CalcModelPredictionWithHashedCatFeatures
C CalcModelPredictionWithHashedCatFeaturesAndTextFeatures
C CalcModelPredictionWithHashedCatFeaturesAndTextAndEmbeddingFeatures
Expand Down
23 changes: 23 additions & 0 deletions catboost/libs/model_interface/wrapped_calcer.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ class ModelCalcerWrapper {
std::vector<double> CalcFlat(const std::vector<std::vector<float>>& features) const {
std::vector<double> result(features.size() * DimensionsCount);
std::vector<const float*> ptrsVector;
ptrsVector.reserve(features.size());
size_t flatVecSize = 0;
for (const auto& flatVec : features) {
flatVecSize = flatVec.size();
Expand All @@ -225,6 +226,28 @@ class ModelCalcerWrapper {
return result;
}

/**
 * Evaluate model on transposed dataset layout.
 * **WARNING** currently supports only singleclass models.
 * @param transposedFeatures one vector per feature; every vector holds the values of that
 *        feature for all objects, so all inner vectors must have the same length
 * @return vector of raw prediction values (docCount * DimensionsCount entries)
 * @throws std::runtime_error if the inner vectors differ in length, or if the underlying
 *         CalcModelPredictionFlatTransposed call reports an error
 */
std::vector<double> CalcFlatTransposed(const std::vector<std::vector<float>>& transposedFeatures) const {
    std::vector<const float*> ptrsVector;
    ptrsVector.reserve(transposedFeatures.size());
    // The object count is defined by the first feature column (0 when there are no columns).
    const size_t docCount = transposedFeatures.empty() ? 0 : transposedFeatures.front().size();
    for (const auto& feature : transposedFeatures) {
        // The C API reads exactly docCount floats through every pointer, so a shorter
        // column would be read out of bounds; reject ragged input up front.
        if (feature.size() != docCount) {
            throw std::runtime_error("CalcFlatTransposed: all feature vectors must have the same number of objects");
        }
        ptrsVector.push_back(feature.data());
    }
    std::vector<double> result(docCount * DimensionsCount);
    if (!CalcModelPredictionFlatTransposed(CalcerHolder.get(), docCount, ptrsVector.data(), transposedFeatures.size(), result.data(), result.size())) {
        throw std::runtime_error(GetErrorString());
    }
    return result;
}

/**
* Evaluate model on vectors of float, categorical, text and embedding feature values.
* **WARNING** categorical and text features string values should not contain zero bytes in the middle of the string (latter this could be changed).
Expand Down