Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
140 lines (121 sloc) 4.91 KB
// Copyright (c) 2017, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause
syntax = "proto3";
option optimize_for = LITE_RUNTIME;
package CoreML.Specification;
/**
* A Bayesian probit regressor.
*
* The probit regression model is superficially similar to the more commonly known
* logistic regression, with sampling distribution of the model given by
*
* P(y=+1|x,w) = Φ(<w,x>/β)
*
* where w are the set of weights,
* x are the set of features for the given event,
* β is a model hyper-parameter, and
* Φ is the link function, defined to be the CDF of the normal distribution.
* The weights w[i,j] are Gaussian distributed, with mean μ[i,j] and precision 1/(σ[i,j])^2
* (where i indexes over features and j indexes over the values for the feature).
* The parameter β scales the steepness of the inverse link function.
*
* (see https://en.wikipedia.org/wiki/Probit_model and https://en.wikipedia.org/wiki/Logistic_regression
* for more details on probit model and logistic regression, respectively)
*
* Input: X
* x represents a set of features, each taking on a discrete value (note that continuous values
* would first need to be discretized). x can be represented as a vector where the index i is
* the feature id and x[i] is the feature value. Alternatively, x can be represented as a matrix
* with 2 columns where the first column indicates the feature id and the second column contains
* the feature values, i.e. x[i,0] is the feature id and x[i,1] is the feature value.
*
* additional input features:
* - "optimism": apply a mean shift to the probability, i.e. shift regression mean by o*stdev,
* where o is the "optimism" parameter (see additional output features)
* - "samplingScale": for sampling from posterior, multiply standard deviation by this factor
* - "samplingTruncation": for sampling from posterior, truncate the sampling distribution at the given multiple of standard deviations from the mean
*
* Output: Y
* probability P(y|x,w)
*
* additional output features:
* - mean (regression output before applying link function)
* - variance (regression output variance before applying link function)
* - pessimistic probability: P(y|x,w) with a mean shift parameterized by "optimism" feature
* - sampled probability: p ~ P(y|x,w) with standard deviation scaling parameterized by "samplingScale" feature
* and distribution truncated at a multiple of the standard deviation,
* where the multiple is parameterized by "samplingTruncation" feature.
*
*/
message BayesianProbitRegressor {
  // Gaussian distribution, parameterized by its mean and its precision.
  message Gaussian {
    // Mean of the distribution.
    double mean = 1;

    // Precision of the distribution, i.e. the inverse of the variance.
    double precision = 2;
  }

  // Weight attached to one specific value of a feature.
  // The weight is stored as a Gaussian (mean and precision = 1/variance)
  // so that the uncertainty in the weight is captured as well.
  message FeatureValueWeight {
    // The (discrete) feature value this weight belongs to.
    uint32 featureValue = 1;

    // Distribution of the weight for that feature value.
    Gaussian featureWeight = 2;
  }

  // A feature together with its weights: each feature carries a set of
  // weights for the (discrete) values it can take.
  message FeatureWeight {
    // Id of the feature.
    uint32 featureId = 1;

    // Weights for the individual values of this feature.
    repeated FeatureValueWeight weights = 2;
  }

  // Number of features.
  uint32 numberOfFeatures = 1;

  // Bias term.
  Gaussian bias = 2;

  // Set of features with their associated weights.
  repeated FeatureWeight features = 3;

  // Must match the name of the input feature of type multi-array (1D) in
  // the model description that is to be used as the regression input.
  string regressionInputFeatureName = 10;

  // Must match the name of the optional input feature of type double in
  // the model description that is to be used as the optimism input.
  string optimismInputFeatureName = 11;

  // Must match the name of the optional input feature of type double in
  // the model description that is to be used as the samplingScale input.
  string samplingScaleInputFeatureName = 12;

  // Must match the name of the optional input feature of type double in
  // the model description that is to be used as the samplingTruncation
  // input.
  string samplingTruncationInputFeatureName = 13;

  // Name of the 'mean' output feature.
  string meanOutputFeatureName = 20;

  // Name of the 'variance' output feature.
  string varianceOutputFeatureName = 21;

  // Name of the 'pessimistic' output feature.
  string pessimisticProbabilityOutputFeatureName = 22;

  // Name of the 'sampled' output feature: samples from the scaled
  // posterior probability distribution.
  string sampledProbabilityOutputFeatureName = 23;
}
You can’t perform that action at this time.