Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PUBDEV-8485 UpliftDRF: Add Qini value metric #6031

Merged
merged 14 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 112 additions & 38 deletions h2o-core/src/main/java/hex/AUUC.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,17 @@ public class AUUC extends Iced{
public final long[] _yControl; // control group and y==1
public final long[] _frequency; // number of data in each bin
public final long[] _frequencyCumsum; // cumulative sum of frequency to plot AUUC
public double[][] _uplift; // output uplift values
public double[][] _uplift; // output uplift values
public double[][] _upliftRandom; // output random uplift values
public final long _n; // number of data

public static final int NBINS = 1000;

public final AUUCType _auucType;
public final int _auucTypeIndx;
public double[] _auucs;
public final AUUCType _auucType; // default auuc metric
public final int _auucTypeIndx; // index of the default auuc metric; used to pick its row in the per-metric arrays
public double[] _auucs; // areas under the uplift curve for all metrics
public double[] _auucsRandom; // areas under the random uplift curve for all metrics
public double[] _aecu; // average excess cumulative uplift for all metrics (auuc - auuc random)

public double threshold( int idx ) { return _ths[idx]; }
public long treatment( int idx ) { return _treatment[idx]; }
Expand All @@ -49,6 +52,11 @@ public double[] upliftByType(AUUCType type){
int idx = Arrays.asList(AUUC.AUUCType.VALUES).indexOf(type);
return _uplift[idx];
}
/**
 * Returns the random uplift value of the default metric at the given bin.
 *
 * @param idx bin index
 * @return entry {@code idx} of the default metric's row in {@code _upliftRandom}
 */
public double upliftRandom(int idx) {
    final double[] defaultMetricRow = _upliftRandom[_auucTypeIndx];
    return defaultMetricRow[idx];
}
/**
 * Returns the random uplift values stored for the given metric type.
 *
 * @param type metric type to look up
 * @return the row of {@code _upliftRandom} at the position of {@code type} in {@code AUUCType.VALUES}
 */
public double[] upliftRandomByType(AUUCType type) {
    final int typePosition = Arrays.asList(AUUC.AUUCType.VALUES).indexOf(type);
    return _upliftRandom[typePosition];
}
/**
 * Convenience constructor: forwards all arguments to the overload that takes a bin count,
 * passing {@code NBINS} as the number of bins.
 * NOTE(review): the exact meaning of probs / y / uplift is defined by the delegated
 * constructor, which is not shown here - confirm before relying on it.
 */
public AUUC(Vec probs, Vec y, Vec uplift, AUUCType auucType) {
this(NBINS, probs, y, uplift, auucType);
}
Expand Down Expand Up @@ -81,7 +89,8 @@ private AUUC(AUUCBuilder bldr, boolean trueProbabilities, AUUCType auucType) {
_frequency = Arrays.copyOf(bldr._frequency, _nBins);
_frequencyCumsum = Arrays.copyOf(bldr._frequency, _nBins);
_uplift = new double[AUUCType.values().length][_nBins];

_upliftRandom = new double[AUUCType.values().length][_nBins];

// Rollup counts
long tmpt=0, tmpc=0, tmptp = 0, tmpcp = 0, tmpf= 0;
for( int i=0; i<_nBins; i++ ) {
Expand All @@ -91,6 +100,22 @@ private AUUC(AUUCBuilder bldr, boolean trueProbabilities, AUUCType auucType) {
tmpcp += _yControl[i]; _yControl[i] = tmpcp;
tmpf += _frequencyCumsum[i]; _frequencyCumsum[i] = tmpf;
}

// these methods need to be called in this order: setUpliftRandom() reads the _uplift values filled by setUplift()
setUplift();
setUpliftRandom();

if (trueProbabilities) {
_auucs = computeAuucs();
_auucsRandom = computeAuucsRandom();
_aecu = computeAecu();
_maxIdx = _auucType.maxCriterionIdx(this);
} else {
_maxIdx = 0;
}
}

public void setUplift(){
for(int i=0; i<AUUCType.VALUES.length; i++) {
for (int j = 0; j < _nBins; j++) {
_uplift[i][j] = AUUCType.VALUES[i].exec(this, j);
Expand All @@ -103,11 +128,15 @@ private AUUC(AUUCBuilder bldr, boolean trueProbabilities, AUUCType auucType) {
ArrayUtils.interpolateLinear(_uplift[i]);
}
}
if (trueProbabilities) {
_auucs = computeAuucs();
_maxIdx = _auucType.maxCriterionIdx(this);
} else {
_maxIdx = 0;
}

/**
 * Fills {@code _upliftRandom} for every metric type.
 * For each metric the values form a straight line in the cumulative frequency:
 * the slope is the metric's uplift in the last bin divided by the last cumulative
 * frequency, and each bin gets slope * cumulative frequency of that bin.
 * Reads {@code _uplift}, so that array has to be populated beforehand.
 */
public void setUpliftRandom() {
    final int lastBin = _nBins - 1;
    for (int metric = 0; metric < AUUCType.VALUES.length; metric++) {
        final double slope = _uplift[metric][lastBin] / _frequencyCumsum[lastBin];
        final double[] randomCurve = _upliftRandom[metric];
        for (int bin = 0; bin < _nBins; bin++) {
            randomCurve[bin] = slope * _frequencyCumsum[bin];
        }
    }
}

Expand Down Expand Up @@ -157,40 +186,74 @@ public static double[] calculateQuantileThresholds(int groups, Vec preds) {
return quantiles;
}

private double computeAuuc(){
private double computeAuuc(double[] uplift){
double area = 0;
for( int i = 0; i < _nBins; i++ ) {
area += uplift(i) * frequency(i);
area += uplift[i] * frequency(i);
}
return area/(_n+1);
}

/**
 * Computes the areas for all metric types from the {@code _uplift} values.
 *
 * @return result of {@code computeAuucs(double[][])} applied to {@code _uplift}
 */
private double[] computeAuucs() {
    final double[][] upliftCurves = _uplift;
    return computeAuucs(upliftCurves);
}

/**
 * Computes the areas for all metric types from the {@code _upliftRandom} values.
 *
 * @return result of {@code computeAuucs(double[][])} applied to {@code _upliftRandom}
 */
private double[] computeAuucsRandom() {
    final double[][] randomCurves = _upliftRandom;
    return computeAuucs(randomCurves);
}

private double[] computeAuucs(double[][] uplift){
AUUCType[] auucTypes = AUUCType.VALUES;
double[] auucs = new double[auucTypes.length];
for(int i = 0; i < auucTypes.length; i++ ) {
double area = 0;
for(int j = 0; j < _nBins; j++) {
area += _uplift[i][j] * frequency(j);
area += uplift[i][j] * frequency(j);
}
auucs[i] = area/(_n+1);
}
return auucs;
}

/**
 * Computes the average excess cumulative uplift for every metric type:
 * the metric's AUUC minus its random AUUC.
 *
 * @return array with one entry per element of {@code _auucs}
 */
private double[] computeAecu() {
    final int metricCount = _auucs.length;
    final double[] excess = new double[metricCount];
    for (int metric = 0; metric < metricCount; metric++) {
        final double modelArea = auuc(metric);
        final double randomArea = auucRandom(metric);
        excess[metric] = modelArea - randomArea;
    }
    return excess;
}

/**
 * Returns the AUUC value stored for the given metric type.
 *
 * @param type metric type to look up
 * @return {@code auuc(int)} at the position of {@code type} in {@code AUUCType.VALUES}
 */
public double auucByType(AUUCType type) {
    final int typePosition = Arrays.asList(AUUC.AUUCType.VALUES).indexOf(type);
    return auuc(typePosition);
}

public double auuc(int idx){
return _auucs[idx];

/**
 * Returns the random AUUC value stored for the given metric type.
 *
 * @param type metric type to look up
 * @return {@code auucRandom(int)} at the position of {@code type} in {@code AUUCType.VALUES}
 */
public double auucRandomByType(AUUCType type) {
    final int typePosition = Arrays.asList(AUUC.AUUCType.VALUES).indexOf(type);
    return auucRandom(typePosition);
}

/**
 * Returns the AECU value stored for the given metric type.
 *
 * @param type metric type to look up
 * @return {@code aecu(int)} at the position of {@code type} in {@code AUUCType.VALUES}
 */
public double aecuByType(AUUCType type) {
    final int typePosition = Arrays.asList(AUUC.AUUCType.VALUES).indexOf(type);
    return aecu(typePosition);
}

/**
 * Returns the AUUC value at the given metric index.
 *
 * @param idx index into {@code _auucs}
 * @return {@code _auucs[idx]}
 */
public double auuc(int idx) {
    final double[] areas = _auucs;
    return areas[idx];
}

public double auuc(){
return auuc(_auucTypeIndx);
/** Returns the AUUC value of the default metric (the one at {@code _auucTypeIndx}). */
public double auuc() {
    final int defaultMetric = _auucTypeIndx;
    return auuc(defaultMetric);
}

/**
 * Returns the random AUUC value at the given metric index.
 *
 * @param idx index into {@code _auucsRandom}
 * @return {@code _auucsRandom[idx]}
 */
public double auucRandom(int idx) {
    final double[] randomAreas = _auucsRandom;
    return randomAreas[idx];
}

/** Returns the random AUUC value of the default metric (the one at {@code _auucTypeIndx}). */
public double auucRandom() {
    final int defaultMetric = _auucTypeIndx;
    return auucRandom(defaultMetric);
}

/**
 * Returns the AECU value at the given metric index.
 *
 * @param idx index into {@code _aecu}
 * @return {@code _aecu[idx]}
 */
public double aecu(int idx) {
    final double[] excessAreas = _aecu;
    return excessAreas[idx];
}

/** Returns the AECU value of the default metric (the one at {@code _auucTypeIndx}). */
public double aecu() {
    final int defaultMetric = _auucTypeIndx;
    return aecu(defaultMetric);
}

/** Returns the Qini value, i.e. the AECU value of the {@code AUUCType.qini} metric. */
public double qini() {
    final AUUCType qiniType = AUUCType.qini;
    return aecuByType(qiniType);
}

private static class AUUCImpl extends MRTask<AUUCImpl> {
final double[] _thresholds;
AUUCBuilder _bldr;
Expand Down Expand Up @@ -283,28 +346,39 @@ private String toDebugString() {
* from the basic parts, and from an AUUC at a given threshold index.
*/
public enum AUUCType {
AUTO() { @Override double exec(long treatment, long control, long yTreatment, long yControl) {
return qini.exec(treatment, control, yTreatment, yControl);
} },
qini() { @Override double exec(long treatment, long control, long yTreatment, long yControl) {
double norm = treatment / (double)control;
return yTreatment - yControl * norm;
} },
lift() { @Override double exec(long treatment, long control, long yTreatment, long yControl) {
return yTreatment / (double)treatment - yControl / (double)control;
} },
gain() { @Override double exec(long treatment, long control, long yTreatment, long yControl) {
return lift.exec(treatment, control, yTreatment, yControl) * (double)(treatment + control);
} };
AUTO() {
@Override
double exec(long treatment, long control, long yTreatment, long yControl) {
return qini.exec(treatment, control, yTreatment, yControl);
}
},
qini() {
@Override
double exec(long treatment, long control, long yTreatment, long yControl) {
double norm = treatment / (double)control;
return yTreatment - yControl * norm;
}
},
lift() {
@Override
double exec(long treatment, long control, long yTreatment, long yControl) {
return yTreatment / (double) treatment - yControl / (double)control;
}
},
gain() {
@Override
double exec(long treatment, long control, long yTreatment, long yControl) {
return lift.exec(treatment, control, yTreatment, yControl) * (double)(treatment + control);}
};

/** @param threshold
/** @param treatment
* @param control
* @param yTreatment
* @param yControl
* @return metric value */
abstract double exec(long threshold, long control, long yTreatment, long yControl );
public double exec( AUUC auc, int idx ) { return exec(auc.treatment(idx),auc.control(idx),auc.yTreatment(idx),auc.yControl(idx)); }

abstract double exec(long treatment, long control, long yTreatment, long yControl );
public double exec(AUUC auc, int idx) { return exec(auc.treatment(idx),auc.control(idx),auc.yTreatment(idx),auc.yControl(idx)); }
public static final AUUCType[] VALUES = values();

public static AUUCType fromString(String strRepr) {
Expand All @@ -316,15 +390,15 @@ public static AUUCType fromString(String strRepr) {
return null;
}

public double maxCriterion(AUUC auuc ) { return exec(auuc, maxCriterionIdx(auuc)); }
public double maxCriterion(AUUC auuc) { return exec(auuc, maxCriterionIdx(auuc)); }

/** Convert a criterion into a threshold index that maximizes the criterion
* @return Threshold index that maximizes the criterion
*/
public int maxCriterionIdx(AUUC auuc ) {
public int maxCriterionIdx(AUUC auuc) {
double md = -Double.MAX_VALUE;
int mx = -1;
for( int i=0; i<auuc._nBins; i++ ) {
for( int i=0; i<auuc._nBins; i++) {
double d = exec(auuc,i);
if( d > md ) {
md = d;
Expand Down
4 changes: 4 additions & 0 deletions h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ public String toString() {
}

/** Returns the default AUUC value of the wrapped {@code _auuc} object. */
public double auuc() {
    final double defaultAuuc = _auuc.auuc();
    return defaultAuuc;
}

/** Returns the default random AUUC value of the wrapped {@code _auuc} object. */
public double auucRandom() {
    final double defaultRandomAuuc = _auuc.auucRandom();
    return defaultRandomAuuc;
}

/** Returns the Qini value of the wrapped {@code _auuc} object. */
public double qini() {
    final double qiniValue = _auuc.qini();
    return qiniValue;
}

@Override
protected StringBuilder appendToStringMetrics(StringBuilder sb) {
Expand Down
2 changes: 2 additions & 0 deletions h2o-core/src/main/java/hex/ScoreKeeper.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class ScoreKeeper extends Iced {
public double _anomaly_score = Double.NaN;
public double _anomaly_score_normalized = Double.NaN;
// Uplift metrics; assigned from ModelMetricsBinomialUplift.auuc() / qini() when such metrics are scored.
// NOTE(review): unlike the neighbouring score fields these have no Double.NaN initializer,
// so they default to 0.0 when no uplift metrics are filled in - confirm this is intended.
public double _AUUC;
public double _qini;

public ScoreKeeper() {}

Expand Down Expand Up @@ -111,6 +112,7 @@ else if (m instanceof ModelMetricsMultinomial) {
((ScoreKeeperAware) m).fillTo(this);
} else if (m instanceof ModelMetricsBinomialUplift){
_AUUC = ((ModelMetricsBinomialUplift)m).auuc();
_qini = ((ModelMetricsBinomialUplift)m).qini();
}
if (m._custom_metric != null )
_custom_metric = m._custom_metric.value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ public class ModelMetricsBinomialUpliftV3<I extends ModelMetricsBinomialUplift,
@API(help="The default AUUC for this scoring run.", direction=API.Direction.OUTPUT)
public double AUUC;

@API(help="The Qini value for this scoring run.", direction=API.Direction.OUTPUT)
public double qini;

@API(help="The class labels of the response.", direction=API.Direction.OUTPUT)
public String[] domain;

Expand All @@ -25,57 +28,63 @@ public class ModelMetricsBinomialUpliftV3<I extends ModelMetricsBinomialUplift,
@API(help = "Table of all types of AUUC.", direction = API.Direction.OUTPUT, level = API.Level.secondary)
public TwoDimTableV3 auuc_table;

@API(help = "Table of all types of AECU values.", direction = API.Direction.OUTPUT, level = API.Level.secondary)
public TwoDimTableV3 aecu_table;

@Override
public S fillFromImpl(ModelMetricsBinomialUplift modelMetrics) {
super.fillFromImpl(modelMetrics);

AUUC auuc = modelMetrics._auuc;
if (null != auuc) {
AUUC = auuc.auuc();

qini = auuc.qini();
// Fill TwoDimTable
String[] thresholds = new String[auuc._nBins];
AUUCType metrics[] = AUUCType.VALUES;
metrics = ArrayUtils.remove(metrics, Arrays.asList(metrics).indexOf(AUUCType.AUTO));
int metricsLength = metrics.length;
long[] n = new long[auuc._nBins];
double[][] uplift = new double[metricsLength][];
double[][] upliftRandom = new double[metricsLength][];
for( int i = 0; i < auuc._nBins; i++ ) {
thresholds[i] = Double.toString(auuc._ths[i]);
n[i] = auuc._frequencyCumsum[i];
}
String[] colHeaders = new String[metricsLength + 3];
String[] colHeadersMax = new String[metricsLength + 3];
String[] types = new String[metricsLength + 3];
String[] formats = new String[metricsLength + 3];
String[] colHeaders = new String[2 * metricsLength + 3];
String[] types = new String[2 * metricsLength + 3];
String[] formats = new String[2 * metricsLength + 3];
colHeaders[0] = "thresholds";
types[0] = "double";
formats[0] = "%f";
int i;
for(i = 0; i < metricsLength; i++) {
if (colHeadersMax.length > i) colHeadersMax[i] = "max " + metrics[i].toString();
colHeaders[i+1] = metrics[i].toString();
colHeaders[i + 1] = metrics[i].toString();
colHeaders[(i + 1 + metricsLength)] = metrics[i].toString()+"_random";
uplift[i] = auuc.upliftByType(metrics[i]);
types [i+1] = "double";
formats [i+1] = "%f";
upliftRandom[i] = auuc.upliftRandomByType(metrics[i]);
types [i + 1] = "double";
formats [i + 1] = "%f";
types [i + 1 + metricsLength] = "double";
formats [i + 1 + metricsLength] = "%f";
}
colHeaders[i + 1] = "n"; types[i+1] = "int"; formats[i+1] = "%d";
colHeaders[i + 2] = "idx"; types[i+2] = "int"; formats[i+2] = "%d";
colHeaders[i + 1 + metricsLength] = "n"; types[i + 1 + metricsLength] = "int"; formats[i + 1 + metricsLength] = "%d";
colHeaders[i + 2 + metricsLength] = "idx"; types[i + 2 + metricsLength] = "int"; formats[i + 2 + metricsLength] = "%d";
TwoDimTable thresholdsByMetrics = new TwoDimTable("Metrics for Thresholds", "Cumulative Uplift metrics for a given percentile", new String[auuc._nBins], colHeaders, types, formats, null );
for(i = 0; i < auuc._nBins; i++) {
int j = 0;
thresholdsByMetrics.set(i, j, Double.valueOf(thresholds[i]));
for (j = 0; j < metricsLength; j++) {
double d = uplift[j][i];
thresholdsByMetrics.set(i, 1 + j, d);
thresholdsByMetrics.set(i, 1 + j, uplift[j][i]);
thresholdsByMetrics.set(i, 1 + j + metricsLength, upliftRandom[j][i]);
}
thresholdsByMetrics.set(i, 1 + j, n[i]);
thresholdsByMetrics.set(i, 2 + j, i);
thresholdsByMetrics.set(i, 1 + j + metricsLength, n[i]);
thresholdsByMetrics.set(i, 2 + j + metricsLength, i);
}
this.thresholds_and_metric_scores = new TwoDimTableV3().fillFromImpl(thresholdsByMetrics);

// fill AUUC table
String[] rowHeaders = new String[]{"AUUC value"};
String[] rowHeaders = new String[]{"AUUC value", "AUUC random value"};
String[] metricNames = EnumUtils.getNames(AUUCType.class);
colHeaders = ArrayUtils.remove(metricNames, Arrays.asList(metricNames).indexOf("AUTO"));
types = new String[metricsLength];
Expand All @@ -84,11 +93,19 @@ public S fillFromImpl(ModelMetricsBinomialUplift modelMetrics) {
types[i] = "double";
formats[i] = "%f";
}
TwoDimTable auucs = new TwoDimTable("AUUC table", "All types of AUUC", rowHeaders, colHeaders, types, formats, "AUUC type" );
TwoDimTable auucs = new TwoDimTable("AUUC table", "All types of AUUC value", rowHeaders, colHeaders, types, formats, "Uplift type" );
for(i = 0; i < metricsLength; i++) {
auucs.set(0, i, auuc.auucByType(metrics[i]));
auucs.set(1, i, auuc.auucRandomByType(metrics[i]));
}
this.auuc_table = new TwoDimTableV3().fillFromImpl(auucs);

rowHeaders = new String[]{"AECU value"};
TwoDimTable qinis = new TwoDimTable("AECU values table", "All types of AECU value", rowHeaders, colHeaders, types, formats, "Uplift type" );
for(i = 0; i < metricsLength; i++) {
qinis.set(0, i, auuc.aecuByType(metrics[i]));
}
this.aecu_table = new TwoDimTableV3().fillFromImpl(qinis);
}
return (S) this;
}
Expand Down
Loading