/*
 * SPDX-License-Identifier: Apache-2.0
 * Copyright 2018-2019 The Feast Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package feast.core.dao;

import feast.core.model.Feature;
import feast.core.model.FeatureStatistics;
import java.util.Date;
import java.util.Optional;
import org.springframework.data.jpa.repository.JpaRepository;

/**
 * JPA repository supplying {@link FeatureStatistics} objects cached per feature.
 *
 * <p>Statistics rows are keyed either by ingestion dataset id or by date (exactly one of the two is
 * populated on each row — see {@link FeatureStatistics}); one derived query is provided for each
 * keying scheme.
 */
// BUGFIX(review): the generic parameters were missing (raw JpaRepository / raw Optional).
// FeatureStatistics uses an int surrogate key, hence the Integer id type.
public interface FeatureStatisticsRepository extends JpaRepository<FeatureStatistics, Integer> {

  /** Returns the cached statistics for {@code feature} computed over ingestion set {@code datasetId}, if any. */
  Optional<FeatureStatistics> findFeatureStatisticsByFeatureAndDatasetId(
      Feature feature, String datasetId);

  /** Returns the cached statistics for {@code feature} computed over the single day {@code date}, if any. */
  Optional<FeatureStatistics> findFeatureStatisticsByFeatureAndDate(Feature feature, Date date);
}
feast.proto.core.CoreServiceProto.RestartIngestionJobRequest; -import feast.proto.core.CoreServiceProto.RestartIngestionJobResponse; -import feast.proto.core.CoreServiceProto.StopIngestionJobRequest; -import feast.proto.core.CoreServiceProto.StopIngestionJobResponse; -import feast.proto.core.CoreServiceProto.UpdateStoreRequest; -import feast.proto.core.CoreServiceProto.UpdateStoreResponse; +import feast.proto.core.CoreServiceProto.*; import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; @@ -69,17 +47,20 @@ public class CoreServiceImpl extends CoreServiceImplBase { private SpecService specService; private AccessManagementService accessManagementService; private JobService jobService; + private StatsService statsService; @Autowired public CoreServiceImpl( SpecService specService, AccessManagementService accessManagementService, + StatsService statsService, JobService jobService, FeastProperties feastProperties) { this.specService = specService; this.accessManagementService = accessManagementService; this.jobService = jobService; this.feastProperties = feastProperties; + this.statsService = statsService; } @Override @@ -126,6 +107,32 @@ public void listFeatureSets( } } + @Override + public void getFeatureStatistics( + GetFeatureStatisticsRequest request, + StreamObserver responseObserver) { + try { + GetFeatureStatisticsResponse response = statsService.getFeatureStatistics(request); + responseObserver.onNext(response); + responseObserver.onCompleted(); + } catch (IllegalArgumentException e) { + log.error("Illegal arguments provided to GetFeatureStatistics method: ", e); + responseObserver.onError( + Status.INVALID_ARGUMENT + .withDescription(e.getMessage()) + .withCause(e) + .asRuntimeException()); + } catch (RetrievalException e) { + log.error("Unable to fetch feature set requested in GetFeatureStatistics method: ", e); + responseObserver.onError( + 
Status.NOT_FOUND.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } catch (Exception e) { + log.error("Exception has occurred in GetFeatureStatistics method: ", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } + } + @Override public void listStores( ListStoresRequest request, StreamObserver responseObserver) { diff --git a/core/src/main/java/feast/core/model/FeatureStatistics.java b/core/src/main/java/feast/core/model/FeatureStatistics.java new file mode 100644 index 00000000000..e06e7d9a6f3 --- /dev/null +++ b/core/src/main/java/feast/core/model/FeatureStatistics.java @@ -0,0 +1,243 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.model; + +import com.google.protobuf.InvalidProtocolBufferException; +import java.io.*; +import java.util.Date; +import java.util.List; +import javax.persistence.*; +import javax.persistence.Entity; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.tensorflow.metadata.v0.*; + +@NoArgsConstructor +@Getter +@Setter +@Entity +@Table( + name = "feature_statistics", + indexes = { + @Index(name = "idx_feature_statistics_feature", columnList = "feature_id"), + @Index(name = "idx_feature_statistics_dataset_id", columnList = "datasetId"), + @Index(name = "idx_feature_statistics_date", columnList = "date"), + }) +public class FeatureStatistics { + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private int id; + + @ManyToOne private Feature feature; + + // Only one of these fields should be populated. + private String datasetId; + private Date date; + + // General statistics + private String featureType; + private long count; + private long numMissing; + private long minNumValues; + private long maxNumValues; + private float avgNumValues; + private long totalNumValues; + private byte[] numValuesHistogram; + + // Numeric statistics + private double mean; + private double stdev; + private long zeroes; + private double min; + private double max; + private double median; + private byte[] numericValueHistogram; + private byte[] numericValueQuantiles; + + // String statistics + @Column(name = "n_unique") + private long unique; + + private float averageLength; + private byte[] rankHistogram; + private byte[] topValues; + + // Byte statistics + private float minBytes; + private float maxBytes; + private float avgBytes; + + // Instantiates a Statistics object from a tensorflow metadata FeatureNameStatistics object and a + // dataset ID. 
+ public static FeatureStatistics createForDataset( + Feature feature, FeatureNameStatistics featureNameStatistics, String datasetId) + throws IOException { + FeatureStatistics featureStatistics = FeatureStatistics.fromProto(featureNameStatistics); + featureStatistics.setFeature(feature); + featureStatistics.setDatasetId(datasetId); + return featureStatistics; + } + + // Instantiates a Statistics object from a tensorflow metadata FeatureNameStatistics object and a + // date. + public static FeatureStatistics createForDate( + Feature feature, FeatureNameStatistics featureNameStatistics, Date date) throws IOException { + FeatureStatistics featureStatistics = FeatureStatistics.fromProto(featureNameStatistics); + featureStatistics.setDate(date); + featureStatistics.setFeature(feature); + return featureStatistics; + } + + public FeatureNameStatistics toProto() throws InvalidProtocolBufferException { + FeatureNameStatistics.Builder featureNameStatisticsBuilder = + FeatureNameStatistics.newBuilder() + .setType(FeatureNameStatistics.Type.valueOf(featureType)) + .setPath(Path.newBuilder().addStep(feature.getName())); + CommonStatistics commonStatistics = + CommonStatistics.newBuilder() + .setNumNonMissing(count - numMissing) + .setNumMissing(numMissing) + .setMaxNumValues(maxNumValues) + .setMinNumValues(minNumValues) + .setTotNumValues(totalNumValues) + .setNumValuesHistogram(Histogram.parseFrom(numValuesHistogram)) + .build(); + + switch (featureNameStatisticsBuilder.getType()) { + case INT: + case FLOAT: + NumericStatistics numStats = + NumericStatistics.newBuilder() + .setCommonStats(commonStatistics) + .setMean(mean) + .setStdDev(stdev) + .setNumZeros(zeroes) + .setMin(min) + .setMax(max) + .setMedian(median) + .addHistograms(Histogram.parseFrom(numericValueHistogram)) + .addHistograms(Histogram.parseFrom(numericValueQuantiles)) + .build(); + featureNameStatisticsBuilder.setNumStats(numStats); + break; + case STRING: + StringStatistics.Builder stringStats = + 
StringStatistics.newBuilder() + .setCommonStats(commonStatistics) + .setUnique(unique) + .setAvgLength(averageLength); + if (rankHistogram == null) { + stringStats.setRankHistogram(RankHistogram.getDefaultInstance()); + } else { + stringStats.setRankHistogram(RankHistogram.parseFrom(rankHistogram)); + } + try (ByteArrayInputStream bis = new ByteArrayInputStream(topValues)) { + ObjectInputStream ois = new ObjectInputStream(bis); + List freqAndValueList = + (List) ois.readObject(); + stringStats.addAllTopValues(freqAndValueList); + } catch (IOException | ClassNotFoundException e) { + throw new InvalidProtocolBufferException( + "Failed to parse field: StringStatistics.TopValues. Check if the value is malformed."); + } + featureNameStatisticsBuilder.setStringStats(stringStats); + break; + case BYTES: + BytesStatistics bytesStats = + BytesStatistics.newBuilder() + .setCommonStats(commonStatistics) + .setAvgNumBytes(avgBytes) + .setMinNumBytes(minBytes) + .setMaxNumBytes(maxBytes) + .build(); + featureNameStatisticsBuilder.setBytesStats(bytesStats); + break; + case STRUCT: + StructStatistics structStats = + StructStatistics.newBuilder().setCommonStats(commonStatistics).build(); + featureNameStatisticsBuilder.setStructStats(structStats); + break; + } + return featureNameStatisticsBuilder.build(); + } + + private static FeatureStatistics fromProto(FeatureNameStatistics featureNameStatistics) + throws IOException, IllegalArgumentException { + FeatureStatistics featureStatistics = new FeatureStatistics(); + featureStatistics.setFeatureType(featureNameStatistics.getType().toString()); + CommonStatistics commonStats; + switch (featureNameStatistics.getType()) { + case FLOAT: + case INT: + NumericStatistics numStats = featureNameStatistics.getNumStats(); + commonStats = numStats.getCommonStats(); + featureStatistics.setMean(numStats.getMean()); + featureStatistics.setStdev(numStats.getStdDev()); + featureStatistics.setZeroes(numStats.getNumZeros()); + 
featureStatistics.setMin(numStats.getMin()); + featureStatistics.setMax(numStats.getMax()); + featureStatistics.setMedian(numStats.getMedian()); + for (Histogram histogram : numStats.getHistogramsList()) { + switch (histogram.getType()) { + case STANDARD: + featureStatistics.setNumericValueHistogram(histogram.toByteArray()); + case QUANTILES: + featureStatistics.setNumericValueQuantiles(histogram.toByteArray()); + default: + // invalid type, dropping the values + } + } + break; + case STRING: + StringStatistics stringStats = featureNameStatistics.getStringStats(); + commonStats = stringStats.getCommonStats(); + featureStatistics.setUnique(stringStats.getUnique()); + featureStatistics.setAverageLength(stringStats.getAvgLength()); + featureStatistics.setRankHistogram(stringStats.getRankHistogram().toByteArray()); + try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { + ObjectOutputStream oos = new ObjectOutputStream(bos); + oos.writeObject(stringStats.getTopValuesList()); + featureStatistics.setTopValues(bos.toByteArray()); + } + break; + case BYTES: + BytesStatistics bytesStats = featureNameStatistics.getBytesStats(); + commonStats = bytesStats.getCommonStats(); + featureStatistics.setUnique(bytesStats.getUnique()); + featureStatistics.setMaxBytes(bytesStats.getMaxNumBytes()); + featureStatistics.setMinBytes(bytesStats.getMinNumBytes()); + featureStatistics.setAvgBytes(bytesStats.getAvgNumBytes()); + break; + case STRUCT: + StructStatistics structStats = featureNameStatistics.getStructStats(); + commonStats = structStats.getCommonStats(); + break; + default: + throw new IllegalArgumentException("Feature statistics provided were of unknown type."); + } + featureStatistics.setCount(commonStats.getNumMissing() + commonStats.getNumNonMissing()); + featureStatistics.setNumMissing(commonStats.getNumMissing()); + featureStatistics.setMinNumValues(commonStats.getMinNumValues()); + featureStatistics.setMaxNumValues(commonStats.getMaxNumValues()); + 
featureStatistics.setAvgNumValues(commonStats.getAvgNumValues()); + featureStatistics.setTotalNumValues(commonStats.getTotNumValues()); + featureStatistics.setNumValuesHistogram(commonStats.getNumValuesHistogram().toByteArray()); + + return featureStatistics; + } +} diff --git a/core/src/main/java/feast/core/service/StatsService.java b/core/src/main/java/feast/core/service/StatsService.java new file mode 100644 index 00000000000..12111866e60 --- /dev/null +++ b/core/src/main/java/feast/core/service/StatsService.java @@ -0,0 +1,635 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.service; + +import static java.lang.Math.*; + +import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Timestamp; +import feast.core.dao.FeatureSetRepository; +import feast.core.dao.FeatureStatisticsRepository; +import feast.core.dao.StoreRepository; +import feast.core.exception.RetrievalException; +import feast.core.model.*; +import feast.core.model.Feature; +import feast.proto.core.CoreServiceProto.GetFeatureStatisticsRequest; +import feast.proto.core.CoreServiceProto.GetFeatureStatisticsResponse; +import feast.proto.core.StoreProto; +import feast.proto.core.StoreProto.Store.StoreType; +import feast.storage.api.statistics.FeatureSetStatistics; +import feast.storage.api.statistics.StatisticsRetriever; +import feast.storage.connectors.bigquery.statistics.BigQueryStatisticsRetriever; +import java.io.IOException; +import java.time.Instant; +import java.util.*; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.tensorflow.metadata.v0.*; +import org.tensorflow.metadata.v0.FeatureNameStatistics.Builder; + +/** Facilitates the retrieval of feature set statistics from historical stores. 
*/ +@Slf4j +@Service +public class StatsService { + + private StoreRepository storeRepository; + private FeatureStatisticsRepository featureStatisticsRepository; + private FeatureSetRepository featureSetRepository; + + @Autowired + public StatsService( + StoreRepository storeRepository, + FeatureStatisticsRepository featureStatisticsRepository, + FeatureSetRepository featureSetRepository) { + this.storeRepository = storeRepository; + this.featureStatisticsRepository = featureStatisticsRepository; + this.featureSetRepository = featureSetRepository; + } + + /** + * Get {@link DatasetFeatureStatistics} for the requested feature set in the provided datasets or + * date range for the store provided. The {@link DatasetFeatureStatistics} will contain a list of + * {@link FeatureNameStatistics} for each feature requested. Results retrieved will be cached + * indefinitely. To force Feast to recompute the statistics, set forceRefresh to true. + * + *

Only one of ingestionIds or startDate/endDate should be provided. If both are provided, the + * former will be used over the latter. + * + *

If multiple datasetIds or if the date ranges over a few days, statistics will be retrieved + * for each single unit (dataset id or day) and results aggregated across that set. As a result of + * this, in such a scenario, statistics that cannot be aggregated will be dropped. This includes + * all histograms and quantiles, unique values, and top value counts. + * + * @param request {@link GetFeatureStatisticsRequest} containing feature set name, subset of + * features, dataset ids or date range, and store to retrieve the data from. + * @return {@link GetFeatureStatisticsResponse} containing {@link DatasetFeatureStatistics} with + * the feature statistics requested. + * @throws IOException + */ + @Transactional + public GetFeatureStatisticsResponse getFeatureStatistics(GetFeatureStatisticsRequest request) + throws IOException { + + // Validate the request + validateRequest(request); + + // Get the stats retriever for the store requested + StatisticsRetriever statisticsRetriever = getStatisticsRetriever(request.getStore()); + + // 1. Retrieve the feature set spec from the db + FeatureSet featureSet = getFeatureSet(request.getFeatureSetId()); + if (featureSet == null) { + throw new IllegalArgumentException( + String.format( + "Illegal request. Unable to find feature set %s", request.getFeatureSetId())); + } + + // 2. Filter out the features requested by the user. If none are provided, + // use all features in the feature set. + List features = request.getFeaturesList(); + if (features.size() == 0) { + features = + featureSet.getFeatures().stream() + .filter(feature -> !feature.isArchived()) + .map(Feature::getName) + .collect(Collectors.toList()); + } + + // 3. Retrieve the statistics from the StatsRetriever. 
+ List> featureNameStatisticsList = new ArrayList<>(); + if (request.getIngestionIdsCount() == 0) { + Timestamp endDate = request.getEndDate(); + Timestamp startDate = request.getStartDate(); + // If no dataset provided, retrieve by date + + long timestamp = startDate.getSeconds(); + while (timestamp < endDate.getSeconds()) { + List featureNameStatistics = + getFeatureNameStatisticsByDate( + statisticsRetriever, featureSet, features, timestamp, request.getForceRefresh()); + featureNameStatisticsList.add(featureNameStatistics); + timestamp += 86400; // advance by a day + } + if (featureNameStatisticsList.size() == 0) { + DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd"); + DateTime startDateTime = new DateTime(startDate.getSeconds() * 1000, DateTimeZone.UTC); + DateTime endDateTime = new DateTime(endDate.getSeconds() * 1000, DateTimeZone.UTC); + throw new RetrievalException( + String.format( + "Unable to find any data over provided dates [%s, %s)", + fmt.print(startDateTime), fmt.print(endDateTime))); + } + } else { + // else, retrieve by dataset + for (String datasetId : request.getIngestionIdsList()) { + List featureNameStatistics = + getFeatureNameStatisticsByDataset( + statisticsRetriever, featureSet, features, datasetId, request.getForceRefresh()); + featureNameStatisticsList.add(featureNameStatistics); + if (featureNameStatisticsList.size() == 0) { + throw new RetrievalException( + String.format( + "Unable to find any data over provided data sets %s", + request.getIngestionIdsList())); + } + } + } + + // Merge statistics values across days/datasets + List featureNameStatistics = mergeStatistics(featureNameStatisticsList); + long totalCount = getTotalCount(featureNameStatistics.get(0)); + return GetFeatureStatisticsResponse.newBuilder() + .setDatasetFeatureStatisticsList( + DatasetFeatureStatisticsList.newBuilder() + .addDatasets( + DatasetFeatureStatistics.newBuilder() + .setNumExamples(totalCount) + .addAllFeatures(featureNameStatistics))) + 
.build(); + } + + /** + * Get {@link FeatureNameStatistics} by dataset id. + * + * @param statisticsRetriever {@link StatisticsRetriever} corresponding to the store to get the + * data from. + * @param featureSet {@link FeatureSet} requested by the user + * @param features features to retrieve + * @param datasetId dataset id to subset the data by + * @param forceRefresh whether to override the values in the cache + * @return {@link FeatureNameStatistics} for the data within the dataset id provided + * @throws IOException + */ + private List getFeatureNameStatisticsByDataset( + StatisticsRetriever statisticsRetriever, + FeatureSet featureSet, + List features, + String datasetId, + boolean forceRefresh) + throws IOException { + List featureNameStatistics = new ArrayList<>(); + List featuresMissingStats = new ArrayList<>(); + Map featureNameToFeature = + featureSet.getFeatures().stream().collect(Collectors.toMap(Feature::getName, f -> f)); + + // For each feature requested, check if statistics already exist in the cache + // If not refreshing data in the cache, retrieve the cached data and add it to the + // list of FeatureNameStatistics for this dataset. + // Else, add to the list of features we still need to retrieve statistics for. + for (String featureName : features) { + Feature feature = featureNameToFeature.get(featureName); + Optional cachedFeatureStatistics = Optional.empty(); + if (!forceRefresh) { + cachedFeatureStatistics = + featureStatisticsRepository.findFeatureStatisticsByFeatureAndDatasetId( + feature, datasetId); + } + if (cachedFeatureStatistics.isPresent()) { + featureNameStatistics.add(cachedFeatureStatistics.get().toProto()); + } else { + featuresMissingStats.add(featureName); + } + } + + // Retrieve the balance of statistics after checking the cache, and add it to the + // list of FeatureNameStatistics. 
+ if (featuresMissingStats.size() > 0) { + FeatureSetStatistics featureSetStatistics = + statisticsRetriever.getFeatureStatistics( + featureSet.toProto().getSpec(), featuresMissingStats, datasetId); + + // Persist the newly retrieved statistics in the cache. + for (FeatureNameStatistics stat : featureSetStatistics.getFeatureNameStatistics()) { + if (isEmpty(stat)) { + continue; + } + Feature feature = featureNameToFeature.get(stat.getName()); + FeatureStatistics featureStatistics = + FeatureStatistics.createForDataset(feature, stat, datasetId); + Optional existingRecord = + featureStatisticsRepository.findFeatureStatisticsByFeatureAndDatasetId( + featureStatistics.getFeature(), datasetId); + existingRecord.ifPresent(statistics -> featureStatistics.setId(statistics.getId())); + featureStatisticsRepository.save(featureStatistics); + featureNameStatistics.add(stat); + } + } + return featureNameStatistics; + } + + /** + * Get {@link FeatureNameStatistics} by date. + * + * @param statisticsRetriever {@link StatisticsRetriever} corresponding to the store to get the + * data from. 
+ * @param featureSet {@link FeatureSet} requested by the user + * @param features features to retrieve + * @param timestamp timestamp of the date to subset the data + * @param forceRefresh whether to override the values in the cache + * @return {@link FeatureNameStatistics} for the data within the dataset id provided + * @throws IOException + */ + private List getFeatureNameStatisticsByDate( + StatisticsRetriever statisticsRetriever, + FeatureSet featureSet, + List features, + long timestamp, + boolean forceRefresh) + throws IOException { + Date date = Date.from(Instant.ofEpochSecond(timestamp)); + List featureNameStatistics = new ArrayList<>(); + List featuresMissingStats = new ArrayList<>(); + Map featureNameToFeature = + featureSet.getFeatures().stream().collect(Collectors.toMap(Feature::getName, f -> f)); + + // For each feature requested, check if statistics already exist in the cache + // If not refreshing data in the cache, retrieve the cached data and add it to the + // list of FeatureNameStatistics for this date. + // Else, add to the list of features we still need to retrieve statistics for. + for (String featureName : features) { + Feature feature = featureNameToFeature.get(featureName); + Optional cachedFeatureStatistics = Optional.empty(); + if (!forceRefresh) { + cachedFeatureStatistics = + featureStatisticsRepository.findFeatureStatisticsByFeatureAndDate(feature, date); + } + if (cachedFeatureStatistics.isPresent()) { + featureNameStatistics.add(cachedFeatureStatistics.get().toProto()); + } else { + featuresMissingStats.add(featureName); + } + } + + // Retrieve the balance of statistics after checking the cache, and add it to the + // list of FeatureNameStatistics. 
+ if (featuresMissingStats.size() > 0) { + FeatureSetStatistics featureSetStatistics = + statisticsRetriever.getFeatureStatistics( + featureSet.toProto().getSpec(), + featuresMissingStats, + Timestamp.newBuilder().setSeconds(timestamp).build()); + + // Persist the newly retrieved statistics in the cache. + for (FeatureNameStatistics stat : featureSetStatistics.getFeatureNameStatistics()) { + if (isEmpty(stat)) { + continue; + } + Feature feature = featureNameToFeature.get(stat.getName()); + FeatureStatistics featureStatistics = FeatureStatistics.createForDate(feature, stat, date); + Optional existingRecord = + featureStatisticsRepository.findFeatureStatisticsByFeatureAndDate( + featureStatistics.getFeature(), date); + existingRecord.ifPresent(statistics -> featureStatistics.setId(statistics.getId())); + featureStatisticsRepository.save(featureStatistics); + featureNameStatistics.add(stat); + } + } + return featureNameStatistics; + } + + /** + * Get the {@link StatisticsRetriever} corresponding to the store name provided. + * + * @param storeName name of the store to retrieve statistics from + * @return {@link StatisticsRetriever} + */ + StatisticsRetriever getStatisticsRetriever(String storeName) + throws InvalidProtocolBufferException { + Store store = + storeRepository + .findById(storeName) + .orElseThrow( + () -> + new RetrievalException( + String.format("Could not find store with name %s", storeName))); + StoreProto.Store storeProto = store.toProto(); + if (storeProto.getType() != StoreType.BIGQUERY) { + throw new IllegalArgumentException( + String.format( + "Invalid store %s with type %s specified. 
Batch statistics are only supported for BigQuery stores", + store.getName(), store.getType())); + } + return BigQueryStatisticsRetriever.create(storeProto.getBigqueryConfig()); + } + + private FeatureSet getFeatureSet(String featureSetId) { + String[] split = featureSetId.split("/"); + String project; + String featureSetName; + if (split.length == 1) { + project = Project.DEFAULT_NAME; + featureSetName = split[0]; + } else { + project = split[0]; + featureSetName = split[1]; + } + FeatureSet featureSet = + featureSetRepository.findFeatureSetByNameAndProject_Name(featureSetName, project); + return featureSet; + } + + /** + * Merge feature statistics by name. This method is used to merge statistics retrieved over + * multiple days or datasets. + * + * @param featureNameStatistics {@link FeatureNameStatistics} retrieved from the store + * @return Merged list of {@link FeatureNameStatistics} by name + */ + @VisibleForTesting + public List mergeStatistics( + List> featureNameStatistics) { + List unnestedList = new ArrayList<>(); + + featureNameStatistics.forEach(unnestedList::addAll); + Map> groupByPath = + unnestedList.stream() + .collect(Collectors.groupingBy(FeatureNameStatistics::getPath, Collectors.toList())); + + List merged = new ArrayList<>(); + for (Path key : groupByPath.keySet()) { + List featureNameStatisticsForKey = groupByPath.get(key); + if (featureNameStatisticsForKey.size() == 1) { + merged.add(featureNameStatisticsForKey.get(0)); + } else { + switch (featureNameStatisticsForKey.get(0).getType()) { + case INT: + case FLOAT: + merged.add(mergeNumStatistics(featureNameStatisticsForKey)); + break; + case STRING: + merged.add(mergeCategoricalStatistics(groupByPath.get(key))); + break; + case BYTES: + merged.add(mergeByteStatistics(groupByPath.get(key))); + break; + case STRUCT: + merged.add(mergeStructStats(groupByPath.get(key))); + break; + default: + throw new IllegalArgumentException( + "Statistics are only supported for string, boolean, bytes and 
numeric features"); + } + } + } + return merged; + } + + private FeatureNameStatistics mergeStructStats( + List featureNameStatisticsList) { + Builder mergedFeatureNameStatistics = + FeatureNameStatistics.newBuilder() + .setPath(featureNameStatisticsList.get(0).getPath()) + .setType(featureNameStatisticsList.get(0).getType()); + + long totalCount = 0; + long missingCount = 0; + long totalNumValues = 0; + long maxNumValues = + featureNameStatisticsList.get(0).getStructStats().getCommonStats().getMaxNumValues(); + long minNumValues = + featureNameStatisticsList.get(0).getStructStats().getCommonStats().getMinNumValues(); + + for (FeatureNameStatistics featureNameStatistics : featureNameStatisticsList) { + StructStatistics structStats = featureNameStatistics.getStructStats(); + totalCount += structStats.getCommonStats().getNumNonMissing(); + missingCount += structStats.getCommonStats().getNumMissing(); + totalNumValues += + structStats.getCommonStats().getAvgNumValues() + * structStats.getCommonStats().getNumNonMissing(); + maxNumValues = max(maxNumValues, structStats.getCommonStats().getMaxNumValues()); + minNumValues = min(minNumValues, structStats.getCommonStats().getMinNumValues()); + } + + StructStatistics mergedStructStatistics = + StructStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(totalNumValues) + .setNumNonMissing(totalCount) + .setAvgNumValues((float) totalNumValues / totalCount) + .setMaxNumValues(maxNumValues) + .setMinNumValues(minNumValues) + .setNumMissing(missingCount)) + .build(); + + return mergedFeatureNameStatistics.setStructStats(mergedStructStatistics).build(); + } + + private FeatureNameStatistics mergeNumStatistics( + List featureNameStatisticsList) { + Builder mergedFeatureNameStatistics = + FeatureNameStatistics.newBuilder() + .setPath(featureNameStatisticsList.get(0).getPath()) + .setType(featureNameStatisticsList.get(0).getType()); + + FeatureNameStatistics first = 
featureNameStatisticsList.remove(0); + double max = first.getNumStats().getMax(); + double min = first.getNumStats().getMin(); + double var = pow(first.getNumStats().getStdDev(), 2); + long totalCount = first.getNumStats().getCommonStats().getNumNonMissing(); + double totalVal = totalCount * first.getNumStats().getMean(); + long missingCount = first.getNumStats().getCommonStats().getNumMissing(); + long zeroes = first.getNumStats().getNumZeros(); + + for (FeatureNameStatistics featureNameStatistics : featureNameStatisticsList) { + NumericStatistics numStats = featureNameStatistics.getNumStats(); + max = max(numStats.getMax(), max); + min = min(numStats.getMin(), min); + long count = numStats.getCommonStats().getNumNonMissing(); + double sampleVar = pow(numStats.getStdDev(), 2); + float aggMean = (float) totalVal / totalCount; + var = getVar(var, totalCount, aggMean, sampleVar, count, numStats.getMean()); + totalVal += numStats.getMean() * count; + totalCount += count; + missingCount += numStats.getCommonStats().getNumMissing(); + zeroes += numStats.getNumZeros(); + } + NumericStatistics mergedNumericStatistics = + NumericStatistics.newBuilder() + .setMax(max) + .setMin(min) + .setMean(totalVal / totalCount) + .setNumZeros(zeroes) + .setStdDev(sqrt(var)) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(totalCount) + .setNumNonMissing(totalCount) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(missingCount)) + .build(); + return mergedFeatureNameStatistics.setNumStats(mergedNumericStatistics).build(); + } + + // Aggregation of sample variance follows the formula described here: + // https://www.tandfonline.com/doi/abs/10.1080/00031305.2014.966589 + private double getVar( + double s1Var, long s1Count, double s1Mean, double s2Var, long s2Count, double s2Mean) { + long totalCount = s1Count + s2Count; + return ((s1Count - 1) * s1Var + + (s2Count - 1) * s2Var + + ((float) s1Count * s2Count / totalCount) * pow(s1Mean 
- s2Mean, 2)) + / (s1Count + s2Count - 1); + } + + private FeatureNameStatistics mergeCategoricalStatistics( + List featureNameStatisticsList) { + Builder mergedFeatureNameStatistics = + FeatureNameStatistics.newBuilder() + .setPath(featureNameStatisticsList.get(0).getPath()) + .setType(featureNameStatisticsList.get(0).getType()); + long totalCount = 0; + long missingCount = 0; + long totalLen = 0; + for (FeatureNameStatistics featureNameStatistics : featureNameStatisticsList) { + StringStatistics stringStats = featureNameStatistics.getStringStats(); + totalCount += stringStats.getCommonStats().getNumNonMissing(); + missingCount += stringStats.getCommonStats().getNumMissing(); + totalLen += stringStats.getAvgLength() * stringStats.getCommonStats().getNumNonMissing(); + } + StringStatistics mergedStringStatistics = + StringStatistics.newBuilder() + .setAvgLength((float) totalLen / totalCount) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(totalCount) + .setNumNonMissing(totalCount) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(missingCount)) + .build(); + return mergedFeatureNameStatistics.setStringStats(mergedStringStatistics).build(); + } + + private FeatureNameStatistics mergeByteStatistics( + List featureNameStatisticsList) { + Builder mergedFeatureNameStatistics = + FeatureNameStatistics.newBuilder() + .setPath(featureNameStatisticsList.get(0).getPath()) + .setType(featureNameStatisticsList.get(0).getType()); + + long totalCount = 0; + long missingCount = 0; + float totalNumBytes = 0; + float maxNumBytes = featureNameStatisticsList.get(0).getBytesStats().getMaxNumBytes(); + float minNumBytes = featureNameStatisticsList.get(0).getBytesStats().getMinNumBytes(); + + for (FeatureNameStatistics featureNameStatistics : featureNameStatisticsList) { + BytesStatistics bytesStats = featureNameStatistics.getBytesStats(); + totalCount += bytesStats.getCommonStats().getNumNonMissing(); + missingCount += 
bytesStats.getCommonStats().getNumMissing(); + totalNumBytes += bytesStats.getAvgNumBytes() * bytesStats.getCommonStats().getNumNonMissing(); + maxNumBytes = max(maxNumBytes, bytesStats.getMaxNumBytes()); + minNumBytes = min(minNumBytes, bytesStats.getMinNumBytes()); + } + + BytesStatistics mergedBytesStatistics = + BytesStatistics.newBuilder() + .setAvgNumBytes(totalNumBytes / totalCount) + .setMinNumBytes(minNumBytes) + .setMaxNumBytes(maxNumBytes) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(totalCount) + .setNumNonMissing(totalCount) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(missingCount)) + .build(); + + return mergedFeatureNameStatistics.setBytesStats(mergedBytesStatistics).build(); + } + + private long getTotalCount(FeatureNameStatistics featureNameStatistics) { + CommonStatistics commonStats; + switch (featureNameStatistics.getType()) { + case STRUCT: + commonStats = featureNameStatistics.getStructStats().getCommonStats(); + break; + case STRING: + commonStats = featureNameStatistics.getStringStats().getCommonStats(); + break; + case BYTES: + commonStats = featureNameStatistics.getBytesStats().getCommonStats(); + break; + case FLOAT: + case INT: + commonStats = featureNameStatistics.getNumStats().getCommonStats(); + break; + default: + throw new RuntimeException("Unable to extract dataset size; Invalid type provided"); + } + return commonStats.getNumNonMissing() + commonStats.getNumMissing(); + } + + private void validateRequest(GetFeatureStatisticsRequest request) { + if (request.getIngestionIdsCount() == 0) { + Timestamp startDate = request.getStartDate(); + Timestamp endDate = request.getEndDate(); + if (!request.hasStartDate() || !request.hasEndDate()) { + throw new IllegalArgumentException( + "Invalid request. 
Either provide dataset ids to retrieve statistics over, or a start date and end date."); + } + if (endDate.getSeconds() < startDate.getSeconds()) { + throw new IllegalArgumentException( + String.format( + "Invalid request. Start timestamp %d is greater than the end timestamp %d", + startDate.getSeconds(), endDate.getSeconds())); + } + } + } + + private boolean isEmpty(FeatureNameStatistics featureNameStatistics) { + switch (featureNameStatistics.getType()) { + case STRUCT: + return featureNameStatistics + .getStructStats() + .getCommonStats() + .equals(CommonStatistics.getDefaultInstance()); + case STRING: + return featureNameStatistics + .getStringStats() + .getCommonStats() + .equals(CommonStatistics.getDefaultInstance()); + case BYTES: + return featureNameStatistics + .getBytesStats() + .getCommonStats() + .equals(CommonStatistics.getDefaultInstance()); + case FLOAT: + case INT: + return featureNameStatistics + .getNumStats() + .getCommonStats() + .equals(CommonStatistics.getDefaultInstance()); + default: + return true; + } + } +} diff --git a/core/src/test/java/feast/core/service/StatsServiceTest.java b/core/src/test/java/feast/core/service/StatsServiceTest.java new file mode 100644 index 00000000000..276d8df4b7c --- /dev/null +++ b/core/src/test/java/feast/core/service/StatsServiceTest.java @@ -0,0 +1,384 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.service; + +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.*; +import static org.mockito.MockitoAnnotations.initMocks; + +import com.google.protobuf.Timestamp; +import feast.core.dao.FeatureSetRepository; +import feast.core.dao.FeatureStatisticsRepository; +import feast.core.dao.StoreRepository; +import feast.core.model.Project; +import feast.core.model.Store; +import feast.proto.core.CoreServiceProto.GetFeatureStatisticsRequest; +import feast.proto.core.StoreProto; +import feast.proto.core.StoreProto.Store.BigQueryConfig; +import feast.proto.core.StoreProto.Store.StoreType; +import feast.storage.connectors.bigquery.statistics.BigQueryStatisticsRetriever; +import java.io.IOException; +import java.util.Arrays; +import java.util.Optional; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mock; +import org.tensorflow.metadata.v0.*; +import org.tensorflow.metadata.v0.FeatureNameStatistics.Type; + +public class StatsServiceTest { + + private StatsService statsService; + @Mock private FeatureStatisticsRepository featureStatisticsRepository; + @Mock private StoreRepository storeRepository; + @Mock private FeatureSetRepository featureSetRepository; + + @Rule public final ExpectedException expectedException = ExpectedException.none(); + + @Before + public void setUp() { + initMocks(this); + statsService = + new StatsService(storeRepository, featureStatisticsRepository, featureSetRepository); + } + + @Test + public void shouldThrowExceptionIfNeitherDatesNorDatasetsProvided() throws IOException { + GetFeatureStatisticsRequest request = GetFeatureStatisticsRequest.newBuilder().build(); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Invalid request. 
Either provide dataset ids to retrieve statistics over, or a start date and end date."); + statsService.getFeatureStatistics(request); + } + + @Test + public void shouldThrowExceptionIfInvalidDatesProvided() throws IOException { + GetFeatureStatisticsRequest request = + GetFeatureStatisticsRequest.newBuilder() + .setStartDate(Timestamp.newBuilder().setSeconds(1)) + .setEndDate(Timestamp.newBuilder().setSeconds(0)) + .build(); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Invalid request. Start timestamp 1 is greater than the end timestamp 0"); + statsService.getFeatureStatistics(request); + } + + @Test + public void shouldThrowExceptionIfInvalidStoreProvided() throws IOException { + GetFeatureStatisticsRequest request = + GetFeatureStatisticsRequest.newBuilder() + .setStartDate(Timestamp.newBuilder().setSeconds(0)) + .setEndDate(Timestamp.newBuilder().setSeconds(1)) + .setStore("redis") + .build(); + + when(storeRepository.findById("redis")) + .thenReturn( + Optional.of( + Store.fromProto( + StoreProto.Store.newBuilder() + .setName("redis") + .setType(StoreType.REDIS) + .build()))); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Invalid store redis with type REDIS specified. 
Batch statistics are only supported for BigQuery stores"); + statsService.getFeatureStatistics(request); + } + + @Test + public void shouldThrowExceptionIfFeatureSetNotFound() throws IOException { + GetFeatureStatisticsRequest request = + GetFeatureStatisticsRequest.newBuilder() + .setStartDate(Timestamp.newBuilder().setSeconds(0)) + .setEndDate(Timestamp.newBuilder().setSeconds(1)) + .setStore("bigquery") + .setFeatureSetId("my_feature_set") + .build(); + + StoreProto.Store storeProto = + StoreProto.Store.newBuilder() + .setName("bigquery") + .setType(StoreType.BIGQUERY) + .setBigqueryConfig( + BigQueryConfig.newBuilder().setProjectId("project").setDatasetId("dataset")) + .build(); + when(storeRepository.findById("bigquery")).thenReturn(Optional.of(Store.fromProto(storeProto))); + when(featureSetRepository.findFeatureSetByNameAndProject_Name( + "my_feature_set", Project.DEFAULT_NAME)) + .thenReturn(null); + + statsService = spy(statsService); + BigQueryStatisticsRetriever retriever = mock(BigQueryStatisticsRetriever.class); + doReturn(retriever).when(statsService).getStatisticsRetriever(storeProto.getName()); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Illegal request. 
Unable to find feature set my_feature_set"); + statsService.getFeatureStatistics(request); + } + + @Test + public void shouldAggregateNumericStatistics() { + FeatureNameStatistics stat1 = + FeatureNameStatistics.newBuilder() + .setNumStats( + NumericStatistics.newBuilder() + .setMax(20) + .setMin(1) + .setMean(6) + .setNumZeros(0) + .setStdDev(7.90569415) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(0))) + .setType(Type.INT) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + FeatureNameStatistics stat2 = + FeatureNameStatistics.newBuilder() + .setNumStats( + NumericStatistics.newBuilder() + .setMax(10) + .setMin(0) + .setMean(4) + .setNumZeros(1) + .setStdDev(3.807886553) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1))) + .setPath(Path.newBuilder().addStep("feature").build()) + .setType(Type.INT) + .build(); + + FeatureNameStatistics expected = + FeatureNameStatistics.newBuilder() + .setNumStats( + NumericStatistics.newBuilder() + .setMax(20) + .setMin(0) + .setMean(5) + .setNumZeros(1) + .setStdDev(5.944184833146219) + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(10) + .setNumNonMissing(10) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1))) + .setPath(Path.newBuilder().addStep("feature").build()) + .setType(Type.INT) + .build(); + + assertThat( + statsService.mergeStatistics(Arrays.asList(Arrays.asList(stat1, stat2))), + equalTo(Arrays.asList(expected))); + } + + @Test + public void shouldAggregateCategoricalStatistics() { + FeatureNameStatistics stat1 = + FeatureNameStatistics.newBuilder() + .setStringStats( + StringStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + 
.setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(0)) + .setUnique(4) + .setAvgLength(6)) + .setType(Type.STRING) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + FeatureNameStatistics stat2 = + FeatureNameStatistics.newBuilder() + .setStringStats( + StringStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1)) + .setUnique(4) + .setAvgLength(4)) + .setType(Type.STRING) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + + FeatureNameStatistics expected = + FeatureNameStatistics.newBuilder() + .setStringStats( + StringStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(10) + .setNumNonMissing(10) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1)) + .setAvgLength(5)) + .setType(Type.STRING) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + assertThat( + statsService.mergeStatistics(Arrays.asList(Arrays.asList(stat1, stat2))), + equalTo(Arrays.asList(expected))); + } + + @Test + public void shouldAggregateBytesStatistics() { + FeatureNameStatistics stat1 = + FeatureNameStatistics.newBuilder() + .setBytesStats( + BytesStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(0)) + .setUnique(4) + .setAvgNumBytes(6) + .setMaxNumBytes(10) + .setMinNumBytes(0)) + .setType(Type.BYTES) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + FeatureNameStatistics stat2 = + FeatureNameStatistics.newBuilder() + .setBytesStats( + BytesStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + 
.setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1)) + .setUnique(4) + .setAvgNumBytes(4) + .setMaxNumBytes(20) + .setMinNumBytes(1)) + .setType(Type.BYTES) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + + FeatureNameStatistics expected = + FeatureNameStatistics.newBuilder() + .setBytesStats( + BytesStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(10) + .setNumNonMissing(10) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1)) + .setAvgNumBytes(5) + .setMaxNumBytes(20) + .setMinNumBytes(0)) + .setType(Type.BYTES) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + assertThat( + statsService.mergeStatistics(Arrays.asList(Arrays.asList(stat1, stat2))), + equalTo(Arrays.asList(expected))); + } + + @Test + public void shouldAggregateStructStatistics() { + FeatureNameStatistics stat1 = + FeatureNameStatistics.newBuilder() + .setStructStats( + StructStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(0))) + .setType(Type.STRUCT) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + FeatureNameStatistics stat2 = + FeatureNameStatistics.newBuilder() + .setStructStats( + StructStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(5) + .setNumNonMissing(5) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1))) + .setType(Type.STRUCT) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + + FeatureNameStatistics expected = + FeatureNameStatistics.newBuilder() + .setStructStats( + StructStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setTotNumValues(10) + .setNumNonMissing(10) + .setAvgNumValues(1) + .setMaxNumValues(1) + .setMinNumValues(1) + .setNumMissing(1))) + 
.setType(Type.STRUCT) + .setPath(Path.newBuilder().addStep("feature").build()) + .build(); + assertThat( + statsService.mergeStatistics(Arrays.asList(Arrays.asList(stat1, stat2))), + equalTo(Arrays.asList(expected))); + } +} diff --git a/docs/assets/statistics-sources.png b/docs/assets/statistics-sources.png new file mode 100644 index 00000000000..02be233968d Binary files /dev/null and b/docs/assets/statistics-sources.png differ diff --git a/docs/user-guide/statistics.md b/docs/user-guide/statistics.md new file mode 100644 index 00000000000..cfb36dea1a8 --- /dev/null +++ b/docs/user-guide/statistics.md @@ -0,0 +1,164 @@ +# Statistics + +Data is a first-class citizen in machine learning projects, it is critical to have tests and validations around data. To that end, Feast avails various feature statistics to users in order to give users visibility into the data that has been ingested into the system. + +![overview](../assets/statistics-sources.png) + +Feast exposes feature statistics at two points in the Feast system: +1. Inflight feature statistics from the population job +2. Historical feature statistics from the warehouse stores + +## Historical Feature Statistics + +Feast supports the computation of feature statistics over data already written to warehouse stores. These feature statistics, which can be retrieved over distinct sets of historical data, are fully compatible with [TFX's Data Validation](https://tensorflow.google.cn/tfx/tutorials/data_validation/tfdv_basic). + +### Retrieving Statistics + +Statistics can be retrieved from Feast using the python SDK's `get_statistics` method. This requires a connection to Feast core. + +Feature statistics can be retrieved for a single feature set, from a single valid warehouse store. 
Users can opt to either retrieve feature statistics for a discrete subset of data by providing an `ingestion_id` , a unique id generated for a dataset when it is ingested into feast: + +```{python} +# A unique ingestion id is returned for each batch ingestion +ingestion_id=client.ingest(feature_set,df) + +stats = client.get_statistics( + feature_set_id='project/feature_set', + store='warehouse', + features=['feature_1', 'feature_2'], + ingestion_ids=[ingestion_id]) +``` + +Or by selecting data within a time range by providing a `start_date` and `end_date` (the start date is inclusive, the end date is not): + +```{python} +start_date=datetime(2020,10,1,0,0,0) +end_date=datetime(2020,10,2,0,0,0) + +stats = client.get_statistics( +feature_set_id = 'project/feature_set', + store='warehouse', + features=['feature_1', 'feature_2'], + start_date=start_date, + end_date=end_date) +``` + +{% hint style="info" %} +Although `get_statistics` accepts python `datetime` objects for `start_date` and `end_date`, statistics are computed at the day granularity. +{% endhint %} + +Note that when providing a time range, Feast will NOT filter out duplicated rows. It is therefore highly recommended to provide `ingestion_id`s whenever possible. + +Feast returns the statistics in the form of the protobuf [DatasetFeatureStatisticsList](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/statistics.proto#L36), which can be subsequently passed to TFDV methods to [validate the dataset](https://www.tensorflow.org/tfx/data_validation/get_started#checking_the_data_for_errors)... + +```{python} +anomalies = tfdv.validate_statistics( + statistics=stats_2, schema=feature_set.export_tfx_schema()) +tfdv.display_anomalies(anomalies) +``` + +Or [visualise the statistics](https://www.tensorflow.org/tfx/data_validation/get_started#computing_descriptive_data_statistics) in [facets](https://github.com/PAIR-code/facets). 
+ +```{python} +tfdv.visualize_statistics(stats) +``` + +Refer to the [example notebook](https://github.com/feast-dev/feast/blob/master/examples/statistics/Historical%20Feature%20Statistics%20with%20Feast,%20TFDV%20and%20Facets.ipynb) for an end-to-end example showcasing Feast's integration with TFDV and Facets. + +### Aggregating Statistics + +Feast supports retrieval of feature statistics across multiple datasets or days. + +```{python} +stats = client.get_statistics( + feature_set_id='project/feature_set', + store='warehouse', + features=['feature_1', 'feature_2'], + ingestion_ids=[ingestion_id_1, ingestion_id_2]) +``` + +However, when querying across multiple datasets, Feast computes the statistics for each dataset independently (for caching purposes), and aggregates the results. As a result of this, certain un-aggregatable statistics are dropped in the process, such as medians, uniqueness counts, and histograms. + +Refer to the table below for the list of statistics that will be dropped. + +### Caching + +Feast caches the results of all feature statistics requests, and will, by default, retrieve and return the cached results. To recompute previously computed feature statistics, set `force_refresh` to `true` when retrieving the statistics: + +```{python} +stats=client.get_statistics( + feature_set_id='project/feature_set', + store='warehouse', + features=['feature_1', 'feature_2'], + dataset_ids=[dataset_id], + force_refresh=True) +``` + +This will force Feast to recompute the statistics, and replace any previously cached values. + +### Supported Statistics + +Feast supports most, but not all of the feature statistics defined in TFX's [FeatureNameStatistics](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/statistics.proto#L147). 
For the definition of each statistic and information about how each one is computed, refer to the [protobuf definition](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/statistics.proto#L147). + +| Type | Statistic | Supported | Aggregateable | +| --- | --- | --- | --- | +| Common | NumNonMissing | ✔ | ✔ | +| | NumMissing | ✔ | ✔ | +| | MinNumValues | ✔ | ✔ | +| | MaxNumValues | ✔ | ✔ | +| | AvgNumValues | ✔ | ✔ | +| | TotalNumValues | ✔ | ✔ | +| | NumValuesHist | | | +| Numeric | Min | ✔ | ✔ | +| | Max | ✔ | ✔ | +| | Median | ✔ | | +| | Mean | ✔ | ✔ | +| | Stdev | ✔ | ✔ | +| | NumZeroes | ✔ | ✔ | +| | Quantiles | ✔ | | +| | Histogram | ✔ | | +| String | RankHistogram | ✔ | | +| | TopValues | ✔ | | +| | Unique | ✔ | | +| | AvgLength | ✔ | ✔ | +| Bytes | MinNumBytes | ✔ | ✔ | +| | MaxNumBytes | ✔ | ✔ | +| | AvgNumBytes | ✔ | ✔ | +| | Unique | ✔ | | +| Struct/List | - (uses common statistics only) | - | - | + +## Inflight Feature Statistics + +For insight into data currently flowing into Feast through the population jobs, [statsd](https://github.com/statsd/statsd) is used to capture feature value statistics. + +Inflight feature statistics are windowed (default window length is 30s) and computed at two points in the feature population pipeline: + +1. Prior to store writes, after successful validation +2. 
After successful store writes + +The following metrics are written at the end of each window as [statsd gauges](https://github.com/statsd/statsd/blob/master/docs/metric_types.md#gauges): + +``` +feast_ingestion_feature_value_min +feast_ingestion_feature_value_max +feast_ingestion_feature_value_mean +feast_ingestion_feature_value_percentile_25 +feast_ingestion_feature_value_percentile_50 +feast_ingestion_feature_value_percentile_90 +feast_ingestion_feature_value_percentile_95 +feast_ingestion_feature_value_percentile_99 +``` + +{% hint style="info" %} +the gauge metric type is used over histogram because statsd only supports positive values for histogram metric types, while numerical feature values can be of any double value. +{% endhint %} + +The metrics are tagged with and can be aggregated by the following keys: + +| key | description | +| --- | --- | +| feast_store | store the population job is writing to +| feast_project_name | feast project name +| feast_featureSet_name | feature set name +| feast_feature_name | feature name +| ingestion_job_name | id of the population job writing the feature values. \ No newline at end of file diff --git a/examples/statistics/Historical Feature Statistics with Feast, TFDV and Facets.ipynb b/examples/statistics/Historical Feature Statistics with Feast, TFDV and Facets.ipynb new file mode 100644 index 00000000000..1db737824ff --- /dev/null +++ b/examples/statistics/Historical Feature Statistics with Feast, TFDV and Facets.ipynb @@ -0,0 +1,694 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Historical Feature Statistics with Feast, TFDV and Facets\n", + "\n", + "This tutorial covers how Feast can be used in conjunction with TFDV and Facets to retrieve statistics about feature datasets. \n", + "\n", + "The notebook showcases how Feast's integration with TFDV allows users to:\n", + "\n", + "1. Define TFX feature schemas and persist these properties in the Feature Store\n", + "2. 
Validate new data against the defined schema\n", + "3. Validate data already in Feast against the defined schema\n", + "\n", + "**Prerequisites**:\n", + "\n", + "- Feast running with at least 1 BigQuery warehouse store. This example uses a bigquery store with the name `historical`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "setting project to statistics...\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import pytest\n", + "import pytz\n", + "import uuid\n", + "import time\n", + "from datetime import datetime, timedelta\n", + "\n", + "from feast.client import Client\n", + "from feast.entity import Entity\n", + "from feast.feature import Feature\n", + "from feast.feature_set import FeatureSet\n", + "from feast.type_map import ValueType\n", + "from google.protobuf import json_format\n", + "from google.protobuf.duration_pb2 import Duration\n", + "from tensorflow_metadata.proto.v0 import statistics_pb2\n", + "from tensorflow_metadata.proto.v0 import schema_pb2\n", + "import tensorflow_data_validation as tfdv\n", + "\n", + "PROJECT_NAME = \"statistics\"\n", + "IRIS_DATASET = \"http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n", + "BIGQUERY_STORE_NAME = \"historical\"\n", + "client = Client(core_url=\"localhost:6565\")\n", + "print(f\"setting project to {PROJECT_NAME}...\")\n", + "try:\n", + " client.create_project(PROJECT_NAME)\n", + "except:\n", + " print(\"project already exists, skipping.\")\n", + "client.set_project(PROJECT_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we are using the iris dataset. More information about this dataset can be found [here](http://archive.ics.uci.edu/ml/datasets/iris)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthclassdatetime
05.13.51.40.2Iris-setosa2020-05-25 07:31:28.230582+00:00
14.93.01.40.2Iris-setosa2020-05-25 07:31:28.230582+00:00
24.73.21.30.2Iris-setosa2020-05-25 07:31:28.230582+00:00
34.63.11.50.2Iris-setosa2020-05-25 07:31:28.230582+00:00
45.03.61.40.2Iris-setosa2020-05-25 07:31:28.230582+00:00
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width class \\\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa \n", + "1 4.9 3.0 1.4 0.2 Iris-setosa \n", + "2 4.7 3.2 1.3 0.2 Iris-setosa \n", + "3 4.6 3.1 1.5 0.2 Iris-setosa \n", + "4 5.0 3.6 1.4 0.2 Iris-setosa \n", + "\n", + " datetime \n", + "0 2020-05-25 07:31:28.230582+00:00 \n", + "1 2020-05-25 07:31:28.230582+00:00 \n", + "2 2020-05-25 07:31:28.230582+00:00 \n", + "3 2020-05-25 07:31:28.230582+00:00 \n", + "4 2020-05-25 07:31:28.230582+00:00 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_feature_names = [\"sepal_length\",\"sepal_width\",\"petal_length\",\"petal_width\"]\n", + "df = pd.read_csv(IRIS_DATASET, names=iris_feature_names + [\"class\"])\n", + "\n", + "# Add datetime to satisfy Feast\n", + "current_datetime = datetime.utcnow().replace(tzinfo=pytz.utc)\n", + "df['datetime'] = current_datetime - timedelta(days=1)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TFDV schema as part of the feature set definition\n", + "\n", + "An integral part of TFDV is the feature [schemas](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/schema.proto) that describe the expected properties of the data in a dataset, such as:\n", + "- expected feature presence\n", + "- type\n", + "- expected domains of features\n", + "\n", + "These schemas, which can be [manually defined or generated by TFDV](https://www.tensorflow.org/tfx/data_validation/get_started#inferring_a_schema_over_the_data), can be then used to extend the definition of features within the feature set. 
As part of the spec, the schema is persisted within Feast, and is used for both in-flight data validation, as well as offline integration with TFDV.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Ignoring feature datetime of type datetime64[ns, UTC]\n", + "/Users/zhiling/.pyenv/versions/3.7.2/envs/test-feast/lib/python3.7/site-packages/tensorflow_data_validation/arrow/arrow_util.py:236: FutureWarning: Calling .data on ChunkedArray is provided for compatibility after Column was removed, simply drop this attribute\n", + " types.FeaturePath([column_name]), column.data.chunk(0), weights):\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Entity class(ValueType.STRING) manually updated (replacing an existing field).\n", + "Feature sepal_length (ValueType.DOUBLE) added from dataframe.\n", + "Feature sepal_width (ValueType.DOUBLE) added from dataframe.\n", + "Feature petal_length (ValueType.DOUBLE) added from dataframe.\n", + "Feature petal_width (ValueType.DOUBLE) added from dataframe.\n", + "\n", + "{\n", + " \"spec\": {\n", + " \"name\": \"iris\",\n", + " \"entities\": [\n", + " {\n", + " \"name\": \"class\",\n", + " \"valueType\": \"STRING\"\n", + " }\n", + " ],\n", + " \"features\": [\n", + " {\n", + " \"name\": \"sepal_length\",\n", + " \"valueType\": \"DOUBLE\",\n", + " \"presence\": {\n", + " \"minFraction\": 1.0,\n", + " \"minCount\": \"1\"\n", + " },\n", + " \"shape\": {\n", + " \"dim\": [\n", + " {\n", + " \"size\": \"1\"\n", + " }\n", + " ]\n", + " }\n", + " },\n", + " {\n", + " \"name\": \"sepal_width\",\n", + " \"valueType\": \"DOUBLE\",\n", + " \"presence\": {\n", + " \"minFraction\": 1.0,\n", + " \"minCount\": \"1\"\n", + " },\n", + " \"shape\": {\n", + " \"dim\": [\n", + " {\n", + " \"size\": \"1\"\n", + " }\n", + " ]\n", + " }\n", + " },\n", + " {\n", + " \"name\": 
\"petal_length\",\n", + " \"valueType\": \"DOUBLE\",\n", + " \"presence\": {\n", + " \"minFraction\": 1.0,\n", + " \"minCount\": \"1\"\n", + " },\n", + " \"shape\": {\n", + " \"dim\": [\n", + " {\n", + " \"size\": \"1\"\n", + " }\n", + " ]\n", + " }\n", + " },\n", + " {\n", + " \"name\": \"petal_width\",\n", + " \"valueType\": \"DOUBLE\",\n", + " \"presence\": {\n", + " \"minFraction\": 1.0,\n", + " \"minCount\": \"1\"\n", + " },\n", + " \"shape\": {\n", + " \"dim\": [\n", + " {\n", + " \"size\": \"1\"\n", + " }\n", + " ]\n", + " },\n", + " \"floatDomain\": {\n", + " \"min\": 0.0\n", + " }\n", + " }\n", + " ]\n", + " },\n", + " \"meta\": {}\n", + "}\n" + ] + } + ], + "source": [ + "# Infer a schema over the iris dataset. These values can be tweaked as necessary.\n", + "stats = tfdv.generate_statistics_from_dataframe(df)\n", + "schema = tfdv.infer_schema(statistics=stats)\n", + "width_domain = schema_pb2.FloatDomain(min=0)\n", + "tfdv.set_domain(schema, 'petal_width', width_domain)\n", + "\n", + "# Create a new FeatureSet or retrieve an existing FeatureSet in Feast\n", + "feature_set = FeatureSet(name=\"iris\")\n", + "feature_set.infer_fields_from_df(df[['datetime'] + iris_feature_names], \n", + " entities=[Entity(name=\"class\", dtype=ValueType.STRING)])\n", + "\n", + "# Update the entities and features with constraints defined in the schema\n", + "feature_set.import_tfx_schema(schema)\n", + "print(feature_set)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing statistics over an ingested dataset\n", + "\n", + "Feast is able to compute statistics for any data that has been ingested into the system. Statistics can be computed over either discrete datasets using *dataset_ids* or periods of time using a specified time range.\n", + "\n", + "These statistics are computed at a historical store (caveat: only BQ is supported at the moment). 
The feature statistics are returned in the form of TFX's `DatasetFeatureStatisticsList`, which can then be directly fed back into TFDV methods to either visualise the data statistics, or validate the dataset."  ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature set created: \"iris\"\n", + "Waiting for feature set to be ready for ingestion...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 150/150 [00:01<00:00, 122.33rows/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ingestion complete!\n", + "\n", + "Ingestion statistics:\n", + "Success: 150/150\n", + "Removing temporary file(s)...\n", + "\n", + "ingestion id: 73ed84b1-1218-3702-b4c6-673503233264\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# Apply the featureset\n", + "client.apply(feature_set)\n", + "\n", + "# When a dataset is ingested into Feast, a unique ingestion id referencing the ingested dataset is returned. 
\n", + "ingestion_id = client.ingest(feature_set, df)\n", + "print(\"\\ningestion id: \" + ingestion_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Get statistics from Feast for the ingested dataset.\n", + "# The statistics are calculated over the data in the store specified.\n", + "stats = client.get_statistics(\n", + " feature_set_id=f'{PROJECT_NAME}/iris', \n", + " store=BIGQUERY_STORE_NAME, \n", + " features=iris_feature_names, \n", + " ingestion_ids=[ingestion_id])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualising statistics with facets\n", + "\n", + "Since Feast outputs statistics in a format compatible with the TFDV API, the stats object can be directly passed to `tfdv.visualize_statistics()` to visualise, in-line, the output statistics on [Facets](https://pair-code.github.io/facets/), allowing for easy and interactive exploration of the shape and distribution of the data inside Feast." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "tfdv.visualize_statistics(stats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validating correctness of subsequent datasets \n", + "\n", + "While it is useful to explore dataset statistics using facets, since we have already defined a schema that specifies a dataset's bounds of correctness, we can leverage TFDV's `validate_statistics` to validate if subsequent datasets are problematic or not. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is possible to validate correctness of a new dataset prior to ingestion by retrieving the schema from the feature set, and comparing computed statistics against that schema. 
\n", + "\n", + "This can be useful if we want to avoid ingesting problematic data into Feast." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Ignoring feature datetime of type datetime64[ns, UTC]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Anomaly short descriptionAnomaly long description
Feature name
'petal_width'Out-of-range valuesUnexpectedly low values: -1<-1(upto six significant digits)
'class'New columnNew column (column in data but not in schema)
\n", + "
" + ], + "text/plain": [ + " Anomaly short description \\\n", + "Feature name \n", + "'petal_width' Out-of-range values \n", + "'class' New column \n", + "\n", + " Anomaly long description \n", + "Feature name \n", + "'petal_width' Unexpectedly low values: -1<-1(upto six significant digits) \n", + "'class' New column (column in data but not in schema) " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Ingest a new dataset with obviously incorrect data\n", + "df_2 = pd.DataFrame(\n", + " {\n", + " \"datetime\": current_datetime,\n", + " \"class\": [\"Iris-setosa\", \"Iris-virginica\", \"Iris-nonsensica\"],\n", + " \"sepal_length\": [4.3, 6.9, 12],\n", + " \"sepal_width\": [3.0, 2.8, 1.1],\n", + " \"petal_length\": [1.2, 4.9, 2.2],\n", + " \"petal_width\": [0.1, 1.8, -1.0]\n", + " }\n", + ")\n", + "\n", + "# Validate correctness\n", + "stats_2 = tfdv.generate_statistics_from_dataframe(df_2)\n", + "anomalies = tfdv.validate_statistics(statistics=stats_2, schema=feature_set.export_tfx_schema())\n", + "tfdv.display_anomalies(anomalies)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, the data can be ingested into Feast, and the statistics computed at the store. This has the benefit of offloading statistics computation for large datasets to Feast." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 0%| | 0/3 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Anomaly short descriptionAnomaly long description
Feature name
'petal_width'Out-of-range valuesUnexpectedly low values: -1<-1(upto six significant digits)
\n", + "" + ], + "text/plain": [ + " Anomaly short description \\\n", + "Feature name \n", + "'petal_width' Out-of-range values \n", + "\n", + " Anomaly long description \n", + "Feature name \n", + "'petal_width' Unexpectedly low values: -1<-1(upto six significant digits) " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Ingest the data into Feast\n", + "ingestion_id_2 = client.ingest(feature_set, df_2)\n", + "time.sleep(10) # Sleep is not necessary if not using DirectRunner\n", + "\n", + "# Compute statistics over the new dataset\n", + "stats_2 = client.get_statistics(\n", + " feature_set_id=f'{PROJECT_NAME}/iris', \n", + " store=BIGQUERY_STORE_NAME, \n", + " features=iris_feature_names, \n", + " ingestion_ids=[ingestion_id_2])\n", + "\n", + "# Detect anomalies in the dataset\n", + "anomalies = tfdv.validate_statistics(statistics=stats_2, schema=feature_set.export_tfx_schema())\n", + "tfdv.display_anomalies(anomalies)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/infra/scripts/test-end-to-end-batch-dataflow.sh b/infra/scripts/test-end-to-end-batch-dataflow.sh index e8fb3b95653..4cb7f1998b4 100755 --- a/infra/scripts/test-end-to-end-batch-dataflow.sh +++ b/infra/scripts/test-end-to-end-batch-dataflow.sh @@ -274,7 +274,7 @@ core_ip=$(kubectl get -o jsonpath="{.spec.clusterIP}" service ${HELM_RELEASE_NAM serving_ip=$(kubectl get -o jsonpath="{.spec.clusterIP}" service ${HELM_RELEASE_NAME}-feast-batch-serving) set +e -pytest -v bq-batch-retrieval.py -m dataflow_runner --core_url "$core_ip:6565" --serving_url "$serving_ip:6566" --gcs_path 
"gs://${TEMP_BUCKET}/" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest -v bq/bq-batch-retrieval.py -m dataflow_runner --core_url "$core_ip:6565" --serving_url "$serving_ip:6566" --gcs_path "gs://${TEMP_BUCKET}/" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end-batch.sh b/infra/scripts/test-end-to-end-batch.sh index 988c346bde7..c225a8e2991 100755 --- a/infra/scripts/test-end-to-end-batch.sh +++ b/infra/scripts/test-end-to-end-batch.sh @@ -120,7 +120,7 @@ ORIGINAL_DIR=$(pwd) cd tests/e2e set +e -pytest bq-batch-retrieval.py -m ${PYTEST_MARK} --gcs_path "gs://${TEMP_BUCKET}/" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest bq/* -m ${PYTEST_MARK} --gcs_path "gs://${TEMP_BUCKET}/" --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then @@ -137,4 +137,4 @@ cd ${ORIGINAL_DIR} print_banner "Cleaning up" bq rm -r -f ${GOOGLE_CLOUD_PROJECT}:${DATASET_NAME} -exit ${TEST_EXIT_CODE} \ No newline at end of file +exit ${TEST_EXIT_CODE} diff --git a/infra/scripts/test-end-to-end-redis-cluster.sh b/infra/scripts/test-end-to-end-redis-cluster.sh index a11aebf0197..ed6cd42bc05 100755 --- a/infra/scripts/test-end-to-end-redis-cluster.sh +++ b/infra/scripts/test-end-to-end-redis-cluster.sh @@ -89,7 +89,7 @@ ORIGINAL_DIR=$(pwd) cd tests/e2e set +e -pytest basic-ingest-redis-serving.py --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest redis/* --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? 
if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/infra/scripts/test-end-to-end.sh b/infra/scripts/test-end-to-end.sh index c90059b7a05..c37ad20eb48 100755 --- a/infra/scripts/test-end-to-end.sh +++ b/infra/scripts/test-end-to-end.sh @@ -53,7 +53,7 @@ ORIGINAL_DIR=$(pwd) cd tests/e2e set +e -pytest basic-ingest-redis-serving.py --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +pytest redis/* --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml TEST_EXIT_CODE=$? if [[ ${TEST_EXIT_CODE} != 0 ]]; then diff --git a/protos/feast/core/CoreService.proto b/protos/feast/core/CoreService.proto index 3cd3c756830..05755b701d8 100644 --- a/protos/feast/core/CoreService.proto +++ b/protos/feast/core/CoreService.proto @@ -21,6 +21,8 @@ option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; option java_outer_classname = "CoreServiceProto"; option java_package = "feast.proto.core"; +import "google/protobuf/timestamp.proto"; +import "tensorflow_metadata/proto/v0/statistics.proto"; import "feast/core/FeatureSet.proto"; import "feast/core/Store.proto"; import "feast/core/FeatureSetReference.proto"; @@ -41,6 +43,11 @@ service CoreService { // sets currently stored in the registry. rpc ListFeatureSets (ListFeatureSetsRequest) returns (ListFeatureSetsResponse); + // Get feature statistics computed over the data in the batch stores. + // + // Returns a dataset containing TFDV statistics mapped to each valid historical store. + rpc GetFeatureStatistics (GetFeatureStatisticsRequest) returns (GetFeatureStatisticsResponse); + // Retrieve store details given a filter. // // Returns all stores matching that filter. If none are found, an empty list will be returned. @@ -270,3 +277,42 @@ message StopIngestionJobRequest { // Request from stopping an ingestion job message StopIngestionJobResponse {} + +message GetFeatureStatisticsRequest { + // Feature set to retrieve the statistics for. 
A fully qualified feature set + // id in the format of project/feature_set must be provided. + string feature_set_id = 1; + + // Optional filter which filters returned statistics by selected features. These + // features must be present in the data that is being processed. + repeated string features = 2; + + // Optional filter to select store over which the statistics will retrieved. + // Only historical stores are allowed. + string store = 3; + + // Optional start and end dates over which to filter statistical data + // Start date is inclusive, but end date is not. + // Only dates are supported, not times. + // Cannot be used with dataset_ids. + // If this period spans multiple days, unaggregatable statistics will be dropped. + google.protobuf.Timestamp start_date = 4; + google.protobuf.Timestamp end_date = 5; + + // Optional list of ingestion Ids by which to filter data before + // retrieving statistics. + // Cannot be used with the date ranges + // If multiple dataset ids are provided, unaggregatable statistics will be dropped. + repeated string ingestion_ids = 6; + + // Setting this flag to true will force a recalculation of statistics and overwrite results currently in the + // cache, if any. + bool force_refresh = 7; +} + +message GetFeatureStatisticsResponse { + // Contains statistics for the requested data. + // Due to the limitations of TFDV and Facets, only a single dataset can be returned in, + // despite the message being of list type. + tensorflow.metadata.v0.DatasetFeatureStatisticsList dataset_feature_statistics_list = 1; +} diff --git a/protos/tensorflow_metadata/proto/v0/statistics.proto b/protos/tensorflow_metadata/proto/v0/statistics.proto new file mode 100644 index 00000000000..3123dad8746 --- /dev/null +++ b/protos/tensorflow_metadata/proto/v0/statistics.proto @@ -0,0 +1,427 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +// Definitions for aggregated feature statistics for datasets. +// TODO(b/80075690): make a Javascript build rule for this. +// TODO(b/80075691): migrate Facets to use this. +syntax = "proto3"; +option cc_enable_arenas = true; + +package tensorflow.metadata.v0; + +option java_package = "org.tensorflow.metadata.v0"; +option java_multiple_files = true; +option go_package = "github.com/feast-dev/feast/sdk/go/protos/tensorflow_metadata/proto/v0"; + +import "tensorflow_metadata/proto/v0/path.proto"; + +// Copied from Facets feature_statistics.proto +// Must be kept binary-compatible with the original, until all usages +// are updated to use this version, or we write a proto-to-proto converter. + +// A list of features statistics for different datasets. If you wish to compare +// different datasets using this list, then the DatasetFeatureStatistics +// entries should all contain the same list of features. +message DatasetFeatureStatisticsList { + repeated DatasetFeatureStatistics datasets = 1; +} + +// The feature statistics for a single dataset. +message DatasetFeatureStatistics { + // The name of the dataset. + string name = 1; + // The number of examples in the dataset. + uint64 num_examples = 2; + + // Only valid if the weight feature was specified. + // Treats a missing weighted feature as zero. 
+ double weighted_num_examples = 4; + // The feature statistics for the dataset. + repeated FeatureNameStatistics features = 3; + + // Cross feature statistics for the dataset. + repeated CrossFeatureStatistics cross_features = 5; +} + +message CrossFeatureStatistics { + // The path of feature x. + Path path_x = 1; + // The path of feature y. + Path path_y = 2; + + // Number of occurrences of this feature cross in the data. If any of + // the features in the cross is missing, the example is ignored. + uint64 count = 3; + + oneof cross_stats { + NumericCrossStatistics num_cross_stats = 4; + CategoricalCrossStatistics categorical_cross_stats = 5; + } +} + +message NumericCrossStatistics { + // Pearson product-moment correlation coefficient. + float correlation = 1; + // Standard covariance. E[(X-E[X])*(Y-E[Y])] + float covariance = 2; +} + +message CategoricalCrossStatistics { + LiftStatistics lift = 1; +} + +message LiftStatistics { + // Lift information for each value of path_y. Lift is defined for each pair of + // values (x,y) as P(path_y=y|path_x=x)/P(path_y=y). + repeated LiftSeries lift_series = 1; + // Weighted lift information for each value of path_y. Weighted lift is + // defined for each pair of values (x,y) as P(path_y=y|path_x=x)/P(path_y=y) + // where probabilities are computed over weighted example space. + repeated LiftSeries weighted_lift_series = 2; +} + +// Container for lift information for a specific y-value. +message LiftSeries { + // A bucket for referring to binned numeric features. + message Bucket { + // The low value of the bucket, inclusive. + double low_value = 1; + // The high value of the bucket, exclusive (unless the high_value is + // positive infinity). + double high_value = 2; + } + + // The particular value of path_y corresponding to this LiftSeries. Each + // element in lift_values corresponds to the lift a different x_value and + // this specific y_value. 
+ oneof y_value { + int32 y_int = 1; + string y_string = 2; + Bucket y_bucket = 3; + } + + // The number of examples in which y_value appears. + oneof y_count_value { + uint64 y_count = 4; + double weighted_y_count = 5; + } + + // A container for lift information about a specific value of path_x. + message LiftValue { + oneof x_value { + int32 x_int = 1; + string x_string = 2; + } + // P(path_y=y|path_x=x) / P(path_y=y) for x_value and the enclosing y_value. + // In terms of concrete fields, this number represents: + // (x_and_y_count / x_count) / (y_count / num_examples) + double lift = 3; + // The number of examples in which x_value appears. + oneof x_count_value { + uint64 x_count = 4; + double weighted_x_count = 5; + } + // The number of examples in which x_value appears and y_value appears. + oneof x_and_y_count_value { + uint64 x_and_y_count = 6; + double weighted_x_and_y_count = 7; + } + } + + // The lifts for a each path_x value and this y_value. + repeated LiftValue lift_values = 6; +} + +// The complete set of statistics for a given feature name for a dataset. +message FeatureNameStatistics { + // The types supported by the feature statistics. When aggregating + // tf.Examples, if the bytelist contains a string, it is recommended to encode + // it here as STRING instead of BYTES in order to calculate string-specific + // statistical measures. + enum Type { + INT = 0; + FLOAT = 1; + STRING = 2; + BYTES = 3; + STRUCT = 4; + } + + // One can identify a field either by the name (for simple fields), or by + // a path (for structured fields). Note that: + // name: "foo" + // is equivalent to: + // path: {step:"foo"} + // Note: this oneof must be consistently either name or path across all + // FeatureNameStatistics in one DatasetFeatureStatistics. + oneof field_id { + // The feature name + string name = 1; + + // The path of the feature. + Path path = 8; + } + + // The data type of the feature + Type type = 2; + + // The statistics of the values of the feature. 
+ oneof stats { + NumericStatistics num_stats = 3; + StringStatistics string_stats = 4; + BytesStatistics bytes_stats = 5; + StructStatistics struct_stats = 7; + } + + // Any custom statistics can be stored in this list. + repeated CustomStatistic custom_stats = 6; +} + +// Common weighted statistics for all feature types. Statistics counting number +// of values (i.e., avg_num_values and tot_num_values) include NaNs. +// If the weighted column is missing, then this counts as a weight of 1 +// for that example. +message WeightedCommonStatistics { + // Weighted number of examples not missing. + double num_non_missing = 1; + // Weighted number of examples missing. + // Note that if the weighted column is zero, this does not count + // as missing. + double num_missing = 2; + // average number of values, weighted by the number of examples. + double avg_num_values = 3; + // tot_num_values = avg_num_values * num_non_missing. + // This is calculated directly, so should have less numerical error. + double tot_num_values = 4; +} + +// Stores the name and value of any custom statistic. The value can be a string, +// double, or histogram. +message CustomStatistic { + string name = 1; + oneof val { + double num = 2; + string str = 3; + Histogram histogram = 4; + RankHistogram rank_histogram = 5; + } +} + +// Statistics for a numeric feature in a dataset. +message NumericStatistics { + CommonStatistics common_stats = 1; + // The mean of the values + double mean = 2; + // The standard deviation of the values + double std_dev = 3; + // The number of values that equal 0 + uint64 num_zeros = 4; + // The minimum value + double min = 5; + // The median value + double median = 6; + // The maximum value + double max = 7; + // The histogram(s) of the feature values. + repeated Histogram histograms = 8; + + // Weighted statistics for the feature, if the values have weights. + WeightedNumericStatistics weighted_numeric_stats = 9; +} + +// Statistics for a string feature in a dataset. 
+message StringStatistics { + CommonStatistics common_stats = 1; + // The number of unique values + uint64 unique = 2; + + message FreqAndValue { + string value = 2; + + // The number of times the value occurs. Stored as a double to be able to + // handle weighted features. + double frequency = 3; + + // Deleted fields. + reserved 1; + } + // A sorted list of the most-frequent values and their frequencies, with + // the most-frequent being first. + repeated FreqAndValue top_values = 3; + + // The average length of the values + float avg_length = 4; + + // The rank histogram for the values of the feature. + // The rank is used to measure of how commonly the value is found in the + // dataset. The most common value would have a rank of 1, with the second-most + // common value having a rank of 2, and so on. + RankHistogram rank_histogram = 5; + + // Weighted statistics for the feature, if the values have weights. + WeightedStringStatistics weighted_string_stats = 6; + + // A vocabulary file, used for vocabularies too large to store in the proto + // itself. Note that the file may be relative to some context-dependent + // directory. E.g. in TFX the feature statistics will live in a PPP and + // vocabulary file names will be relative to this PPP. + string vocabulary_file = 7; +} + +// Statistics for a weighted numeric feature in a dataset. +message WeightedNumericStatistics { + // The weighted mean of the values + double mean = 1; + // The weighted standard deviation of the values + double std_dev = 2; + // The weighted median of the values + double median = 3; + + // The histogram(s) of the weighted feature values. + repeated Histogram histograms = 4; +} + +// Statistics for a weighted string feature in a dataset. +message WeightedStringStatistics { + // A sorted list of the most-frequent values and their weighted frequencies, + // with the most-frequent being first. 
+ repeated StringStatistics.FreqAndValue top_values = 1; + + // The rank histogram for the weighted values of the feature. + RankHistogram rank_histogram = 2; +} + +// Statistics for a bytes feature in a dataset. +message BytesStatistics { + CommonStatistics common_stats = 1; + // The number of unique values + uint64 unique = 2; + + // The average number of bytes in a value + float avg_num_bytes = 3; + // The minimum number of bytes in a value + float min_num_bytes = 4; + // The maximum number of bytes in a value + float max_num_bytes = 5; +} + +message StructStatistics { + CommonStatistics common_stats = 1; +} + +// Common statistics for all feature types. Statistics counting number of values +// (i.e., min_num_values, max_num_values, avg_num_values, and tot_num_values) +// include NaNs. +message CommonStatistics { + // The number of examples with at least one value for this feature. + uint64 num_non_missing = 1; + // The number of examples with no values for this feature. + uint64 num_missing = 2; + // The minimum number of values in a single example for this feature. + uint64 min_num_values = 3; + // The maximum number of values in a single example for this feature. + uint64 max_num_values = 4; + // The average number of values in a single example for this feature. + float avg_num_values = 5; + // tot_num_values = avg_num_values * num_non_missing. + // This is calculated directly, so should have less numerical error. + uint64 tot_num_values = 8; + // The quantiles histogram for the number of values in this feature. + Histogram num_values_histogram = 6; + WeightedCommonStatistics weighted_common_stats = 7; + // The histogram for the number of features in the feature list (only set if + // this feature is a non-context feature from a tf.SequenceExample). 
+ // This is different from num_values_histogram, as num_values_histogram tracks + // the count of all values for a feature in an example, whereas this tracks + // the length of the feature list for this feature in an example (where each + // feature list can contain multiple values). + Histogram feature_list_length_histogram = 9; +} + +// The data used to create a histogram of a numeric feature for a dataset. +message Histogram { + // Each bucket defines its low and high values along with its count. The + // low and high values must be a real number or positive or negative + // infinity. They cannot be NaN or undefined. Counts of those special values + // can be found in the numNaN and numUndefined fields. + message Bucket { + // The low value of the bucket, inclusive. + double low_value = 1; + // The high value of the bucket, exclusive (unless the highValue is + // positive infinity). + double high_value = 2; + + // The number of items in the bucket. Stored as a double to be able to + // handle weighted histograms. + double sample_count = 4; + + // Deleted fields. + reserved 3; + } + + // The number of NaN values in the dataset. + uint64 num_nan = 1; + // The number of undefined values in the dataset. + uint64 num_undefined = 2; + + // A list of buckets in the histogram, sorted from lowest bucket to highest + // bucket. + repeated Bucket buckets = 3; + + // The type of the histogram. A standard histogram has equal-width buckets. + // The quantiles type is used for when the histogram message is used to store + // quantile information (by using equal-count buckets with variable widths). + enum HistogramType { + STANDARD = 0; + QUANTILES = 1; + } + + // The type of the histogram. + HistogramType type = 4; + + // An optional descriptive name of the histogram, to be used for labeling. + string name = 5; +} + +// The data used to create a rank histogram of a non-numeric feature of a +// dataset. 
The rank of a value in a feature can be used as a measure of how +// commonly the value is found in the entire dataset. With bucket sizes of one, +// this becomes a distribution function of all feature values. +message RankHistogram { + // Each bucket defines its start and end ranks along with its count. + message Bucket { + // The low rank of the bucket, inclusive. + uint64 low_rank = 1; + // The high rank of the bucket, exclusive. + uint64 high_rank = 2; + + // The label for the bucket. Can be used to list or summarize the values in + // this rank bucket. + string label = 4; + + // The number of items in the bucket. Stored as a double to be able to + // handle weighted histograms. + double sample_count = 5; + + // Deleted fields. + reserved 3; + } + + // A list of buckets in the histogram, sorted from lowest-ranked bucket to + // highest-ranked bucket. + repeated Bucket buckets = 1; + + // An optional descriptive name of the histogram, to be used for labeling. + string name = 2; +} \ No newline at end of file diff --git a/sdk/go/protos/tensorflow_metadata/proto/v0/statistics.pb.go b/sdk/go/protos/tensorflow_metadata/proto/v0/statistics.pb.go new file mode 100644 index 00000000000..3d9e7da362d --- /dev/null +++ b/sdk/go/protos/tensorflow_metadata/proto/v0/statistics.pb.go @@ -0,0 +1,3120 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ============================================================================= + +// Definitions for aggregated feature statistics for datasets. +// TODO(b/80075690): make a Javascript build rule for this. +// TODO(b/80075691): migrate Facets to use this. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.24.0 +// protoc v3.10.0 +// source: tensorflow_metadata/proto/v0/statistics.proto + +package v0 + +import ( + proto "github.com/golang/protobuf/proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// This is a compile-time assertion that a sufficiently up-to-date version +// of the legacy proto package is being used. +const _ = proto.ProtoPackageIsVersion4 + +// The types supported by the feature statistics. When aggregating +// tf.Examples, if the bytelist contains a string, it is recommended to encode +// it here as STRING instead of BYTES in order to calculate string-specific +// statistical measures. +type FeatureNameStatistics_Type int32 + +const ( + FeatureNameStatistics_INT FeatureNameStatistics_Type = 0 + FeatureNameStatistics_FLOAT FeatureNameStatistics_Type = 1 + FeatureNameStatistics_STRING FeatureNameStatistics_Type = 2 + FeatureNameStatistics_BYTES FeatureNameStatistics_Type = 3 + FeatureNameStatistics_STRUCT FeatureNameStatistics_Type = 4 +) + +// Enum value maps for FeatureNameStatistics_Type. 
+var ( + FeatureNameStatistics_Type_name = map[int32]string{ + 0: "INT", + 1: "FLOAT", + 2: "STRING", + 3: "BYTES", + 4: "STRUCT", + } + FeatureNameStatistics_Type_value = map[string]int32{ + "INT": 0, + "FLOAT": 1, + "STRING": 2, + "BYTES": 3, + "STRUCT": 4, + } +) + +func (x FeatureNameStatistics_Type) Enum() *FeatureNameStatistics_Type { + p := new(FeatureNameStatistics_Type) + *p = x + return p +} + +func (x FeatureNameStatistics_Type) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureNameStatistics_Type) Descriptor() protoreflect.EnumDescriptor { + return file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes[0].Descriptor() +} + +func (FeatureNameStatistics_Type) Type() protoreflect.EnumType { + return &file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes[0] +} + +func (x FeatureNameStatistics_Type) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use FeatureNameStatistics_Type.Descriptor instead. +func (FeatureNameStatistics_Type) EnumDescriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{7, 0} +} + +// The type of the histogram. A standard histogram has equal-width buckets. +// The quantiles type is used for when the histogram message is used to store +// quantile information (by using equal-count buckets with variable widths). +type Histogram_HistogramType int32 + +const ( + Histogram_STANDARD Histogram_HistogramType = 0 + Histogram_QUANTILES Histogram_HistogramType = 1 +) + +// Enum value maps for Histogram_HistogramType. 
+var ( + Histogram_HistogramType_name = map[int32]string{ + 0: "STANDARD", + 1: "QUANTILES", + } + Histogram_HistogramType_value = map[string]int32{ + "STANDARD": 0, + "QUANTILES": 1, + } +) + +func (x Histogram_HistogramType) Enum() *Histogram_HistogramType { + p := new(Histogram_HistogramType) + *p = x + return p +} + +func (x Histogram_HistogramType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Histogram_HistogramType) Descriptor() protoreflect.EnumDescriptor { + return file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes[1].Descriptor() +} + +func (Histogram_HistogramType) Type() protoreflect.EnumType { + return &file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes[1] +} + +func (x Histogram_HistogramType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Histogram_HistogramType.Descriptor instead. +func (Histogram_HistogramType) EnumDescriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{17, 0} +} + +// A list of features statistics for different datasets. If you wish to compare +// different datasets using this list, then the DatasetFeatureStatistics +// entries should all contain the same list of features. 
+type DatasetFeatureStatisticsList struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Datasets []*DatasetFeatureStatistics `protobuf:"bytes,1,rep,name=datasets,proto3" json:"datasets,omitempty"` +} + +func (x *DatasetFeatureStatisticsList) Reset() { + *x = DatasetFeatureStatisticsList{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DatasetFeatureStatisticsList) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DatasetFeatureStatisticsList) ProtoMessage() {} + +func (x *DatasetFeatureStatisticsList) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DatasetFeatureStatisticsList.ProtoReflect.Descriptor instead. +func (*DatasetFeatureStatisticsList) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{0} +} + +func (x *DatasetFeatureStatisticsList) GetDatasets() []*DatasetFeatureStatistics { + if x != nil { + return x.Datasets + } + return nil +} + +// The feature statistics for a single dataset. +type DatasetFeatureStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The name of the dataset. + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // The number of examples in the dataset. + NumExamples uint64 `protobuf:"varint,2,opt,name=num_examples,json=numExamples,proto3" json:"num_examples,omitempty"` + // Only valid if the weight feature was specified. 
+ // Treats a missing weighted feature as zero. + WeightedNumExamples float64 `protobuf:"fixed64,4,opt,name=weighted_num_examples,json=weightedNumExamples,proto3" json:"weighted_num_examples,omitempty"` + // The feature statistics for the dataset. + Features []*FeatureNameStatistics `protobuf:"bytes,3,rep,name=features,proto3" json:"features,omitempty"` + // Cross feature statistics for the dataset. + CrossFeatures []*CrossFeatureStatistics `protobuf:"bytes,5,rep,name=cross_features,json=crossFeatures,proto3" json:"cross_features,omitempty"` +} + +func (x *DatasetFeatureStatistics) Reset() { + *x = DatasetFeatureStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DatasetFeatureStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DatasetFeatureStatistics) ProtoMessage() {} + +func (x *DatasetFeatureStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DatasetFeatureStatistics.ProtoReflect.Descriptor instead. 
+func (*DatasetFeatureStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{1} +} + +func (x *DatasetFeatureStatistics) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *DatasetFeatureStatistics) GetNumExamples() uint64 { + if x != nil { + return x.NumExamples + } + return 0 +} + +func (x *DatasetFeatureStatistics) GetWeightedNumExamples() float64 { + if x != nil { + return x.WeightedNumExamples + } + return 0 +} + +func (x *DatasetFeatureStatistics) GetFeatures() []*FeatureNameStatistics { + if x != nil { + return x.Features + } + return nil +} + +func (x *DatasetFeatureStatistics) GetCrossFeatures() []*CrossFeatureStatistics { + if x != nil { + return x.CrossFeatures + } + return nil +} + +type CrossFeatureStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The path of feature x. + PathX *Path `protobuf:"bytes,1,opt,name=path_x,json=pathX,proto3" json:"path_x,omitempty"` + // The path of feature y. + PathY *Path `protobuf:"bytes,2,opt,name=path_y,json=pathY,proto3" json:"path_y,omitempty"` + // Number of occurrences of this feature cross in the data. If any of + // the features in the cross is missing, the example is ignored. 
+ Count uint64 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"` + // Types that are assignable to CrossStats: + // *CrossFeatureStatistics_NumCrossStats + // *CrossFeatureStatistics_CategoricalCrossStats + CrossStats isCrossFeatureStatistics_CrossStats `protobuf_oneof:"cross_stats"` +} + +func (x *CrossFeatureStatistics) Reset() { + *x = CrossFeatureStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CrossFeatureStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CrossFeatureStatistics) ProtoMessage() {} + +func (x *CrossFeatureStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CrossFeatureStatistics.ProtoReflect.Descriptor instead. 
+func (*CrossFeatureStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{2} +} + +func (x *CrossFeatureStatistics) GetPathX() *Path { + if x != nil { + return x.PathX + } + return nil +} + +func (x *CrossFeatureStatistics) GetPathY() *Path { + if x != nil { + return x.PathY + } + return nil +} + +func (x *CrossFeatureStatistics) GetCount() uint64 { + if x != nil { + return x.Count + } + return 0 +} + +func (m *CrossFeatureStatistics) GetCrossStats() isCrossFeatureStatistics_CrossStats { + if m != nil { + return m.CrossStats + } + return nil +} + +func (x *CrossFeatureStatistics) GetNumCrossStats() *NumericCrossStatistics { + if x, ok := x.GetCrossStats().(*CrossFeatureStatistics_NumCrossStats); ok { + return x.NumCrossStats + } + return nil +} + +func (x *CrossFeatureStatistics) GetCategoricalCrossStats() *CategoricalCrossStatistics { + if x, ok := x.GetCrossStats().(*CrossFeatureStatistics_CategoricalCrossStats); ok { + return x.CategoricalCrossStats + } + return nil +} + +type isCrossFeatureStatistics_CrossStats interface { + isCrossFeatureStatistics_CrossStats() +} + +type CrossFeatureStatistics_NumCrossStats struct { + NumCrossStats *NumericCrossStatistics `protobuf:"bytes,4,opt,name=num_cross_stats,json=numCrossStats,proto3,oneof"` +} + +type CrossFeatureStatistics_CategoricalCrossStats struct { + CategoricalCrossStats *CategoricalCrossStatistics `protobuf:"bytes,5,opt,name=categorical_cross_stats,json=categoricalCrossStats,proto3,oneof"` +} + +func (*CrossFeatureStatistics_NumCrossStats) isCrossFeatureStatistics_CrossStats() {} + +func (*CrossFeatureStatistics_CategoricalCrossStats) isCrossFeatureStatistics_CrossStats() {} + +type NumericCrossStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Pearson product-moment correlation coefficient. 
+ Correlation float32 `protobuf:"fixed32,1,opt,name=correlation,proto3" json:"correlation,omitempty"` + // Standard covariance. E[(X-E[X])*(Y-E[Y])] + Covariance float32 `protobuf:"fixed32,2,opt,name=covariance,proto3" json:"covariance,omitempty"` +} + +func (x *NumericCrossStatistics) Reset() { + *x = NumericCrossStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *NumericCrossStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*NumericCrossStatistics) ProtoMessage() {} + +func (x *NumericCrossStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NumericCrossStatistics.ProtoReflect.Descriptor instead. 
+func (*NumericCrossStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{3} +} + +func (x *NumericCrossStatistics) GetCorrelation() float32 { + if x != nil { + return x.Correlation + } + return 0 +} + +func (x *NumericCrossStatistics) GetCovariance() float32 { + if x != nil { + return x.Covariance + } + return 0 +} + +type CategoricalCrossStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Lift *LiftStatistics `protobuf:"bytes,1,opt,name=lift,proto3" json:"lift,omitempty"` +} + +func (x *CategoricalCrossStatistics) Reset() { + *x = CategoricalCrossStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CategoricalCrossStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CategoricalCrossStatistics) ProtoMessage() {} + +func (x *CategoricalCrossStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CategoricalCrossStatistics.ProtoReflect.Descriptor instead. +func (*CategoricalCrossStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{4} +} + +func (x *CategoricalCrossStatistics) GetLift() *LiftStatistics { + if x != nil { + return x.Lift + } + return nil +} + +type LiftStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Lift information for each value of path_y. 
Lift is defined for each pair of + // values (x,y) as P(path_y=y|path_x=x)/P(path_y=y). + LiftSeries []*LiftSeries `protobuf:"bytes,1,rep,name=lift_series,json=liftSeries,proto3" json:"lift_series,omitempty"` + // Weighted lift information for each value of path_y. Weighted lift is + // defined for each pair of values (x,y) as P(path_y=y|path_x=x)/P(path_y=y) + // where probabilities are computed over weighted example space. + WeightedLiftSeries []*LiftSeries `protobuf:"bytes,2,rep,name=weighted_lift_series,json=weightedLiftSeries,proto3" json:"weighted_lift_series,omitempty"` +} + +func (x *LiftStatistics) Reset() { + *x = LiftStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LiftStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LiftStatistics) ProtoMessage() {} + +func (x *LiftStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LiftStatistics.ProtoReflect.Descriptor instead. +func (*LiftStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{5} +} + +func (x *LiftStatistics) GetLiftSeries() []*LiftSeries { + if x != nil { + return x.LiftSeries + } + return nil +} + +func (x *LiftStatistics) GetWeightedLiftSeries() []*LiftSeries { + if x != nil { + return x.WeightedLiftSeries + } + return nil +} + +// Container for lift information for a specific y-value. 
+type LiftSeries struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The particular value of path_y corresponding to this LiftSeries. Each + // element in lift_values corresponds to the lift a different x_value and + // this specific y_value. + // + // Types that are assignable to YValue: + // *LiftSeries_YInt + // *LiftSeries_YString + // *LiftSeries_YBucket + YValue isLiftSeries_YValue `protobuf_oneof:"y_value"` + // The number of examples in which y_value appears. + // + // Types that are assignable to YCountValue: + // *LiftSeries_YCount + // *LiftSeries_WeightedYCount + YCountValue isLiftSeries_YCountValue `protobuf_oneof:"y_count_value"` + // The lifts for a each path_x value and this y_value. + LiftValues []*LiftSeries_LiftValue `protobuf:"bytes,6,rep,name=lift_values,json=liftValues,proto3" json:"lift_values,omitempty"` +} + +func (x *LiftSeries) Reset() { + *x = LiftSeries{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LiftSeries) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LiftSeries) ProtoMessage() {} + +func (x *LiftSeries) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LiftSeries.ProtoReflect.Descriptor instead. 
+func (*LiftSeries) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{6} +} + +func (m *LiftSeries) GetYValue() isLiftSeries_YValue { + if m != nil { + return m.YValue + } + return nil +} + +func (x *LiftSeries) GetYInt() int32 { + if x, ok := x.GetYValue().(*LiftSeries_YInt); ok { + return x.YInt + } + return 0 +} + +func (x *LiftSeries) GetYString() string { + if x, ok := x.GetYValue().(*LiftSeries_YString); ok { + return x.YString + } + return "" +} + +func (x *LiftSeries) GetYBucket() *LiftSeries_Bucket { + if x, ok := x.GetYValue().(*LiftSeries_YBucket); ok { + return x.YBucket + } + return nil +} + +func (m *LiftSeries) GetYCountValue() isLiftSeries_YCountValue { + if m != nil { + return m.YCountValue + } + return nil +} + +func (x *LiftSeries) GetYCount() uint64 { + if x, ok := x.GetYCountValue().(*LiftSeries_YCount); ok { + return x.YCount + } + return 0 +} + +func (x *LiftSeries) GetWeightedYCount() float64 { + if x, ok := x.GetYCountValue().(*LiftSeries_WeightedYCount); ok { + return x.WeightedYCount + } + return 0 +} + +func (x *LiftSeries) GetLiftValues() []*LiftSeries_LiftValue { + if x != nil { + return x.LiftValues + } + return nil +} + +type isLiftSeries_YValue interface { + isLiftSeries_YValue() +} + +type LiftSeries_YInt struct { + YInt int32 `protobuf:"varint,1,opt,name=y_int,json=yInt,proto3,oneof"` +} + +type LiftSeries_YString struct { + YString string `protobuf:"bytes,2,opt,name=y_string,json=yString,proto3,oneof"` +} + +type LiftSeries_YBucket struct { + YBucket *LiftSeries_Bucket `protobuf:"bytes,3,opt,name=y_bucket,json=yBucket,proto3,oneof"` +} + +func (*LiftSeries_YInt) isLiftSeries_YValue() {} + +func (*LiftSeries_YString) isLiftSeries_YValue() {} + +func (*LiftSeries_YBucket) isLiftSeries_YValue() {} + +type isLiftSeries_YCountValue interface { + isLiftSeries_YCountValue() +} + +type LiftSeries_YCount struct { + YCount uint64 
`protobuf:"varint,4,opt,name=y_count,json=yCount,proto3,oneof"` +} + +type LiftSeries_WeightedYCount struct { + WeightedYCount float64 `protobuf:"fixed64,5,opt,name=weighted_y_count,json=weightedYCount,proto3,oneof"` +} + +func (*LiftSeries_YCount) isLiftSeries_YCountValue() {} + +func (*LiftSeries_WeightedYCount) isLiftSeries_YCountValue() {} + +// The complete set of statistics for a given feature name for a dataset. +type FeatureNameStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // One can identify a field either by the name (for simple fields), or by + // a path (for structured fields). Note that: + // name: "foo" + // is equivalent to: + // path: {step:"foo"} + // Note: this oneof must be consistently either name or path across all + // FeatureNameStatistics in one DatasetFeatureStatistics. + // + // Types that are assignable to FieldId: + // *FeatureNameStatistics_Name + // *FeatureNameStatistics_Path + FieldId isFeatureNameStatistics_FieldId `protobuf_oneof:"field_id"` + // The data type of the feature + Type FeatureNameStatistics_Type `protobuf:"varint,2,opt,name=type,proto3,enum=tensorflow.metadata.v0.FeatureNameStatistics_Type" json:"type,omitempty"` + // The statistics of the values of the feature. + // + // Types that are assignable to Stats: + // *FeatureNameStatistics_NumStats + // *FeatureNameStatistics_StringStats + // *FeatureNameStatistics_BytesStats + // *FeatureNameStatistics_StructStats + Stats isFeatureNameStatistics_Stats `protobuf_oneof:"stats"` + // Any custom statistics can be stored in this list. 
+ CustomStats []*CustomStatistic `protobuf:"bytes,6,rep,name=custom_stats,json=customStats,proto3" json:"custom_stats,omitempty"` +} + +func (x *FeatureNameStatistics) Reset() { + *x = FeatureNameStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FeatureNameStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FeatureNameStatistics) ProtoMessage() {} + +func (x *FeatureNameStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FeatureNameStatistics.ProtoReflect.Descriptor instead. +func (*FeatureNameStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{7} +} + +func (m *FeatureNameStatistics) GetFieldId() isFeatureNameStatistics_FieldId { + if m != nil { + return m.FieldId + } + return nil +} + +func (x *FeatureNameStatistics) GetName() string { + if x, ok := x.GetFieldId().(*FeatureNameStatistics_Name); ok { + return x.Name + } + return "" +} + +func (x *FeatureNameStatistics) GetPath() *Path { + if x, ok := x.GetFieldId().(*FeatureNameStatistics_Path); ok { + return x.Path + } + return nil +} + +func (x *FeatureNameStatistics) GetType() FeatureNameStatistics_Type { + if x != nil { + return x.Type + } + return FeatureNameStatistics_INT +} + +func (m *FeatureNameStatistics) GetStats() isFeatureNameStatistics_Stats { + if m != nil { + return m.Stats + } + return nil +} + +func (x *FeatureNameStatistics) GetNumStats() *NumericStatistics { + if x, ok := x.GetStats().(*FeatureNameStatistics_NumStats); ok { + return 
x.NumStats + } + return nil +} + +func (x *FeatureNameStatistics) GetStringStats() *StringStatistics { + if x, ok := x.GetStats().(*FeatureNameStatistics_StringStats); ok { + return x.StringStats + } + return nil +} + +func (x *FeatureNameStatistics) GetBytesStats() *BytesStatistics { + if x, ok := x.GetStats().(*FeatureNameStatistics_BytesStats); ok { + return x.BytesStats + } + return nil +} + +func (x *FeatureNameStatistics) GetStructStats() *StructStatistics { + if x, ok := x.GetStats().(*FeatureNameStatistics_StructStats); ok { + return x.StructStats + } + return nil +} + +func (x *FeatureNameStatistics) GetCustomStats() []*CustomStatistic { + if x != nil { + return x.CustomStats + } + return nil +} + +type isFeatureNameStatistics_FieldId interface { + isFeatureNameStatistics_FieldId() +} + +type FeatureNameStatistics_Name struct { + // The feature name + Name string `protobuf:"bytes,1,opt,name=name,proto3,oneof"` +} + +type FeatureNameStatistics_Path struct { + // The path of the feature. 
+ Path *Path `protobuf:"bytes,8,opt,name=path,proto3,oneof"` +} + +func (*FeatureNameStatistics_Name) isFeatureNameStatistics_FieldId() {} + +func (*FeatureNameStatistics_Path) isFeatureNameStatistics_FieldId() {} + +type isFeatureNameStatistics_Stats interface { + isFeatureNameStatistics_Stats() +} + +type FeatureNameStatistics_NumStats struct { + NumStats *NumericStatistics `protobuf:"bytes,3,opt,name=num_stats,json=numStats,proto3,oneof"` +} + +type FeatureNameStatistics_StringStats struct { + StringStats *StringStatistics `protobuf:"bytes,4,opt,name=string_stats,json=stringStats,proto3,oneof"` +} + +type FeatureNameStatistics_BytesStats struct { + BytesStats *BytesStatistics `protobuf:"bytes,5,opt,name=bytes_stats,json=bytesStats,proto3,oneof"` +} + +type FeatureNameStatistics_StructStats struct { + StructStats *StructStatistics `protobuf:"bytes,7,opt,name=struct_stats,json=structStats,proto3,oneof"` +} + +func (*FeatureNameStatistics_NumStats) isFeatureNameStatistics_Stats() {} + +func (*FeatureNameStatistics_StringStats) isFeatureNameStatistics_Stats() {} + +func (*FeatureNameStatistics_BytesStats) isFeatureNameStatistics_Stats() {} + +func (*FeatureNameStatistics_StructStats) isFeatureNameStatistics_Stats() {} + +// Common weighted statistics for all feature types. Statistics counting number +// of values (i.e., avg_num_values and tot_num_values) include NaNs. +// If the weighted column is missing, then this counts as a weight of 1 +// for that example. +type WeightedCommonStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Weighted number of examples not missing. + NumNonMissing float64 `protobuf:"fixed64,1,opt,name=num_non_missing,json=numNonMissing,proto3" json:"num_non_missing,omitempty"` + // Weighted number of examples missing. + // Note that if the weighted column is zero, this does not count + // as missing. 
+ NumMissing float64 `protobuf:"fixed64,2,opt,name=num_missing,json=numMissing,proto3" json:"num_missing,omitempty"` + // average number of values, weighted by the number of examples. + AvgNumValues float64 `protobuf:"fixed64,3,opt,name=avg_num_values,json=avgNumValues,proto3" json:"avg_num_values,omitempty"` + // tot_num_values = avg_num_values * num_non_missing. + // This is calculated directly, so should have less numerical error. + TotNumValues float64 `protobuf:"fixed64,4,opt,name=tot_num_values,json=totNumValues,proto3" json:"tot_num_values,omitempty"` +} + +func (x *WeightedCommonStatistics) Reset() { + *x = WeightedCommonStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *WeightedCommonStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*WeightedCommonStatistics) ProtoMessage() {} + +func (x *WeightedCommonStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use WeightedCommonStatistics.ProtoReflect.Descriptor instead. 
+func (*WeightedCommonStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{8} +} + +func (x *WeightedCommonStatistics) GetNumNonMissing() float64 { + if x != nil { + return x.NumNonMissing + } + return 0 +} + +func (x *WeightedCommonStatistics) GetNumMissing() float64 { + if x != nil { + return x.NumMissing + } + return 0 +} + +func (x *WeightedCommonStatistics) GetAvgNumValues() float64 { + if x != nil { + return x.AvgNumValues + } + return 0 +} + +func (x *WeightedCommonStatistics) GetTotNumValues() float64 { + if x != nil { + return x.TotNumValues + } + return 0 +} + +// Stores the name and value of any custom statistic. The value can be a string, +// double, or histogram. +type CustomStatistic struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // Types that are assignable to Val: + // *CustomStatistic_Num + // *CustomStatistic_Str + // *CustomStatistic_Histogram + // *CustomStatistic_RankHistogram + Val isCustomStatistic_Val `protobuf_oneof:"val"` +} + +func (x *CustomStatistic) Reset() { + *x = CustomStatistic{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CustomStatistic) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CustomStatistic) ProtoMessage() {} + +func (x *CustomStatistic) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CustomStatistic.ProtoReflect.Descriptor instead. 
+func (*CustomStatistic) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{9} +} + +func (x *CustomStatistic) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (m *CustomStatistic) GetVal() isCustomStatistic_Val { + if m != nil { + return m.Val + } + return nil +} + +func (x *CustomStatistic) GetNum() float64 { + if x, ok := x.GetVal().(*CustomStatistic_Num); ok { + return x.Num + } + return 0 +} + +func (x *CustomStatistic) GetStr() string { + if x, ok := x.GetVal().(*CustomStatistic_Str); ok { + return x.Str + } + return "" +} + +func (x *CustomStatistic) GetHistogram() *Histogram { + if x, ok := x.GetVal().(*CustomStatistic_Histogram); ok { + return x.Histogram + } + return nil +} + +func (x *CustomStatistic) GetRankHistogram() *RankHistogram { + if x, ok := x.GetVal().(*CustomStatistic_RankHistogram); ok { + return x.RankHistogram + } + return nil +} + +type isCustomStatistic_Val interface { + isCustomStatistic_Val() +} + +type CustomStatistic_Num struct { + Num float64 `protobuf:"fixed64,2,opt,name=num,proto3,oneof"` +} + +type CustomStatistic_Str struct { + Str string `protobuf:"bytes,3,opt,name=str,proto3,oneof"` +} + +type CustomStatistic_Histogram struct { + Histogram *Histogram `protobuf:"bytes,4,opt,name=histogram,proto3,oneof"` +} + +type CustomStatistic_RankHistogram struct { + RankHistogram *RankHistogram `protobuf:"bytes,5,opt,name=rank_histogram,json=rankHistogram,proto3,oneof"` +} + +func (*CustomStatistic_Num) isCustomStatistic_Val() {} + +func (*CustomStatistic_Str) isCustomStatistic_Val() {} + +func (*CustomStatistic_Histogram) isCustomStatistic_Val() {} + +func (*CustomStatistic_RankHistogram) isCustomStatistic_Val() {} + +// Statistics for a numeric feature in a dataset. 
+type NumericStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CommonStats *CommonStatistics `protobuf:"bytes,1,opt,name=common_stats,json=commonStats,proto3" json:"common_stats,omitempty"` + // The mean of the values + Mean float64 `protobuf:"fixed64,2,opt,name=mean,proto3" json:"mean,omitempty"` + // The standard deviation of the values + StdDev float64 `protobuf:"fixed64,3,opt,name=std_dev,json=stdDev,proto3" json:"std_dev,omitempty"` + // The number of values that equal 0 + NumZeros uint64 `protobuf:"varint,4,opt,name=num_zeros,json=numZeros,proto3" json:"num_zeros,omitempty"` + // The minimum value + Min float64 `protobuf:"fixed64,5,opt,name=min,proto3" json:"min,omitempty"` + // The median value + Median float64 `protobuf:"fixed64,6,opt,name=median,proto3" json:"median,omitempty"` + // The maximum value + Max float64 `protobuf:"fixed64,7,opt,name=max,proto3" json:"max,omitempty"` + // The histogram(s) of the feature values. + Histograms []*Histogram `protobuf:"bytes,8,rep,name=histograms,proto3" json:"histograms,omitempty"` + // Weighted statistics for the feature, if the values have weights. 
+ WeightedNumericStats *WeightedNumericStatistics `protobuf:"bytes,9,opt,name=weighted_numeric_stats,json=weightedNumericStats,proto3" json:"weighted_numeric_stats,omitempty"` +} + +func (x *NumericStatistics) Reset() { + *x = NumericStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *NumericStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*NumericStatistics) ProtoMessage() {} + +func (x *NumericStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NumericStatistics.ProtoReflect.Descriptor instead. 
+func (*NumericStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{10} +} + +func (x *NumericStatistics) GetCommonStats() *CommonStatistics { + if x != nil { + return x.CommonStats + } + return nil +} + +func (x *NumericStatistics) GetMean() float64 { + if x != nil { + return x.Mean + } + return 0 +} + +func (x *NumericStatistics) GetStdDev() float64 { + if x != nil { + return x.StdDev + } + return 0 +} + +func (x *NumericStatistics) GetNumZeros() uint64 { + if x != nil { + return x.NumZeros + } + return 0 +} + +func (x *NumericStatistics) GetMin() float64 { + if x != nil { + return x.Min + } + return 0 +} + +func (x *NumericStatistics) GetMedian() float64 { + if x != nil { + return x.Median + } + return 0 +} + +func (x *NumericStatistics) GetMax() float64 { + if x != nil { + return x.Max + } + return 0 +} + +func (x *NumericStatistics) GetHistograms() []*Histogram { + if x != nil { + return x.Histograms + } + return nil +} + +func (x *NumericStatistics) GetWeightedNumericStats() *WeightedNumericStatistics { + if x != nil { + return x.WeightedNumericStats + } + return nil +} + +// Statistics for a string feature in a dataset. +type StringStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CommonStats *CommonStatistics `protobuf:"bytes,1,opt,name=common_stats,json=commonStats,proto3" json:"common_stats,omitempty"` + // The number of unique values + Unique uint64 `protobuf:"varint,2,opt,name=unique,proto3" json:"unique,omitempty"` + // A sorted list of the most-frequent values and their frequencies, with + // the most-frequent being first. 
+ TopValues []*StringStatistics_FreqAndValue `protobuf:"bytes,3,rep,name=top_values,json=topValues,proto3" json:"top_values,omitempty"` + // The average length of the values + AvgLength float32 `protobuf:"fixed32,4,opt,name=avg_length,json=avgLength,proto3" json:"avg_length,omitempty"` + // The rank histogram for the values of the feature. + // The rank is used to measure of how commonly the value is found in the + // dataset. The most common value would have a rank of 1, with the second-most + // common value having a rank of 2, and so on. + RankHistogram *RankHistogram `protobuf:"bytes,5,opt,name=rank_histogram,json=rankHistogram,proto3" json:"rank_histogram,omitempty"` + // Weighted statistics for the feature, if the values have weights. + WeightedStringStats *WeightedStringStatistics `protobuf:"bytes,6,opt,name=weighted_string_stats,json=weightedStringStats,proto3" json:"weighted_string_stats,omitempty"` + // A vocabulary file, used for vocabularies too large to store in the proto + // itself. Note that the file may be relative to some context-dependent + // directory. E.g. in TFX the feature statistics will live in a PPP and + // vocabulary file names will be relative to this PPP. 
+ VocabularyFile string `protobuf:"bytes,7,opt,name=vocabulary_file,json=vocabularyFile,proto3" json:"vocabulary_file,omitempty"` +} + +func (x *StringStatistics) Reset() { + *x = StringStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StringStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StringStatistics) ProtoMessage() {} + +func (x *StringStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StringStatistics.ProtoReflect.Descriptor instead. +func (*StringStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{11} +} + +func (x *StringStatistics) GetCommonStats() *CommonStatistics { + if x != nil { + return x.CommonStats + } + return nil +} + +func (x *StringStatistics) GetUnique() uint64 { + if x != nil { + return x.Unique + } + return 0 +} + +func (x *StringStatistics) GetTopValues() []*StringStatistics_FreqAndValue { + if x != nil { + return x.TopValues + } + return nil +} + +func (x *StringStatistics) GetAvgLength() float32 { + if x != nil { + return x.AvgLength + } + return 0 +} + +func (x *StringStatistics) GetRankHistogram() *RankHistogram { + if x != nil { + return x.RankHistogram + } + return nil +} + +func (x *StringStatistics) GetWeightedStringStats() *WeightedStringStatistics { + if x != nil { + return x.WeightedStringStats + } + return nil +} + +func (x *StringStatistics) GetVocabularyFile() string { + if x != nil { + return x.VocabularyFile + } + return "" +} + +// Statistics for a 
weighted numeric feature in a dataset. +type WeightedNumericStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The weighted mean of the values + Mean float64 `protobuf:"fixed64,1,opt,name=mean,proto3" json:"mean,omitempty"` + // The weighted standard deviation of the values + StdDev float64 `protobuf:"fixed64,2,opt,name=std_dev,json=stdDev,proto3" json:"std_dev,omitempty"` + // The weighted median of the values + Median float64 `protobuf:"fixed64,3,opt,name=median,proto3" json:"median,omitempty"` + // The histogram(s) of the weighted feature values. + Histograms []*Histogram `protobuf:"bytes,4,rep,name=histograms,proto3" json:"histograms,omitempty"` +} + +func (x *WeightedNumericStatistics) Reset() { + *x = WeightedNumericStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *WeightedNumericStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*WeightedNumericStatistics) ProtoMessage() {} + +func (x *WeightedNumericStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use WeightedNumericStatistics.ProtoReflect.Descriptor instead. 
+func (*WeightedNumericStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{12} +} + +func (x *WeightedNumericStatistics) GetMean() float64 { + if x != nil { + return x.Mean + } + return 0 +} + +func (x *WeightedNumericStatistics) GetStdDev() float64 { + if x != nil { + return x.StdDev + } + return 0 +} + +func (x *WeightedNumericStatistics) GetMedian() float64 { + if x != nil { + return x.Median + } + return 0 +} + +func (x *WeightedNumericStatistics) GetHistograms() []*Histogram { + if x != nil { + return x.Histograms + } + return nil +} + +// Statistics for a weighted string feature in a dataset. +type WeightedStringStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // A sorted list of the most-frequent values and their weighted frequencies, + // with the most-frequent being first. + TopValues []*StringStatistics_FreqAndValue `protobuf:"bytes,1,rep,name=top_values,json=topValues,proto3" json:"top_values,omitempty"` + // The rank histogram for the weighted values of the feature. 
+ RankHistogram *RankHistogram `protobuf:"bytes,2,opt,name=rank_histogram,json=rankHistogram,proto3" json:"rank_histogram,omitempty"` +} + +func (x *WeightedStringStatistics) Reset() { + *x = WeightedStringStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *WeightedStringStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*WeightedStringStatistics) ProtoMessage() {} + +func (x *WeightedStringStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use WeightedStringStatistics.ProtoReflect.Descriptor instead. +func (*WeightedStringStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{13} +} + +func (x *WeightedStringStatistics) GetTopValues() []*StringStatistics_FreqAndValue { + if x != nil { + return x.TopValues + } + return nil +} + +func (x *WeightedStringStatistics) GetRankHistogram() *RankHistogram { + if x != nil { + return x.RankHistogram + } + return nil +} + +// Statistics for a bytes feature in a dataset. 
+type BytesStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CommonStats *CommonStatistics `protobuf:"bytes,1,opt,name=common_stats,json=commonStats,proto3" json:"common_stats,omitempty"` + // The number of unique values + Unique uint64 `protobuf:"varint,2,opt,name=unique,proto3" json:"unique,omitempty"` + // The average number of bytes in a value + AvgNumBytes float32 `protobuf:"fixed32,3,opt,name=avg_num_bytes,json=avgNumBytes,proto3" json:"avg_num_bytes,omitempty"` + // The minimum number of bytes in a value + MinNumBytes float32 `protobuf:"fixed32,4,opt,name=min_num_bytes,json=minNumBytes,proto3" json:"min_num_bytes,omitempty"` + // The maximum number of bytes in a value + MaxNumBytes float32 `protobuf:"fixed32,5,opt,name=max_num_bytes,json=maxNumBytes,proto3" json:"max_num_bytes,omitempty"` +} + +func (x *BytesStatistics) Reset() { + *x = BytesStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *BytesStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BytesStatistics) ProtoMessage() {} + +func (x *BytesStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BytesStatistics.ProtoReflect.Descriptor instead. 
+func (*BytesStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{14} +} + +func (x *BytesStatistics) GetCommonStats() *CommonStatistics { + if x != nil { + return x.CommonStats + } + return nil +} + +func (x *BytesStatistics) GetUnique() uint64 { + if x != nil { + return x.Unique + } + return 0 +} + +func (x *BytesStatistics) GetAvgNumBytes() float32 { + if x != nil { + return x.AvgNumBytes + } + return 0 +} + +func (x *BytesStatistics) GetMinNumBytes() float32 { + if x != nil { + return x.MinNumBytes + } + return 0 +} + +func (x *BytesStatistics) GetMaxNumBytes() float32 { + if x != nil { + return x.MaxNumBytes + } + return 0 +} + +type StructStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CommonStats *CommonStatistics `protobuf:"bytes,1,opt,name=common_stats,json=commonStats,proto3" json:"common_stats,omitempty"` +} + +func (x *StructStatistics) Reset() { + *x = StructStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StructStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StructStatistics) ProtoMessage() {} + +func (x *StructStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[15] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StructStatistics.ProtoReflect.Descriptor instead. 
+func (*StructStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{15} +} + +func (x *StructStatistics) GetCommonStats() *CommonStatistics { + if x != nil { + return x.CommonStats + } + return nil +} + +// Common statistics for all feature types. Statistics counting number of values +// (i.e., min_num_values, max_num_values, avg_num_values, and tot_num_values) +// include NaNs. +type CommonStatistics struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The number of examples with at least one value for this feature. + NumNonMissing uint64 `protobuf:"varint,1,opt,name=num_non_missing,json=numNonMissing,proto3" json:"num_non_missing,omitempty"` + // The number of examples with no values for this feature. + NumMissing uint64 `protobuf:"varint,2,opt,name=num_missing,json=numMissing,proto3" json:"num_missing,omitempty"` + // The minimum number of values in a single example for this feature. + MinNumValues uint64 `protobuf:"varint,3,opt,name=min_num_values,json=minNumValues,proto3" json:"min_num_values,omitempty"` + // The maximum number of values in a single example for this feature. + MaxNumValues uint64 `protobuf:"varint,4,opt,name=max_num_values,json=maxNumValues,proto3" json:"max_num_values,omitempty"` + // The average number of values in a single example for this feature. + AvgNumValues float32 `protobuf:"fixed32,5,opt,name=avg_num_values,json=avgNumValues,proto3" json:"avg_num_values,omitempty"` + // tot_num_values = avg_num_values * num_non_missing. + // This is calculated directly, so should have less numerical error. + TotNumValues uint64 `protobuf:"varint,8,opt,name=tot_num_values,json=totNumValues,proto3" json:"tot_num_values,omitempty"` + // The quantiles histogram for the number of values in this feature. 
+ NumValuesHistogram *Histogram `protobuf:"bytes,6,opt,name=num_values_histogram,json=numValuesHistogram,proto3" json:"num_values_histogram,omitempty"` + WeightedCommonStats *WeightedCommonStatistics `protobuf:"bytes,7,opt,name=weighted_common_stats,json=weightedCommonStats,proto3" json:"weighted_common_stats,omitempty"` + // The histogram for the number of features in the feature list (only set if + // this feature is a non-context feature from a tf.SequenceExample). + // This is different from num_values_histogram, as num_values_histogram tracks + // the count of all values for a feature in an example, whereas this tracks + // the length of the feature list for this feature in an example (where each + // feature list can contain multiple values). + FeatureListLengthHistogram *Histogram `protobuf:"bytes,9,opt,name=feature_list_length_histogram,json=featureListLengthHistogram,proto3" json:"feature_list_length_histogram,omitempty"` +} + +func (x *CommonStatistics) Reset() { + *x = CommonStatistics{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommonStatistics) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommonStatistics) ProtoMessage() {} + +func (x *CommonStatistics) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[16] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommonStatistics.ProtoReflect.Descriptor instead. 
+func (*CommonStatistics) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{16} +} + +func (x *CommonStatistics) GetNumNonMissing() uint64 { + if x != nil { + return x.NumNonMissing + } + return 0 +} + +func (x *CommonStatistics) GetNumMissing() uint64 { + if x != nil { + return x.NumMissing + } + return 0 +} + +func (x *CommonStatistics) GetMinNumValues() uint64 { + if x != nil { + return x.MinNumValues + } + return 0 +} + +func (x *CommonStatistics) GetMaxNumValues() uint64 { + if x != nil { + return x.MaxNumValues + } + return 0 +} + +func (x *CommonStatistics) GetAvgNumValues() float32 { + if x != nil { + return x.AvgNumValues + } + return 0 +} + +func (x *CommonStatistics) GetTotNumValues() uint64 { + if x != nil { + return x.TotNumValues + } + return 0 +} + +func (x *CommonStatistics) GetNumValuesHistogram() *Histogram { + if x != nil { + return x.NumValuesHistogram + } + return nil +} + +func (x *CommonStatistics) GetWeightedCommonStats() *WeightedCommonStatistics { + if x != nil { + return x.WeightedCommonStats + } + return nil +} + +func (x *CommonStatistics) GetFeatureListLengthHistogram() *Histogram { + if x != nil { + return x.FeatureListLengthHistogram + } + return nil +} + +// The data used to create a histogram of a numeric feature for a dataset. +type Histogram struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The number of NaN values in the dataset. + NumNan uint64 `protobuf:"varint,1,opt,name=num_nan,json=numNan,proto3" json:"num_nan,omitempty"` + // The number of undefined values in the dataset. + NumUndefined uint64 `protobuf:"varint,2,opt,name=num_undefined,json=numUndefined,proto3" json:"num_undefined,omitempty"` + // A list of buckets in the histogram, sorted from lowest bucket to highest + // bucket. 
+ Buckets []*Histogram_Bucket `protobuf:"bytes,3,rep,name=buckets,proto3" json:"buckets,omitempty"` + // The type of the histogram. + Type Histogram_HistogramType `protobuf:"varint,4,opt,name=type,proto3,enum=tensorflow.metadata.v0.Histogram_HistogramType" json:"type,omitempty"` + // An optional descriptive name of the histogram, to be used for labeling. + Name string `protobuf:"bytes,5,opt,name=name,proto3" json:"name,omitempty"` +} + +func (x *Histogram) Reset() { + *x = Histogram{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Histogram) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Histogram) ProtoMessage() {} + +func (x *Histogram) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[17] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Histogram.ProtoReflect.Descriptor instead. +func (*Histogram) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{17} +} + +func (x *Histogram) GetNumNan() uint64 { + if x != nil { + return x.NumNan + } + return 0 +} + +func (x *Histogram) GetNumUndefined() uint64 { + if x != nil { + return x.NumUndefined + } + return 0 +} + +func (x *Histogram) GetBuckets() []*Histogram_Bucket { + if x != nil { + return x.Buckets + } + return nil +} + +func (x *Histogram) GetType() Histogram_HistogramType { + if x != nil { + return x.Type + } + return Histogram_STANDARD +} + +func (x *Histogram) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +// The data used to create a rank histogram of a non-numeric feature of a +// dataset. 
The rank of a value in a feature can be used as a measure of how +// commonly the value is found in the entire dataset. With bucket sizes of one, +// this becomes a distribution function of all feature values. +type RankHistogram struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // A list of buckets in the histogram, sorted from lowest-ranked bucket to + // highest-ranked bucket. + Buckets []*RankHistogram_Bucket `protobuf:"bytes,1,rep,name=buckets,proto3" json:"buckets,omitempty"` + // An optional descriptive name of the histogram, to be used for labeling. + Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` +} + +func (x *RankHistogram) Reset() { + *x = RankHistogram{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *RankHistogram) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RankHistogram) ProtoMessage() {} + +func (x *RankHistogram) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[18] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RankHistogram.ProtoReflect.Descriptor instead. +func (*RankHistogram) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{18} +} + +func (x *RankHistogram) GetBuckets() []*RankHistogram_Bucket { + if x != nil { + return x.Buckets + } + return nil +} + +func (x *RankHistogram) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +// A bucket for referring to binned numeric features. 
+type LiftSeries_Bucket struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The low value of the bucket, inclusive. + LowValue float64 `protobuf:"fixed64,1,opt,name=low_value,json=lowValue,proto3" json:"low_value,omitempty"` + // The high value of the bucket, exclusive (unless the high_value is + // positive infinity). + HighValue float64 `protobuf:"fixed64,2,opt,name=high_value,json=highValue,proto3" json:"high_value,omitempty"` +} + +func (x *LiftSeries_Bucket) Reset() { + *x = LiftSeries_Bucket{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LiftSeries_Bucket) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LiftSeries_Bucket) ProtoMessage() {} + +func (x *LiftSeries_Bucket) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[19] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LiftSeries_Bucket.ProtoReflect.Descriptor instead. +func (*LiftSeries_Bucket) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{6, 0} +} + +func (x *LiftSeries_Bucket) GetLowValue() float64 { + if x != nil { + return x.LowValue + } + return 0 +} + +func (x *LiftSeries_Bucket) GetHighValue() float64 { + if x != nil { + return x.HighValue + } + return 0 +} + +// A container for lift information about a specific value of path_x. 
+type LiftSeries_LiftValue struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to XValue: + // *LiftSeries_LiftValue_XInt + // *LiftSeries_LiftValue_XString + XValue isLiftSeries_LiftValue_XValue `protobuf_oneof:"x_value"` + // P(path_y=y|path_x=x) / P(path_y=y) for x_value and the enclosing y_value. + // In terms of concrete fields, this number represents: + // (x_and_y_count / x_count) / (y_count / num_examples) + Lift float64 `protobuf:"fixed64,3,opt,name=lift,proto3" json:"lift,omitempty"` + // The number of examples in which x_value appears. + // + // Types that are assignable to XCountValue: + // *LiftSeries_LiftValue_XCount + // *LiftSeries_LiftValue_WeightedXCount + XCountValue isLiftSeries_LiftValue_XCountValue `protobuf_oneof:"x_count_value"` + // The number of examples in which x_value appears and y_value appears. + // + // Types that are assignable to XAndYCountValue: + // *LiftSeries_LiftValue_XAndYCount + // *LiftSeries_LiftValue_WeightedXAndYCount + XAndYCountValue isLiftSeries_LiftValue_XAndYCountValue `protobuf_oneof:"x_and_y_count_value"` +} + +func (x *LiftSeries_LiftValue) Reset() { + *x = LiftSeries_LiftValue{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LiftSeries_LiftValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LiftSeries_LiftValue) ProtoMessage() {} + +func (x *LiftSeries_LiftValue) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[20] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use 
LiftSeries_LiftValue.ProtoReflect.Descriptor instead. +func (*LiftSeries_LiftValue) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{6, 1} +} + +func (m *LiftSeries_LiftValue) GetXValue() isLiftSeries_LiftValue_XValue { + if m != nil { + return m.XValue + } + return nil +} + +func (x *LiftSeries_LiftValue) GetXInt() int32 { + if x, ok := x.GetXValue().(*LiftSeries_LiftValue_XInt); ok { + return x.XInt + } + return 0 +} + +func (x *LiftSeries_LiftValue) GetXString() string { + if x, ok := x.GetXValue().(*LiftSeries_LiftValue_XString); ok { + return x.XString + } + return "" +} + +func (x *LiftSeries_LiftValue) GetLift() float64 { + if x != nil { + return x.Lift + } + return 0 +} + +func (m *LiftSeries_LiftValue) GetXCountValue() isLiftSeries_LiftValue_XCountValue { + if m != nil { + return m.XCountValue + } + return nil +} + +func (x *LiftSeries_LiftValue) GetXCount() uint64 { + if x, ok := x.GetXCountValue().(*LiftSeries_LiftValue_XCount); ok { + return x.XCount + } + return 0 +} + +func (x *LiftSeries_LiftValue) GetWeightedXCount() float64 { + if x, ok := x.GetXCountValue().(*LiftSeries_LiftValue_WeightedXCount); ok { + return x.WeightedXCount + } + return 0 +} + +func (m *LiftSeries_LiftValue) GetXAndYCountValue() isLiftSeries_LiftValue_XAndYCountValue { + if m != nil { + return m.XAndYCountValue + } + return nil +} + +func (x *LiftSeries_LiftValue) GetXAndYCount() uint64 { + if x, ok := x.GetXAndYCountValue().(*LiftSeries_LiftValue_XAndYCount); ok { + return x.XAndYCount + } + return 0 +} + +func (x *LiftSeries_LiftValue) GetWeightedXAndYCount() float64 { + if x, ok := x.GetXAndYCountValue().(*LiftSeries_LiftValue_WeightedXAndYCount); ok { + return x.WeightedXAndYCount + } + return 0 +} + +type isLiftSeries_LiftValue_XValue interface { + isLiftSeries_LiftValue_XValue() +} + +type LiftSeries_LiftValue_XInt struct { + XInt int32 `protobuf:"varint,1,opt,name=x_int,json=xInt,proto3,oneof"` +} + +type 
LiftSeries_LiftValue_XString struct { + XString string `protobuf:"bytes,2,opt,name=x_string,json=xString,proto3,oneof"` +} + +func (*LiftSeries_LiftValue_XInt) isLiftSeries_LiftValue_XValue() {} + +func (*LiftSeries_LiftValue_XString) isLiftSeries_LiftValue_XValue() {} + +type isLiftSeries_LiftValue_XCountValue interface { + isLiftSeries_LiftValue_XCountValue() +} + +type LiftSeries_LiftValue_XCount struct { + XCount uint64 `protobuf:"varint,4,opt,name=x_count,json=xCount,proto3,oneof"` +} + +type LiftSeries_LiftValue_WeightedXCount struct { + WeightedXCount float64 `protobuf:"fixed64,5,opt,name=weighted_x_count,json=weightedXCount,proto3,oneof"` +} + +func (*LiftSeries_LiftValue_XCount) isLiftSeries_LiftValue_XCountValue() {} + +func (*LiftSeries_LiftValue_WeightedXCount) isLiftSeries_LiftValue_XCountValue() {} + +type isLiftSeries_LiftValue_XAndYCountValue interface { + isLiftSeries_LiftValue_XAndYCountValue() +} + +type LiftSeries_LiftValue_XAndYCount struct { + XAndYCount uint64 `protobuf:"varint,6,opt,name=x_and_y_count,json=xAndYCount,proto3,oneof"` +} + +type LiftSeries_LiftValue_WeightedXAndYCount struct { + WeightedXAndYCount float64 `protobuf:"fixed64,7,opt,name=weighted_x_and_y_count,json=weightedXAndYCount,proto3,oneof"` +} + +func (*LiftSeries_LiftValue_XAndYCount) isLiftSeries_LiftValue_XAndYCountValue() {} + +func (*LiftSeries_LiftValue_WeightedXAndYCount) isLiftSeries_LiftValue_XAndYCountValue() {} + +type StringStatistics_FreqAndValue struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + // The number of times the value occurs. Stored as a double to be able to + // handle weighted features. 
+ Frequency float64 `protobuf:"fixed64,3,opt,name=frequency,proto3" json:"frequency,omitempty"` +} + +func (x *StringStatistics_FreqAndValue) Reset() { + *x = StringStatistics_FreqAndValue{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StringStatistics_FreqAndValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StringStatistics_FreqAndValue) ProtoMessage() {} + +func (x *StringStatistics_FreqAndValue) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[21] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StringStatistics_FreqAndValue.ProtoReflect.Descriptor instead. +func (*StringStatistics_FreqAndValue) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{11, 0} +} + +func (x *StringStatistics_FreqAndValue) GetValue() string { + if x != nil { + return x.Value + } + return "" +} + +func (x *StringStatistics_FreqAndValue) GetFrequency() float64 { + if x != nil { + return x.Frequency + } + return 0 +} + +// Each bucket defines its low and high values along with its count. The +// low and high values must be a real number or positive or negative +// infinity. They cannot be NaN or undefined. Counts of those special values +// can be found in the numNaN and numUndefined fields. +type Histogram_Bucket struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The low value of the bucket, inclusive. 
+ LowValue float64 `protobuf:"fixed64,1,opt,name=low_value,json=lowValue,proto3" json:"low_value,omitempty"` + // The high value of the bucket, exclusive (unless the highValue is + // positive infinity). + HighValue float64 `protobuf:"fixed64,2,opt,name=high_value,json=highValue,proto3" json:"high_value,omitempty"` + // The number of items in the bucket. Stored as a double to be able to + // handle weighted histograms. + SampleCount float64 `protobuf:"fixed64,4,opt,name=sample_count,json=sampleCount,proto3" json:"sample_count,omitempty"` +} + +func (x *Histogram_Bucket) Reset() { + *x = Histogram_Bucket{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Histogram_Bucket) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Histogram_Bucket) ProtoMessage() {} + +func (x *Histogram_Bucket) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[22] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Histogram_Bucket.ProtoReflect.Descriptor instead. +func (*Histogram_Bucket) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{17, 0} +} + +func (x *Histogram_Bucket) GetLowValue() float64 { + if x != nil { + return x.LowValue + } + return 0 +} + +func (x *Histogram_Bucket) GetHighValue() float64 { + if x != nil { + return x.HighValue + } + return 0 +} + +func (x *Histogram_Bucket) GetSampleCount() float64 { + if x != nil { + return x.SampleCount + } + return 0 +} + +// Each bucket defines its start and end ranks along with its count. 
+type RankHistogram_Bucket struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The low rank of the bucket, inclusive. + LowRank uint64 `protobuf:"varint,1,opt,name=low_rank,json=lowRank,proto3" json:"low_rank,omitempty"` + // The high rank of the bucket, exclusive. + HighRank uint64 `protobuf:"varint,2,opt,name=high_rank,json=highRank,proto3" json:"high_rank,omitempty"` + // The label for the bucket. Can be used to list or summarize the values in + // this rank bucket. + Label string `protobuf:"bytes,4,opt,name=label,proto3" json:"label,omitempty"` + // The number of items in the bucket. Stored as a double to be able to + // handle weighted histograms. + SampleCount float64 `protobuf:"fixed64,5,opt,name=sample_count,json=sampleCount,proto3" json:"sample_count,omitempty"` +} + +func (x *RankHistogram_Bucket) Reset() { + *x = RankHistogram_Bucket{} + if protoimpl.UnsafeEnabled { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *RankHistogram_Bucket) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RankHistogram_Bucket) ProtoMessage() {} + +func (x *RankHistogram_Bucket) ProtoReflect() protoreflect.Message { + mi := &file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[23] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RankHistogram_Bucket.ProtoReflect.Descriptor instead. 
+func (*RankHistogram_Bucket) Descriptor() ([]byte, []int) { + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP(), []int{18, 0} +} + +func (x *RankHistogram_Bucket) GetLowRank() uint64 { + if x != nil { + return x.LowRank + } + return 0 +} + +func (x *RankHistogram_Bucket) GetHighRank() uint64 { + if x != nil { + return x.HighRank + } + return 0 +} + +func (x *RankHistogram_Bucket) GetLabel() string { + if x != nil { + return x.Label + } + return "" +} + +func (x *RankHistogram_Bucket) GetSampleCount() float64 { + if x != nil { + return x.SampleCount + } + return 0 +} + +var File_tensorflow_metadata_proto_v0_statistics_proto protoreflect.FileDescriptor + +var file_tensorflow_metadata_proto_v0_statistics_proto_rawDesc = []byte{ + 0x0a, 0x2d, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x76, 0x30, 0x2f, 0x73, + 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x16, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x1a, 0x27, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, + 0x6c, 0x6f, 0x77, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x76, 0x30, 0x2f, 0x70, 0x61, 0x74, 0x68, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x22, 0x6c, 0x0a, 0x1c, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x4c, 0x69, 0x73, 0x74, + 0x12, 0x4c, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x44, 0x61, 0x74, 0x61, + 0x73, 0x65, 0x74, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 
0x53, 0x74, 0x61, 0x74, 0x69, 0x73, + 0x74, 0x69, 0x63, 0x73, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x22, 0xa7, + 0x02, 0x0a, 0x18, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, + 0x21, 0x0a, 0x0c, 0x6e, 0x75, 0x6d, 0x5f, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x6e, 0x75, 0x6d, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x73, 0x12, 0x32, 0x0a, 0x15, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x5f, 0x6e, + 0x75, 0x6d, 0x5f, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x01, 0x52, 0x13, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x4e, 0x75, 0x6d, 0x45, 0x78, + 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x73, 0x12, 0x49, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x53, 0x74, 0x61, + 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x12, 0x55, 0x0a, 0x0e, 0x63, 0x72, 0x6f, 0x73, 0x73, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x76, 0x30, 0x2e, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, + 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x0d, 0x63, 0x72, 0x6f, 0x73, 0x73, + 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x22, 0xef, 0x02, 0x0a, 0x16, 0x43, 0x72, 0x6f, + 
0x73, 0x73, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, + 0x69, 0x63, 0x73, 0x12, 0x33, 0x0a, 0x06, 0x70, 0x61, 0x74, 0x68, 0x5f, 0x78, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, + 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x50, 0x61, 0x74, + 0x68, 0x52, 0x05, 0x70, 0x61, 0x74, 0x68, 0x58, 0x12, 0x33, 0x0a, 0x06, 0x70, 0x61, 0x74, 0x68, + 0x5f, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x50, 0x61, 0x74, 0x68, 0x52, 0x05, 0x70, 0x61, 0x74, 0x68, 0x59, 0x12, 0x14, 0x0a, + 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x63, 0x6f, + 0x75, 0x6e, 0x74, 0x12, 0x58, 0x0a, 0x0f, 0x6e, 0x75, 0x6d, 0x5f, 0x63, 0x72, 0x6f, 0x73, 0x73, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x74, + 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x4e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x43, 0x72, 0x6f, + 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x48, 0x00, 0x52, 0x0d, + 0x6e, 0x75, 0x6d, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x6c, 0x0a, + 0x17, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x63, 0x61, 0x6c, 0x5f, 0x63, 0x72, 0x6f, + 0x73, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x32, + 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, + 0x63, 0x61, 0x6c, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, + 0x63, 0x73, 0x48, 0x00, 0x52, 0x15, 
0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x63, 0x61, + 0x6c, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x73, 0x42, 0x0d, 0x0a, 0x0b, 0x63, + 0x72, 0x6f, 0x73, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x22, 0x5a, 0x0a, 0x16, 0x4e, 0x75, + 0x6d, 0x65, 0x72, 0x69, 0x63, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, + 0x74, 0x69, 0x63, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x63, 0x6f, 0x72, 0x72, 0x65, 0x6c, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x63, 0x6f, 0x72, 0x72, 0x65, + 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1e, 0x0a, 0x0a, 0x63, 0x6f, 0x76, 0x61, 0x72, 0x69, + 0x61, 0x6e, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x63, 0x6f, 0x76, 0x61, + 0x72, 0x69, 0x61, 0x6e, 0x63, 0x65, 0x22, 0x58, 0x0a, 0x1a, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, + 0x72, 0x69, 0x63, 0x61, 0x6c, 0x43, 0x72, 0x6f, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, + 0x74, 0x69, 0x63, 0x73, 0x12, 0x3a, 0x0a, 0x04, 0x6c, 0x69, 0x66, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x4c, 0x69, 0x66, 0x74, + 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x04, 0x6c, 0x69, 0x66, 0x74, + 0x22, 0xab, 0x01, 0x0a, 0x0e, 0x4c, 0x69, 0x66, 0x74, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, + 0x69, 0x63, 0x73, 0x12, 0x43, 0x0a, 0x0b, 0x6c, 0x69, 0x66, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x69, + 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x4c, 0x69, 0x66, 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x52, 0x0a, 0x6c, 0x69, + 0x66, 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x12, 0x54, 0x0a, 0x14, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x65, 0x64, 0x5f, 0x6c, 0x69, 0x66, 0x74, 0x5f, 0x73, 0x65, 
0x72, 0x69, 0x65, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, + 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, + 0x4c, 0x69, 0x66, 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x52, 0x12, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x65, 0x64, 0x4c, 0x69, 0x66, 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x22, 0xab, + 0x05, 0x0a, 0x0a, 0x4c, 0x69, 0x66, 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x12, 0x15, 0x0a, + 0x05, 0x79, 0x5f, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x04, + 0x79, 0x49, 0x6e, 0x74, 0x12, 0x1b, 0x0a, 0x08, 0x79, 0x5f, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x07, 0x79, 0x53, 0x74, 0x72, 0x69, 0x6e, + 0x67, 0x12, 0x46, 0x0a, 0x08, 0x79, 0x5f, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, + 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x4c, 0x69, 0x66, + 0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x2e, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x48, 0x00, + 0x52, 0x07, 0x79, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x19, 0x0a, 0x07, 0x79, 0x5f, 0x63, + 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x48, 0x01, 0x52, 0x06, 0x79, 0x43, + 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, 0x10, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, + 0x5f, 0x79, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x01, 0x48, 0x01, + 0x52, 0x0e, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x59, 0x43, 0x6f, 0x75, 0x6e, 0x74, + 0x12, 0x4d, 0x0a, 0x0b, 0x6c, 0x69, 0x66, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, + 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, + 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x4c, + 0x69, 0x66, 
0x74, 0x53, 0x65, 0x72, 0x69, 0x65, 0x73, 0x2e, 0x4c, 0x69, 0x66, 0x74, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x52, 0x0a, 0x6c, 0x69, 0x66, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x1a, + 0x44, 0x0a, 0x06, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x6c, 0x6f, 0x77, + 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x01, 0x52, 0x08, 0x6c, 0x6f, + 0x77, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x68, 0x69, 0x67, 0x68, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x09, 0x68, 0x69, 0x67, 0x68, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x1a, 0xa8, 0x02, 0x0a, 0x09, 0x4c, 0x69, 0x66, 0x74, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x12, 0x15, 0x0a, 0x05, 0x78, 0x5f, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x48, 0x00, 0x52, 0x04, 0x78, 0x49, 0x6e, 0x74, 0x12, 0x1b, 0x0a, 0x08, 0x78, 0x5f, + 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x07, + 0x78, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x69, 0x66, 0x74, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x01, 0x52, 0x04, 0x6c, 0x69, 0x66, 0x74, 0x12, 0x19, 0x0a, 0x07, 0x78, + 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x48, 0x01, 0x52, 0x06, + 0x78, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x2a, 0x0a, 0x10, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, + 0x65, 0x64, 0x5f, 0x78, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x01, + 0x48, 0x01, 0x52, 0x0e, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x58, 0x43, 0x6f, 0x75, + 0x6e, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x78, 0x5f, 0x61, 0x6e, 0x64, 0x5f, 0x79, 0x5f, 0x63, 0x6f, + 0x75, 0x6e, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x48, 0x02, 0x52, 0x0a, 0x78, 0x41, 0x6e, + 0x64, 0x59, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x34, 0x0a, 0x16, 0x77, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x65, 0x64, 0x5f, 0x78, 0x5f, 0x61, 0x6e, 0x64, 0x5f, 0x79, 0x5f, 0x63, 0x6f, 0x75, 0x6e, + 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x01, 0x48, 
0x02, 0x52, 0x12, 0x77, 0x65, 0x69, 0x67, 0x68, + 0x74, 0x65, 0x64, 0x58, 0x41, 0x6e, 0x64, 0x59, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x42, 0x09, 0x0a, + 0x07, 0x78, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x0f, 0x0a, 0x0d, 0x78, 0x5f, 0x63, 0x6f, + 0x75, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x15, 0x0a, 0x13, 0x78, 0x5f, 0x61, + 0x6e, 0x64, 0x5f, 0x79, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x42, 0x09, 0x0a, 0x07, 0x79, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x0f, 0x0a, 0x0d, 0x79, + 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0xfd, 0x04, 0x0a, + 0x15, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x53, 0x74, 0x61, 0x74, + 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x14, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x32, 0x0a, 0x04, + 0x70, 0x61, 0x74, 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6e, + 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x76, 0x30, 0x2e, 0x50, 0x61, 0x74, 0x68, 0x48, 0x00, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, + 0x12, 0x46, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x32, + 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, + 0x61, 0x6d, 0x65, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x2e, 0x54, 0x79, + 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x48, 0x0a, 0x09, 0x6e, 0x75, 0x6d, 0x5f, + 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x74, 0x65, + 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x4e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x53, 0x74, 
0x61, 0x74, + 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x48, 0x01, 0x52, 0x08, 0x6e, 0x75, 0x6d, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x12, 0x4d, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x74, 0x61, + 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, + 0x63, 0x73, 0x48, 0x01, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, + 0x73, 0x12, 0x4a, 0x0a, 0x0b, 0x62, 0x79, 0x74, 0x65, 0x73, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, + 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, + 0x42, 0x79, 0x74, 0x65, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x48, + 0x01, 0x52, 0x0a, 0x62, 0x79, 0x74, 0x65, 0x73, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x4d, 0x0a, + 0x0c, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x07, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, + 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x53, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x48, 0x01, 0x52, + 0x0b, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x4a, 0x0a, 0x0c, + 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x43, 0x75, 0x73, 0x74, + 0x6f, 0x6d, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x52, 0x0b, 0x63, 0x75, 0x73, + 0x74, 0x6f, 0x6d, 0x53, 
0x74, 0x61, 0x74, 0x73, 0x22, 0x3d, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x07, 0x0a, 0x03, 0x49, 0x4e, 0x54, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x46, 0x4c, 0x4f, + 0x41, 0x54, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x02, + 0x12, 0x09, 0x0a, 0x05, 0x42, 0x59, 0x54, 0x45, 0x53, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x53, + 0x54, 0x52, 0x55, 0x43, 0x54, 0x10, 0x04, 0x42, 0x0a, 0x0a, 0x08, 0x66, 0x69, 0x65, 0x6c, 0x64, + 0x5f, 0x69, 0x64, 0x42, 0x07, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x73, 0x22, 0xaf, 0x01, 0x0a, + 0x18, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, + 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x75, 0x6d, + 0x5f, 0x6e, 0x6f, 0x6e, 0x5f, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x01, 0x52, 0x0d, 0x6e, 0x75, 0x6d, 0x4e, 0x6f, 0x6e, 0x4d, 0x69, 0x73, 0x73, 0x69, 0x6e, + 0x67, 0x12, 0x1f, 0x0a, 0x0b, 0x6e, 0x75, 0x6d, 0x5f, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0a, 0x6e, 0x75, 0x6d, 0x4d, 0x69, 0x73, 0x73, 0x69, + 0x6e, 0x67, 0x12, 0x24, 0x0a, 0x0e, 0x61, 0x76, 0x67, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0c, 0x61, 0x76, 0x67, 0x4e, + 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x74, 0x6f, 0x74, 0x5f, + 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x01, + 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x4e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x22, 0xe7, + 0x01, 0x0a, 0x0f, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, + 0x69, 0x63, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x03, 0x6e, 0x75, 0x6d, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x01, 0x48, 0x00, 0x52, 0x03, 0x6e, 0x75, 0x6d, 
0x12, 0x12, 0x0a, 0x03, 0x73, 0x74, + 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x03, 0x73, 0x74, 0x72, 0x12, 0x41, + 0x0a, 0x09, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x21, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x48, 0x69, 0x73, 0x74, 0x6f, + 0x67, 0x72, 0x61, 0x6d, 0x48, 0x00, 0x52, 0x09, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, + 0x6d, 0x12, 0x4e, 0x0a, 0x0e, 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, + 0x72, 0x61, 0x6d, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x76, 0x30, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, + 0x48, 0x00, 0x52, 0x0d, 0x72, 0x61, 0x6e, 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, + 0x6d, 0x42, 0x05, 0x0a, 0x03, 0x76, 0x61, 0x6c, 0x22, 0x92, 0x03, 0x0a, 0x11, 0x4e, 0x75, 0x6d, + 0x65, 0x72, 0x69, 0x63, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x4b, + 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x43, 0x6f, + 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x0b, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6d, + 0x65, 0x61, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, + 0x17, 0x0a, 0x07, 0x73, 0x74, 0x64, 0x5f, 0x64, 0x65, 0x76, 0x18, 0x03, 0x20, 0x01, 0x28, 0x01, + 0x52, 0x06, 0x73, 0x74, 0x64, 0x44, 0x65, 0x76, 0x12, 0x1b, 0x0a, 0x09, 0x6e, 0x75, 0x6d, 0x5f, + 
0x7a, 0x65, 0x72, 0x6f, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, 0x6e, 0x75, 0x6d, + 0x5a, 0x65, 0x72, 0x6f, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x01, 0x52, 0x03, 0x6d, 0x69, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x64, 0x69, 0x61, + 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x01, 0x52, 0x06, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x6e, 0x12, + 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x07, 0x20, 0x01, 0x28, 0x01, 0x52, 0x03, 0x6d, 0x61, + 0x78, 0x12, 0x41, 0x0a, 0x0a, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x73, 0x18, + 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, + 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x48, + 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x0a, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, + 0x72, 0x61, 0x6d, 0x73, 0x12, 0x67, 0x0a, 0x16, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, + 0x5f, 0x6e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x4e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x53, 0x74, 0x61, + 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x14, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, + 0x64, 0x4e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x53, 0x74, 0x61, 0x74, 0x73, 0x22, 0x93, 0x04, + 0x0a, 0x10, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, + 0x63, 0x73, 0x12, 0x4b, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, + 0x74, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x43, 0x6f, 0x6d, 0x6d, 
0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, + 0x63, 0x73, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, + 0x16, 0x0a, 0x06, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x06, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x12, 0x54, 0x0a, 0x0a, 0x74, 0x6f, 0x70, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x74, 0x65, + 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x69, + 0x73, 0x74, 0x69, 0x63, 0x73, 0x2e, 0x46, 0x72, 0x65, 0x71, 0x41, 0x6e, 0x64, 0x56, 0x61, 0x6c, + 0x75, 0x65, 0x52, 0x09, 0x74, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x1d, 0x0a, + 0x0a, 0x61, 0x76, 0x67, 0x5f, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x02, 0x52, 0x09, 0x61, 0x76, 0x67, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x4c, 0x0a, 0x0e, + 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x52, 0x61, + 0x6e, 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x0d, 0x72, 0x61, 0x6e, + 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x12, 0x64, 0x0a, 0x15, 0x77, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x74, + 0x61, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x76, 0x30, 0x2e, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x53, 0x74, 0x72, 0x69, 0x6e, + 0x67, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 
0x13, 0x77, 0x65, 0x69, + 0x67, 0x68, 0x74, 0x65, 0x64, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x73, + 0x12, 0x27, 0x0a, 0x0f, 0x76, 0x6f, 0x63, 0x61, 0x62, 0x75, 0x6c, 0x61, 0x72, 0x79, 0x5f, 0x66, + 0x69, 0x6c, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x76, 0x6f, 0x63, 0x61, 0x62, + 0x75, 0x6c, 0x61, 0x72, 0x79, 0x46, 0x69, 0x6c, 0x65, 0x1a, 0x48, 0x0a, 0x0c, 0x46, 0x72, 0x65, + 0x71, 0x41, 0x6e, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, + 0x1c, 0x0a, 0x09, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x01, 0x52, 0x09, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x4a, 0x04, 0x08, + 0x01, 0x10, 0x02, 0x22, 0xa3, 0x01, 0x0a, 0x19, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, + 0x4e, 0x75, 0x6d, 0x65, 0x72, 0x69, 0x63, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, + 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x01, 0x52, + 0x04, 0x6d, 0x65, 0x61, 0x6e, 0x12, 0x17, 0x0a, 0x07, 0x73, 0x74, 0x64, 0x5f, 0x64, 0x65, 0x76, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x06, 0x73, 0x74, 0x64, 0x44, 0x65, 0x76, 0x12, 0x16, + 0x0a, 0x06, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x01, 0x52, 0x06, + 0x6d, 0x65, 0x64, 0x69, 0x61, 0x6e, 0x12, 0x41, 0x0a, 0x0a, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, + 0x72, 0x61, 0x6d, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x74, 0x65, 0x6e, + 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x76, 0x30, 0x2e, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x0a, 0x68, + 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x73, 0x22, 0xbe, 0x01, 0x0a, 0x18, 0x57, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, + 0x69, 0x73, 
0x74, 0x69, 0x63, 0x73, 0x12, 0x54, 0x0a, 0x0a, 0x74, 0x6f, 0x70, 0x5f, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x74, 0x65, 0x6e, + 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x76, 0x30, 0x2e, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, + 0x74, 0x69, 0x63, 0x73, 0x2e, 0x46, 0x72, 0x65, 0x71, 0x41, 0x6e, 0x64, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x52, 0x09, 0x74, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x4c, 0x0a, 0x0e, + 0x72, 0x61, 0x6e, 0x6b, 0x5f, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x52, 0x61, + 0x6e, 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x0d, 0x72, 0x61, 0x6e, + 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x22, 0xe2, 0x01, 0x0a, 0x0f, 0x42, + 0x79, 0x74, 0x65, 0x73, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x4b, + 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x43, 0x6f, + 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x0b, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x75, + 0x6e, 0x69, 0x71, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x75, 0x6e, 0x69, + 0x71, 0x75, 0x65, 0x12, 0x22, 0x0a, 0x0d, 0x61, 0x76, 0x67, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x62, + 0x79, 0x74, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x61, 0x76, 0x67, 0x4e, + 0x75, 0x6d, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 
0x22, 0x0a, 0x0d, 0x6d, 0x69, 0x6e, 0x5f, 0x6e, + 0x75, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, + 0x6d, 0x69, 0x6e, 0x4e, 0x75, 0x6d, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0d, 0x6d, + 0x61, 0x78, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x0b, 0x6d, 0x61, 0x78, 0x4e, 0x75, 0x6d, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, + 0x5f, 0x0a, 0x10, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, + 0x69, 0x63, 0x73, 0x12, 0x4b, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x5f, 0x73, 0x74, + 0x61, 0x74, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, + 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x76, 0x30, 0x2e, 0x43, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, + 0x69, 0x63, 0x73, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x73, + 0x22, 0x94, 0x04, 0x0a, 0x10, 0x43, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, + 0x73, 0x74, 0x69, 0x63, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x75, 0x6d, 0x5f, 0x6e, 0x6f, 0x6e, + 0x5f, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0d, + 0x6e, 0x75, 0x6d, 0x4e, 0x6f, 0x6e, 0x4d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x12, 0x1f, 0x0a, + 0x0b, 0x6e, 0x75, 0x6d, 0x5f, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x0a, 0x6e, 0x75, 0x6d, 0x4d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67, 0x12, 0x24, + 0x0a, 0x0e, 0x6d, 0x69, 0x6e, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x69, 0x6e, 0x4e, 0x75, 0x6d, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x61, 0x78, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 
0x6d, 0x61, + 0x78, 0x4e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x61, 0x76, + 0x67, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x0c, 0x61, 0x76, 0x67, 0x4e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x12, 0x24, 0x0a, 0x0e, 0x74, 0x6f, 0x74, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x4e, 0x75, 0x6d, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x53, 0x0a, 0x14, 0x6e, 0x75, 0x6d, 0x5f, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x73, 0x5f, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x48, 0x69, + 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x12, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x12, 0x64, 0x0a, 0x15, 0x77, + 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x5f, 0x73, + 0x74, 0x61, 0x74, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x74, 0x65, 0x6e, + 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x76, 0x30, 0x2e, 0x57, 0x65, 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, + 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x52, 0x13, 0x77, 0x65, + 0x69, 0x67, 0x68, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, + 0x73, 0x12, 0x64, 0x0a, 0x1d, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6c, 0x69, 0x73, + 0x74, 0x5f, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x5f, 0x68, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, + 0x61, 0x6d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, + 0x72, 0x66, 0x6c, 0x6f, 
0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, + 0x30, 0x2e, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x52, 0x1a, 0x66, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x48, 0x69, + 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x22, 0x83, 0x03, 0x0a, 0x09, 0x48, 0x69, 0x73, 0x74, + 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x12, 0x17, 0x0a, 0x07, 0x6e, 0x75, 0x6d, 0x5f, 0x6e, 0x61, 0x6e, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x4e, 0x61, 0x6e, 0x12, 0x23, + 0x0a, 0x0d, 0x6e, 0x75, 0x6d, 0x5f, 0x75, 0x6e, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6e, 0x75, 0x6d, 0x55, 0x6e, 0x64, 0x65, 0x66, 0x69, + 0x6e, 0x65, 0x64, 0x12, 0x42, 0x0a, 0x07, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x48, 0x69, + 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x2e, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x52, 0x07, + 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x73, 0x12, 0x43, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, + 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x48, + 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x2e, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, + 0x61, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x1a, 0x6d, 0x0a, 0x06, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x6c, 0x6f, + 0x77, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x01, 0x52, 0x08, 0x6c, + 0x6f, 0x77, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1d, 0x0a, 
0x0a, 0x68, 0x69, 0x67, 0x68, 0x5f, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x52, 0x09, 0x68, 0x69, 0x67, + 0x68, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, + 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0b, 0x73, 0x61, + 0x6d, 0x70, 0x6c, 0x65, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x4a, 0x04, 0x08, 0x03, 0x10, 0x04, 0x22, + 0x2c, 0x0a, 0x0d, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x4e, 0x44, 0x41, 0x52, 0x44, 0x10, 0x00, 0x12, 0x0d, + 0x0a, 0x09, 0x51, 0x55, 0x41, 0x4e, 0x54, 0x49, 0x4c, 0x45, 0x53, 0x10, 0x01, 0x22, 0xec, 0x01, + 0x0a, 0x0d, 0x52, 0x61, 0x6e, 0x6b, 0x48, 0x69, 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x12, + 0x46, 0x0a, 0x07, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x2c, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x2e, 0x52, 0x61, 0x6e, 0x6b, 0x48, 0x69, + 0x73, 0x74, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x2e, 0x42, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x52, 0x07, + 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x1a, 0x7f, 0x0a, 0x06, 0x42, + 0x75, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x6c, 0x6f, 0x77, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6c, 0x6f, 0x77, 0x52, 0x61, 0x6e, 0x6b, + 0x12, 0x1b, 0x0a, 0x09, 0x68, 0x69, 0x67, 0x68, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x08, 0x68, 0x69, 0x67, 0x68, 0x52, 0x61, 0x6e, 0x6b, 0x12, 0x14, 0x0a, + 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x5f, 0x63, 0x6f, + 
0x75, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x01, 0x52, 0x0b, 0x73, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x4a, 0x04, 0x08, 0x03, 0x10, 0x04, 0x42, 0x68, 0x0a, 0x1a, + 0x6f, 0x72, 0x67, 0x2e, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x2e, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x76, 0x30, 0x50, 0x01, 0x5a, 0x45, 0x67, 0x69, + 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, 0x64, + 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x74, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, + 0x77, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2f, 0x76, 0x30, 0xf8, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_tensorflow_metadata_proto_v0_statistics_proto_rawDescOnce sync.Once + file_tensorflow_metadata_proto_v0_statistics_proto_rawDescData = file_tensorflow_metadata_proto_v0_statistics_proto_rawDesc +) + +func file_tensorflow_metadata_proto_v0_statistics_proto_rawDescGZIP() []byte { + file_tensorflow_metadata_proto_v0_statistics_proto_rawDescOnce.Do(func() { + file_tensorflow_metadata_proto_v0_statistics_proto_rawDescData = protoimpl.X.CompressGZIP(file_tensorflow_metadata_proto_v0_statistics_proto_rawDescData) + }) + return file_tensorflow_metadata_proto_v0_statistics_proto_rawDescData +} + +var file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes = make([]protoimpl.EnumInfo, 2) +var file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes = make([]protoimpl.MessageInfo, 24) +var file_tensorflow_metadata_proto_v0_statistics_proto_goTypes = []interface{}{ + (FeatureNameStatistics_Type)(0), // 0: tensorflow.metadata.v0.FeatureNameStatistics.Type + (Histogram_HistogramType)(0), // 1: tensorflow.metadata.v0.Histogram.HistogramType + (*DatasetFeatureStatisticsList)(nil), // 2: 
tensorflow.metadata.v0.DatasetFeatureStatisticsList + (*DatasetFeatureStatistics)(nil), // 3: tensorflow.metadata.v0.DatasetFeatureStatistics + (*CrossFeatureStatistics)(nil), // 4: tensorflow.metadata.v0.CrossFeatureStatistics + (*NumericCrossStatistics)(nil), // 5: tensorflow.metadata.v0.NumericCrossStatistics + (*CategoricalCrossStatistics)(nil), // 6: tensorflow.metadata.v0.CategoricalCrossStatistics + (*LiftStatistics)(nil), // 7: tensorflow.metadata.v0.LiftStatistics + (*LiftSeries)(nil), // 8: tensorflow.metadata.v0.LiftSeries + (*FeatureNameStatistics)(nil), // 9: tensorflow.metadata.v0.FeatureNameStatistics + (*WeightedCommonStatistics)(nil), // 10: tensorflow.metadata.v0.WeightedCommonStatistics + (*CustomStatistic)(nil), // 11: tensorflow.metadata.v0.CustomStatistic + (*NumericStatistics)(nil), // 12: tensorflow.metadata.v0.NumericStatistics + (*StringStatistics)(nil), // 13: tensorflow.metadata.v0.StringStatistics + (*WeightedNumericStatistics)(nil), // 14: tensorflow.metadata.v0.WeightedNumericStatistics + (*WeightedStringStatistics)(nil), // 15: tensorflow.metadata.v0.WeightedStringStatistics + (*BytesStatistics)(nil), // 16: tensorflow.metadata.v0.BytesStatistics + (*StructStatistics)(nil), // 17: tensorflow.metadata.v0.StructStatistics + (*CommonStatistics)(nil), // 18: tensorflow.metadata.v0.CommonStatistics + (*Histogram)(nil), // 19: tensorflow.metadata.v0.Histogram + (*RankHistogram)(nil), // 20: tensorflow.metadata.v0.RankHistogram + (*LiftSeries_Bucket)(nil), // 21: tensorflow.metadata.v0.LiftSeries.Bucket + (*LiftSeries_LiftValue)(nil), // 22: tensorflow.metadata.v0.LiftSeries.LiftValue + (*StringStatistics_FreqAndValue)(nil), // 23: tensorflow.metadata.v0.StringStatistics.FreqAndValue + (*Histogram_Bucket)(nil), // 24: tensorflow.metadata.v0.Histogram.Bucket + (*RankHistogram_Bucket)(nil), // 25: tensorflow.metadata.v0.RankHistogram.Bucket + (*Path)(nil), // 26: tensorflow.metadata.v0.Path +} +var 
file_tensorflow_metadata_proto_v0_statistics_proto_depIdxs = []int32{ + 3, // 0: tensorflow.metadata.v0.DatasetFeatureStatisticsList.datasets:type_name -> tensorflow.metadata.v0.DatasetFeatureStatistics + 9, // 1: tensorflow.metadata.v0.DatasetFeatureStatistics.features:type_name -> tensorflow.metadata.v0.FeatureNameStatistics + 4, // 2: tensorflow.metadata.v0.DatasetFeatureStatistics.cross_features:type_name -> tensorflow.metadata.v0.CrossFeatureStatistics + 26, // 3: tensorflow.metadata.v0.CrossFeatureStatistics.path_x:type_name -> tensorflow.metadata.v0.Path + 26, // 4: tensorflow.metadata.v0.CrossFeatureStatistics.path_y:type_name -> tensorflow.metadata.v0.Path + 5, // 5: tensorflow.metadata.v0.CrossFeatureStatistics.num_cross_stats:type_name -> tensorflow.metadata.v0.NumericCrossStatistics + 6, // 6: tensorflow.metadata.v0.CrossFeatureStatistics.categorical_cross_stats:type_name -> tensorflow.metadata.v0.CategoricalCrossStatistics + 7, // 7: tensorflow.metadata.v0.CategoricalCrossStatistics.lift:type_name -> tensorflow.metadata.v0.LiftStatistics + 8, // 8: tensorflow.metadata.v0.LiftStatistics.lift_series:type_name -> tensorflow.metadata.v0.LiftSeries + 8, // 9: tensorflow.metadata.v0.LiftStatistics.weighted_lift_series:type_name -> tensorflow.metadata.v0.LiftSeries + 21, // 10: tensorflow.metadata.v0.LiftSeries.y_bucket:type_name -> tensorflow.metadata.v0.LiftSeries.Bucket + 22, // 11: tensorflow.metadata.v0.LiftSeries.lift_values:type_name -> tensorflow.metadata.v0.LiftSeries.LiftValue + 26, // 12: tensorflow.metadata.v0.FeatureNameStatistics.path:type_name -> tensorflow.metadata.v0.Path + 0, // 13: tensorflow.metadata.v0.FeatureNameStatistics.type:type_name -> tensorflow.metadata.v0.FeatureNameStatistics.Type + 12, // 14: tensorflow.metadata.v0.FeatureNameStatistics.num_stats:type_name -> tensorflow.metadata.v0.NumericStatistics + 13, // 15: tensorflow.metadata.v0.FeatureNameStatistics.string_stats:type_name -> tensorflow.metadata.v0.StringStatistics + 16, 
// 16: tensorflow.metadata.v0.FeatureNameStatistics.bytes_stats:type_name -> tensorflow.metadata.v0.BytesStatistics + 17, // 17: tensorflow.metadata.v0.FeatureNameStatistics.struct_stats:type_name -> tensorflow.metadata.v0.StructStatistics + 11, // 18: tensorflow.metadata.v0.FeatureNameStatistics.custom_stats:type_name -> tensorflow.metadata.v0.CustomStatistic + 19, // 19: tensorflow.metadata.v0.CustomStatistic.histogram:type_name -> tensorflow.metadata.v0.Histogram + 20, // 20: tensorflow.metadata.v0.CustomStatistic.rank_histogram:type_name -> tensorflow.metadata.v0.RankHistogram + 18, // 21: tensorflow.metadata.v0.NumericStatistics.common_stats:type_name -> tensorflow.metadata.v0.CommonStatistics + 19, // 22: tensorflow.metadata.v0.NumericStatistics.histograms:type_name -> tensorflow.metadata.v0.Histogram + 14, // 23: tensorflow.metadata.v0.NumericStatistics.weighted_numeric_stats:type_name -> tensorflow.metadata.v0.WeightedNumericStatistics + 18, // 24: tensorflow.metadata.v0.StringStatistics.common_stats:type_name -> tensorflow.metadata.v0.CommonStatistics + 23, // 25: tensorflow.metadata.v0.StringStatistics.top_values:type_name -> tensorflow.metadata.v0.StringStatistics.FreqAndValue + 20, // 26: tensorflow.metadata.v0.StringStatistics.rank_histogram:type_name -> tensorflow.metadata.v0.RankHistogram + 15, // 27: tensorflow.metadata.v0.StringStatistics.weighted_string_stats:type_name -> tensorflow.metadata.v0.WeightedStringStatistics + 19, // 28: tensorflow.metadata.v0.WeightedNumericStatistics.histograms:type_name -> tensorflow.metadata.v0.Histogram + 23, // 29: tensorflow.metadata.v0.WeightedStringStatistics.top_values:type_name -> tensorflow.metadata.v0.StringStatistics.FreqAndValue + 20, // 30: tensorflow.metadata.v0.WeightedStringStatistics.rank_histogram:type_name -> tensorflow.metadata.v0.RankHistogram + 18, // 31: tensorflow.metadata.v0.BytesStatistics.common_stats:type_name -> tensorflow.metadata.v0.CommonStatistics + 18, // 32: 
tensorflow.metadata.v0.StructStatistics.common_stats:type_name -> tensorflow.metadata.v0.CommonStatistics + 19, // 33: tensorflow.metadata.v0.CommonStatistics.num_values_histogram:type_name -> tensorflow.metadata.v0.Histogram + 10, // 34: tensorflow.metadata.v0.CommonStatistics.weighted_common_stats:type_name -> tensorflow.metadata.v0.WeightedCommonStatistics + 19, // 35: tensorflow.metadata.v0.CommonStatistics.feature_list_length_histogram:type_name -> tensorflow.metadata.v0.Histogram + 24, // 36: tensorflow.metadata.v0.Histogram.buckets:type_name -> tensorflow.metadata.v0.Histogram.Bucket + 1, // 37: tensorflow.metadata.v0.Histogram.type:type_name -> tensorflow.metadata.v0.Histogram.HistogramType + 25, // 38: tensorflow.metadata.v0.RankHistogram.buckets:type_name -> tensorflow.metadata.v0.RankHistogram.Bucket + 39, // [39:39] is the sub-list for method output_type + 39, // [39:39] is the sub-list for method input_type + 39, // [39:39] is the sub-list for extension type_name + 39, // [39:39] is the sub-list for extension extendee + 0, // [0:39] is the sub-list for field type_name +} + +func init() { file_tensorflow_metadata_proto_v0_statistics_proto_init() } +func file_tensorflow_metadata_proto_v0_statistics_proto_init() { + if File_tensorflow_metadata_proto_v0_statistics_proto != nil { + return + } + file_tensorflow_metadata_proto_v0_path_proto_init() + if !protoimpl.UnsafeEnabled { + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DatasetFeatureStatisticsList); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DatasetFeatureStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } 
+ file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CrossFeatureStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*NumericCrossStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CategoricalCrossStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LiftStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LiftSeries); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FeatureNameStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WeightedCommonStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + 
default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CustomStatistic); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*NumericStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StringStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WeightedNumericStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*WeightedStringStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*BytesStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StructStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + 
return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommonStatistics); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Histogram); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*RankHistogram); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LiftSeries_Bucket); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LiftSeries_LiftValue); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StringStatistics_FreqAndValue); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Histogram_Bucket); i { + case 0: + return &v.state + case 1: + return 
&v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[23].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*RankHistogram_Bucket); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[2].OneofWrappers = []interface{}{ + (*CrossFeatureStatistics_NumCrossStats)(nil), + (*CrossFeatureStatistics_CategoricalCrossStats)(nil), + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[6].OneofWrappers = []interface{}{ + (*LiftSeries_YInt)(nil), + (*LiftSeries_YString)(nil), + (*LiftSeries_YBucket)(nil), + (*LiftSeries_YCount)(nil), + (*LiftSeries_WeightedYCount)(nil), + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[7].OneofWrappers = []interface{}{ + (*FeatureNameStatistics_Name)(nil), + (*FeatureNameStatistics_Path)(nil), + (*FeatureNameStatistics_NumStats)(nil), + (*FeatureNameStatistics_StringStats)(nil), + (*FeatureNameStatistics_BytesStats)(nil), + (*FeatureNameStatistics_StructStats)(nil), + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[9].OneofWrappers = []interface{}{ + (*CustomStatistic_Num)(nil), + (*CustomStatistic_Str)(nil), + (*CustomStatistic_Histogram)(nil), + (*CustomStatistic_RankHistogram)(nil), + } + file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes[20].OneofWrappers = []interface{}{ + (*LiftSeries_LiftValue_XInt)(nil), + (*LiftSeries_LiftValue_XString)(nil), + (*LiftSeries_LiftValue_XCount)(nil), + (*LiftSeries_LiftValue_WeightedXCount)(nil), + (*LiftSeries_LiftValue_XAndYCount)(nil), + (*LiftSeries_LiftValue_WeightedXAndYCount)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_tensorflow_metadata_proto_v0_statistics_proto_rawDesc, + 
NumEnums: 2, + NumMessages: 24, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_tensorflow_metadata_proto_v0_statistics_proto_goTypes, + DependencyIndexes: file_tensorflow_metadata_proto_v0_statistics_proto_depIdxs, + EnumInfos: file_tensorflow_metadata_proto_v0_statistics_proto_enumTypes, + MessageInfos: file_tensorflow_metadata_proto_v0_statistics_proto_msgTypes, + }.Build() + File_tensorflow_metadata_proto_v0_statistics_proto = out.File + file_tensorflow_metadata_proto_v0_statistics_proto_rawDesc = nil + file_tensorflow_metadata_proto_v0_statistics_proto_goTypes = nil + file_tensorflow_metadata_proto_v0_statistics_proto_depIdxs = nil +} diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 6d1df292764..4db41f92f18 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -18,8 +18,8 @@ import click import pkg_resources -import yaml +import yaml from feast.client import Client from feast.config import Config from feast.core.IngestionJob_pb2 import IngestionJobStatus diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index f10204c59d9..316573ce93d 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -13,6 +13,7 @@ # limitations under the License. 
+import datetime import logging import os import shutil @@ -25,9 +26,10 @@ import grpc import pandas as pd +from google.protobuf.timestamp_pb2 import Timestamp + import pyarrow as pa import pyarrow.parquet as pq - from feast.config import Config from feast.constants import ( CONFIG_CORE_SECURE_KEY, @@ -48,6 +50,7 @@ GetFeastCoreVersionRequest, GetFeatureSetRequest, GetFeatureSetResponse, + GetFeatureStatisticsRequest, ListFeatureSetsRequest, ListFeatureSetsResponse, ListIngestionJobsRequest, @@ -76,6 +79,7 @@ GetOnlineFeaturesResponse, ) from feast.serving.ServingService_pb2_grpc import ServingServiceStub +from tensorflow_metadata.proto.v0 import statistics_pb2 _logger = logging.getLogger(__name__) @@ -859,6 +863,80 @@ def ingest( return ingestion_id + def get_statistics( + self, + feature_set_id: str, + store: str, + features: List[str] = [], + ingestion_ids: Optional[List[str]] = None, + start_date: Optional[datetime.datetime] = None, + end_date: Optional[datetime.datetime] = None, + force_refresh: bool = False, + default_project: Optional[str] = None, + ) -> statistics_pb2.DatasetFeatureStatisticsList: + """ + Retrieves the feature featureStatistics computed over the data in the batch + stores. + + Args: + feature_set_id: Fully qualified feature set id in the format + project/feature_set to retrieve batch featureStatistics for. If project + is not provided, the default ("default") will be used. + store: Name of the store to retrieve feature featureStatistics over. This + store must be a historical store. + features: Optional list of feature names to filter from the results. + ingestion_ids: Optional list of dataset Ids by which to filter data + before retrieving featureStatistics. Cannot be used with start_date + and end_date. + If multiple dataset ids are provided, unaggregatable featureStatistics + will be dropped. + start_date: Optional start date over which to filter statistical data. + Data from this date will be included. + Cannot be used with dataset_ids. 
If the provided period spans + multiple days, unaggregatable featureStatistics will be dropped. + end_date: Optional end date over which to filter statistical data. + Data from this data will not be included. + Cannot be used with dataset_ids. If the provided period spans + multiple days, unaggregatable featureStatistics will be dropped. + force_refresh: Setting this flag to true will force a recalculation + of featureStatistics and overwrite results currently in the cache, if any. + default_project: Manual override for default project. + + Returns: + Returns a tensorflow DatasetFeatureStatisticsList containing TFDV featureStatistics. + """ + + self._connect_core() + if ingestion_ids is not None and ( + start_date is not None or end_date is not None + ): + raise ValueError( + "Only one of dataset_id or [start_date, end_date] can be provided." + ) + + if default_project != "" and "/" not in feature_set_id: + feature_set_id = f"{default_project}/{feature_set_id}" + + request = GetFeatureStatisticsRequest( + feature_set_id=feature_set_id, + features=features, + store=store, + force_refresh=force_refresh, + ) + if ingestion_ids is not None: + request.ingestion_ids.extend(ingestion_ids) + else: + if start_date is not None: + request.start_date.CopyFrom( + Timestamp(seconds=int(start_date.timestamp())) + ) + if end_date is not None: + request.end_date.CopyFrom(Timestamp(seconds=int(end_date.timestamp()))) + + return self._core_service_stub.GetFeatureStatistics( + request + ).dataset_feature_statistics_list + def _build_feature_references( feature_ref_strs: List[str], project: Optional[str] = None diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index a181c9378bd..1e7ec132763 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -16,15 +16,14 @@ from typing import Dict, List, Optional import pandas as pd -import pyarrow as pa -import yaml from google.protobuf import json_format from google.protobuf.duration_pb2 
import Duration from google.protobuf.json_format import MessageToDict, MessageToJson from google.protobuf.message import Message from pandas.api.types import is_datetime64_ns_dtype -from pyarrow.lib import TimestampType +import pyarrow as pa +import yaml from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto @@ -40,6 +39,7 @@ pa_to_feast_value_type, python_type_to_feast_value_type, ) +from pyarrow.lib import TimestampType from tensorflow_metadata.proto.v0 import schema_pb2 diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index b439dbd3027..5e8e5da4d41 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -4,8 +4,8 @@ from typing import Iterable, List import pandas as pd -import pyarrow.parquet as pq +import pyarrow.parquet as pq from feast.constants import DATETIME_COLUMN from feast.feature_set import FeatureSet from feast.type_map import ( diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 85def25fcb9..7a483a6c2a0 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -17,10 +17,9 @@ import numpy as np import pandas as pd -import pyarrow as pa from google.protobuf.timestamp_pb2 import Timestamp -from pyarrow.lib import TimestampType +import pyarrow as pa from feast.constants import DATETIME_COLUMN from feast.types import FeatureRow_pb2 as FeatureRowProto from feast.types import Field_pb2 as FieldProto @@ -36,6 +35,7 @@ from feast.types.Value_pb2 import Value as ProtoValue from feast.types.Value_pb2 import ValueType as ProtoValueType from feast.value_type import ValueType +from pyarrow.lib import TimestampType def python_type_to_feast_value_type( diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 69ea44a1871..b0d49ccabcf 100644 --- a/sdk/python/setup.py +++ 
b/sdk/python/setup.py @@ -42,7 +42,7 @@ "tabulate==0.8.*", "toml==0.10.*", "tqdm==4.*", - "pyarrow>=0.15.1", + "pyarrow<0.16.0,>=0.15.1", "numpy", "google", "confluent_kafka", diff --git a/storage/api/src/main/java/feast/storage/api/statistics/FeatureSetStatistics.java b/storage/api/src/main/java/feast/storage/api/statistics/FeatureSetStatistics.java new file mode 100644 index 00000000000..a328df46b97 --- /dev/null +++ b/storage/api/src/main/java/feast/storage/api/statistics/FeatureSetStatistics.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.storage.api.statistics; + +import com.google.auto.value.AutoValue; +import com.google.common.collect.ImmutableList; +import org.tensorflow.metadata.v0.FeatureNameStatistics; + +/** Feature statistics for a feature set over a bounded set of data. 
*/ +@AutoValue +public abstract class FeatureSetStatistics { + + public abstract long getNumExamples(); + + public abstract ImmutableList getFeatureNameStatistics(); + + public static Builder newBuilder() { + return new AutoValue_FeatureSetStatistics.Builder(); + } + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setNumExamples(long numExamples); + + protected abstract ImmutableList.Builder featureNameStatisticsBuilder(); + + public Builder addFeatureNameStatistics(FeatureNameStatistics featureNameStatistics) { + featureNameStatisticsBuilder().add(featureNameStatistics); + return this; + } + + public abstract FeatureSetStatistics build(); + } +} diff --git a/storage/api/src/main/java/feast/storage/api/statistics/StatisticsRetriever.java b/storage/api/src/main/java/feast/storage/api/statistics/StatisticsRetriever.java new file mode 100644 index 00000000000..bdd51de2cba --- /dev/null +++ b/storage/api/src/main/java/feast/storage/api/statistics/StatisticsRetriever.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.storage.api.statistics; + +import com.google.protobuf.Timestamp; +import feast.proto.core.FeatureSetProto.FeatureSetSpec; +import java.util.List; + +public interface StatisticsRetriever { + + /** + * Get feature set statistics for a single feature set, for a single dataset id. 
+ * + * @param featureSetSpec feature set spec of the requested feature set + * @param features subset of features to retrieve. + * @param dataset dataset id to filter the data by + * @return {@link FeatureSetStatistics} containing statistics for the requested features. + */ + FeatureSetStatistics getFeatureStatistics( + FeatureSetSpec featureSetSpec, List features, String dataset); + + /** + * Get feature set statistics for a single feature set, for a single day. + * + * @param featureSetSpec feature set spec of the requested feature set + * @param features subset of features to retrieve. + * @param date date to filter the data by + * @return {@link FeatureSetStatistics} containing statistics for the requested features. + */ + FeatureSetStatistics getFeatureStatistics( + FeatureSetSpec featureSetSpec, List features, Timestamp date); +} diff --git a/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/BigQueryStatisticsRetriever.java b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/BigQueryStatisticsRetriever.java new file mode 100644 index 00000000000..3ea4c64c394 --- /dev/null +++ b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/BigQueryStatisticsRetriever.java @@ -0,0 +1,159 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.storage.connectors.bigquery.statistics; + +import com.google.auto.value.AutoValue; +import com.google.cloud.bigquery.*; +import com.google.common.collect.Streams; +import com.google.protobuf.Timestamp; +import feast.proto.core.FeatureSetProto.FeatureSetSpec; +import feast.proto.core.FeatureSetProto.FeatureSpec; +import feast.proto.core.StoreProto.Store.BigQueryConfig; +import feast.storage.api.statistics.FeatureSetStatistics; +import feast.storage.api.statistics.StatisticsRetriever; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.tensorflow.metadata.v0.FeatureNameStatistics; + +@AutoValue +public abstract class BigQueryStatisticsRetriever implements StatisticsRetriever { + + public abstract String projectId(); + + public abstract String datasetId(); + + public abstract BigQuery bigquery(); + + public static BigQueryStatisticsRetriever create(BigQueryConfig config) { + BigQuery bigquery = + BigQueryOptions.getDefaultInstance() + .toBuilder() + .setProjectId(config.getProjectId()) + .build() + .getService(); + return newBuilder() + .setBigquery(bigquery) + .setDatasetId(config.getDatasetId()) + .setProjectId(config.getProjectId()) + .build(); + } + + static Builder newBuilder() { + return new AutoValue_BigQueryStatisticsRetriever.Builder(); + } + + @AutoValue.Builder + abstract static class Builder { + + public abstract Builder setProjectId(String projectId); + + public abstract Builder setDatasetId(String datasetId); + + public abstract Builder setBigquery(BigQuery bigquery); + + public abstract BigQueryStatisticsRetriever build(); + } + + @Override + public FeatureSetStatistics getFeatureStatistics( + FeatureSetSpec featureSetSpec, List features, String dataset) { + FeatureSetStatisticsQueryInfo featureSetStatisticsQueryInfo = + new FeatureSetStatisticsQueryInfo( + featureSetSpec.getProject(), featureSetSpec.getName(), dataset); + return 
getFeatureSetStatistics(featureSetSpec, features, featureSetStatisticsQueryInfo); + } + + @Override + public FeatureSetStatistics getFeatureStatistics( + FeatureSetSpec featureSetSpec, List features, Timestamp date) { + FeatureSetStatisticsQueryInfo featureSetStatisticsQueryInfo = + new FeatureSetStatisticsQueryInfo( + featureSetSpec.getProject(), featureSetSpec.getName(), date); + return getFeatureSetStatistics(featureSetSpec, features, featureSetStatisticsQueryInfo); + } + + private FeatureSetStatistics getFeatureSetStatistics( + FeatureSetSpec featureSetSpec, + List features, + FeatureSetStatisticsQueryInfo featureSetStatisticsQueryInfo) { + List featuresList = featureSetSpec.getFeaturesList(); + + FeatureSetSpec.Builder featureSetSpecBuilder = featureSetSpec.toBuilder().clearFeatures(); + for (FeatureSpec featureSpec : featuresList) { + if (features.contains(featureSpec.getName())) { + featureSetStatisticsQueryInfo.addFeature(featureSpec); + featureSetSpecBuilder.addFeatures(featureSpec); + } + } + featureSetSpec = featureSetSpecBuilder.build(); + + try { + // Generate SQL for and retrieve non-histogram statistics + String getFeatureSetStatsQuery = + StatsQueryTemplater.createGetFeatureSetStatsQuery( + featureSetStatisticsQueryInfo, projectId(), datasetId()); + QueryJobConfiguration queryJobConfiguration = + QueryJobConfiguration.newBuilder(getFeatureSetStatsQuery).build(); + TableResult basicStats = bigquery().query(queryJobConfiguration); + + // Generate SQL for and retrieve histogram statistics + String getFeatureSetHistQuery = + StatsQueryTemplater.createGetFeatureSetHistQuery( + featureSetStatisticsQueryInfo, projectId(), datasetId()); + queryJobConfiguration = QueryJobConfiguration.newBuilder(getFeatureSetHistQuery).build(); + TableResult hist = bigquery().query(queryJobConfiguration); + + // Convert to map of feature_name:row containing the statistics + Map basicStatsValues = getTableResultByFeatureName(basicStats); + Map histValues = 
getTableResultByFeatureName(hist); + + int totalCountIndex = basicStats.getSchema().getFields().getIndex("total_count"); + String ref = features.get(0); + FeatureSetStatistics.Builder featureSetStatisticsBuilder = + FeatureSetStatistics.newBuilder() + .setNumExamples(basicStatsValues.get(ref).get(totalCountIndex).getLongValue()); + + // Convert BQ rows to FeatureNameStatistics + for (FeatureSpec featureSpec : featureSetSpec.getFeaturesList()) { + FeatureNameStatistics featureNameStatistics = + StatsQueryResult.create() + .withBasicStatsResults( + basicStats.getSchema(), basicStatsValues.get(featureSpec.getName())) + .withHistResults(hist.getSchema(), histValues.get(featureSpec.getName())) + .toFeatureNameStatistics(featureSpec.getValueType()); + featureSetStatisticsBuilder.addFeatureNameStatistics(featureNameStatistics); + } + return featureSetStatisticsBuilder.build(); + } catch (IOException | InterruptedException e) { + throw new RuntimeException( + String.format( + "Unable to retrieve statistics from BigQuery for Feature set %s, features %s", + featureSetSpec.getName(), features), + e); + } + } + + private Map getTableResultByFeatureName(TableResult basicStats) { + return Streams.stream(basicStats.getValues()) + .collect( + Collectors.toMap( + fieldValueList -> fieldValueList.get(0).getStringValue(), + fieldValueList -> fieldValueList)); + } +} diff --git a/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureSetStatisticsQueryInfo.java b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureSetStatisticsQueryInfo.java new file mode 100644 index 00000000000..7c9665d92bb --- /dev/null +++ b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureSetStatisticsQueryInfo.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.storage.connectors.bigquery.statistics; + +import com.google.protobuf.Timestamp; +import feast.proto.core.FeatureSetProto.EntitySpec; +import feast.proto.core.FeatureSetProto.FeatureSpec; +import java.util.ArrayList; +import java.util.List; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +/** + * Value class for Feature Sets containing information necessary to template stats-retrieving + * queries. 
+ */ +public class FeatureSetStatisticsQueryInfo { + // Feast project name + private final String project; + + // Feature set name + private final String name; + + // Ingestion ID to retrieve statistics over + private String ingestionId = ""; + + // Date to retrieve statistics over + private String date = ""; + + // List of entity names in this feature set + private final List entityNames; + + // List of fields to get stats for + private final List features; + + public FeatureSetStatisticsQueryInfo(String project, String name, String ingestionId) { + this.project = project; + this.name = name; + this.entityNames = new ArrayList<>(); + this.features = new ArrayList<>(); + this.ingestionId = ingestionId; + } + + public FeatureSetStatisticsQueryInfo(String project, String name, Timestamp date) { + this.project = project; + this.name = name; + this.entityNames = new ArrayList<>(); + this.features = new ArrayList<>(); + DateTime dateTime = new DateTime(date.getSeconds() * 1000, DateTimeZone.UTC); + DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd"); + this.date = fmt.print(dateTime); + } + + public void addFeature(FeatureSpec featureSpec) { + this.features.add(FeatureStatisticsQueryInfo.fromProto(featureSpec)); + } + + public void addEntity(EntitySpec entitySpec) { + this.entityNames.add(entitySpec.getName()); + this.features.add(FeatureStatisticsQueryInfo.fromProto(entitySpec)); + } + + public String getProject() { + return project; + } + + public String getName() { + return name; + } + + public String getIngestionId() { + return ingestionId; + } + + public String getDate() { + return date; + } + + public List getEntityNames() { + return entityNames; + } + + public List getFeatures() { + return features; + } +} diff --git a/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureStatisticsQueryInfo.java 
b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureStatisticsQueryInfo.java new file mode 100644 index 00000000000..8d73fad5dde --- /dev/null +++ b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/FeatureStatisticsQueryInfo.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.storage.connectors.bigquery.statistics; + +import feast.proto.core.FeatureSetProto.EntitySpec; +import feast.proto.core.FeatureSetProto.FeatureSpec; +import feast.proto.types.ValueProto.ValueType.Enum; + +/** + * Value class for Features containing information necessary to template stats-retrieving queries. 
+ */ +public class FeatureStatisticsQueryInfo { + // Name of the field + private final String name; + + // Type of the field + private final String type; + + private FeatureStatisticsQueryInfo(String name, String type) { + this.name = name; + this.type = type; + } + + public static FeatureStatisticsQueryInfo fromProto(FeatureSpec featureSpec) { + Enum valueType = featureSpec.getValueType(); + switch (valueType) { + case FLOAT: + case DOUBLE: + case INT32: + case INT64: + case BOOL: + return new FeatureStatisticsQueryInfo(featureSpec.getName(), "NUMERIC"); + case STRING: + return new FeatureStatisticsQueryInfo(featureSpec.getName(), "CATEGORICAL"); + case BYTES: + return new FeatureStatisticsQueryInfo(featureSpec.getName(), "BYTES"); + case BYTES_LIST: + case BOOL_LIST: + case FLOAT_LIST: + case INT32_LIST: + case INT64_LIST: + case DOUBLE_LIST: + case STRING_LIST: + return new FeatureStatisticsQueryInfo(featureSpec.getName(), "LIST"); + default: + throw new IllegalArgumentException( + String.format("Invalid feature type provided: %s", valueType)); + } + } + + public static FeatureStatisticsQueryInfo fromProto(EntitySpec entitySpec) { + Enum valueType = entitySpec.getValueType(); + switch (valueType) { + case FLOAT: + case DOUBLE: + case INT32: + case INT64: + case BOOL: + return new FeatureStatisticsQueryInfo(entitySpec.getName(), "NUMERIC"); + case STRING: + return new FeatureStatisticsQueryInfo(entitySpec.getName(), "CATEGORICAL"); + case BYTES: + return new FeatureStatisticsQueryInfo(entitySpec.getName(), "BYTES"); + case BYTES_LIST: + case BOOL_LIST: + case FLOAT_LIST: + case INT32_LIST: + case INT64_LIST: + case DOUBLE_LIST: + case STRING_LIST: + return new FeatureStatisticsQueryInfo(entitySpec.getName(), "LIST"); + default: + throw new IllegalArgumentException( + String.format("Invalid entity type provided: %s", valueType)); + } + } + + public String getName() { + return name; + } + + public String getType() { + return type; + } +} diff --git 
a/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryResult.java b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryResult.java new file mode 100644 index 00000000000..f725f2924f0 --- /dev/null +++ b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryResult.java @@ -0,0 +1,332 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.storage.connectors.bigquery.statistics; + +import com.google.auto.value.AutoValue; +import com.google.cloud.bigquery.*; +import com.google.cloud.bigquery.Schema; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.Ordering; +import feast.proto.types.ValueProto.ValueType; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.tensorflow.metadata.v0.*; +import org.tensorflow.metadata.v0.Histogram.Bucket; +import org.tensorflow.metadata.v0.Histogram.HistogramType; +import org.tensorflow.metadata.v0.StringStatistics.FreqAndValue; + +@AutoValue +public abstract class StatsQueryResult { + // Map converting Feast type to TFDV type + private static final Map TFDV_TYPE_MAP = + new HashMap<>(); + + static { + TFDV_TYPE_MAP.put(ValueType.Enum.INT64, FeatureNameStatistics.Type.INT); + TFDV_TYPE_MAP.put(ValueType.Enum.INT32, FeatureNameStatistics.Type.INT); + TFDV_TYPE_MAP.put(ValueType.Enum.BOOL, FeatureNameStatistics.Type.INT); + TFDV_TYPE_MAP.put(ValueType.Enum.FLOAT, FeatureNameStatistics.Type.FLOAT); + TFDV_TYPE_MAP.put(ValueType.Enum.DOUBLE, FeatureNameStatistics.Type.FLOAT); + TFDV_TYPE_MAP.put(ValueType.Enum.STRING, FeatureNameStatistics.Type.STRING); + TFDV_TYPE_MAP.put(ValueType.Enum.BYTES, FeatureNameStatistics.Type.BYTES); + TFDV_TYPE_MAP.put(ValueType.Enum.BYTES_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.STRING_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.INT32_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.INT64_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.BOOL_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.FLOAT_LIST, FeatureNameStatistics.Type.STRUCT); + TFDV_TYPE_MAP.put(ValueType.Enum.DOUBLE_LIST, FeatureNameStatistics.Type.STRUCT); + } + + // Schema 
of the table returned by the basic stats retrieval query + @Nullable + abstract Schema basicStatsSchema(); + + // Table values returned by the basic stats retrieval query + @Nullable + abstract FieldValueList basicStatsFieldValues(); + + // Schema of the table returned by the histogram retrieval query + @Nullable + abstract Schema histSchema(); + + // Table values returned by the histogram retrieval query + @Nullable + abstract FieldValueList histFieldValues(); + + public static StatsQueryResult create() { + return StatsQueryResult.newBuilder().build(); + } + + private static StatsQueryResult.Builder newBuilder() { + return new AutoValue_StatsQueryResult.Builder(); + } + + abstract Builder toBuilder(); + + /** + * Add basic stats query results to the StatsQueryResult. + * + * @param basicStatsSchema BigQuery {@link Schema} of the retrieved statistics row for the + * non-histogram statistics. Used to retrieve the column names corresponding to each value in + * the row. + * @param basicStatsFieldValues BigQuery {@link FieldValueList} containing a single row of + * non-histogram statistics retrieved from BigQuery + * @return {@link StatsQueryResult} + */ + public StatsQueryResult withBasicStatsResults( + Schema basicStatsSchema, FieldValueList basicStatsFieldValues) { + return toBuilder() + .setBasicStatsSchema(basicStatsSchema) + .setBasicStatsFieldValues(basicStatsFieldValues) + .build(); + } + + /** + * Add histogram stats query results to the StatsQueryResult. + * + * @param histSchema BigQuery {@link Schema} of the retrieved statistics row for the histogram + * statistics. Used to retrieve the column names corresponding to each value in the row. 
+ * @param histFieldValues BigQuery {@link FieldValueList} containing a single row of histogram + * statistics retrieved from BigQuery + * @return {@link StatsQueryResult} + */ + public StatsQueryResult withHistResults(Schema histSchema, FieldValueList histFieldValues) { + return toBuilder().setHistSchema(histSchema).setHistFieldValues(histFieldValues).build(); + } + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setBasicStatsSchema(Schema basicStatsSchema); + + abstract Builder setBasicStatsFieldValues(FieldValueList basicStatsFieldValues); + + abstract Builder setHistSchema(Schema histSchema); + + abstract Builder setHistFieldValues(FieldValueList histFieldValues); + + public abstract StatsQueryResult build(); + } + + /** + * Convert BQ-retrieved statistics to the corresponding TFDV {@link FeatureNameStatistics} + * specific to the feature type. + * + * @param valueType {@link ValueType.Enum} denoting the value type of the feature + * @return {@link FeatureNameStatistics} + */ + public FeatureNameStatistics toFeatureNameStatistics(ValueType.Enum valueType) { + Map valuesMap = new HashMap<>(); + + // Convert the table values to a map of field name : table value for easy retrieval + FieldList basicStatsfields = basicStatsSchema().getFields(); + for (int i = 0; i < basicStatsSchema().getFields().size(); i++) { + valuesMap.put(basicStatsfields.get(i).getName(), basicStatsFieldValues().get(i)); + } + + FieldList histFields = histSchema().getFields(); + for (int i = 0; i < histSchema().getFields().size(); i++) { + valuesMap.put(histFields.get(i).getName(), histFieldValues().get(i)); + } + + FeatureNameStatistics.Builder featureNameStatisticsBuilder = + FeatureNameStatistics.newBuilder() + .setPath(Path.newBuilder().addStep(valuesMap.get("feature_name").getStringValue())) + .setType(TFDV_TYPE_MAP.get(valueType)); + + switch (valueType) { + case FLOAT: + case BOOL: + case DOUBLE: + case INT32: + case INT64: + NumericStatistics numStats = 
getNumericStatistics(valuesMap); + featureNameStatisticsBuilder.setNumStats(numStats); + break; + case STRING: + StringStatistics stringStats = getStringStatistics(valuesMap); + featureNameStatisticsBuilder.setStringStats(stringStats); + break; + case BYTES: + BytesStatistics bytesStats = getBytesStatistics(valuesMap); + featureNameStatisticsBuilder.setBytesStats(bytesStats); + break; + case BYTES_LIST: + case BOOL_LIST: + case FLOAT_LIST: + case INT32_LIST: + case INT64_LIST: + case DOUBLE_LIST: + case STRING_LIST: + StructStatistics structStats = getStructStatistics(valuesMap); + featureNameStatisticsBuilder.setStructStats(structStats); + break; + default: + throw new IllegalArgumentException( + "Invalid feature type provided. Only statistics for numeric, bytes, string, boolean and list features are supported."); + } + return featureNameStatisticsBuilder.build(); + } + + private BytesStatistics getBytesStatistics(Map valuesMap) { + if (valuesMap.get("total_count").getLongValue() == 0) { + return BytesStatistics.getDefaultInstance(); + } + + return BytesStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setNumMissing(valuesMap.get("missing_count").getLongValue()) + .setNumNonMissing(valuesMap.get("feature_count").getLongValue()) + .setMinNumValues(1) + .setMaxNumValues(1) + .setAvgNumValues(1) + .setTotNumValues(valuesMap.get("feature_count").getLongValue())) + .setUnique(valuesMap.get("unique").getLongValue()) + .setMaxNumBytes((float) valuesMap.get("max").getDoubleValue()) + .setMinNumBytes((float) valuesMap.get("min").getDoubleValue()) + .setAvgNumBytes((float) valuesMap.get("mean").getDoubleValue()) + .build(); + } + + private StringStatistics getStringStatistics(Map valuesMap) { + if (valuesMap.get("total_count").getLongValue() == 0) { + return StringStatistics.getDefaultInstance(); + } + + RankHistogram.Builder rankHistogram = RankHistogram.newBuilder(); + valuesMap + .get("cat_hist") + .getRepeatedValue() + .forEach( + v -> { + 
FieldValueList recordValue = v.getRecordValue(); + rankHistogram.addBuckets( + RankHistogram.Bucket.newBuilder() + .setLabel(recordValue.get(0).getStringValue()) + .setSampleCount(recordValue.get(1).getLongValue())); + }); + + List topCount = + rankHistogram.getBucketsList().stream() + .sorted( + (a, b) -> + ComparisonChain.start() + .compare( + a.getSampleCount(), b.getSampleCount(), Ordering.natural().reverse()) + .result()) + .limit(5) + .map( + bucket -> + FreqAndValue.newBuilder() + .setValue(bucket.getLabel()) + .setFrequency(bucket.getSampleCount()) + .build()) + .collect(Collectors.toList()); + + return StringStatistics.newBuilder() + .setUnique(valuesMap.get("unique").getLongValue()) + .setAvgLength((long) valuesMap.get("mean").getDoubleValue()) + .setCommonStats( + CommonStatistics.newBuilder() + .setNumMissing(valuesMap.get("missing_count").getLongValue()) + .setNumNonMissing(valuesMap.get("feature_count").getLongValue()) + .setMinNumValues(1) + .setMaxNumValues(1) + .setAvgNumValues(1) + .setTotNumValues(valuesMap.get("feature_count").getLongValue())) + .setRankHistogram(rankHistogram) + .addAllTopValues(topCount) + .build(); + } + + private NumericStatistics getNumericStatistics(Map valuesMap) { + if (valuesMap.get("total_count").getLongValue() == 0) { + return NumericStatistics.getDefaultInstance(); + } + + // Build quantiles + long quantileCount = valuesMap.get("feature_count").getLongValue() / 10; + Histogram.Builder quantilesBuilder = Histogram.newBuilder().setType(HistogramType.QUANTILES); + + List quantilesRaw = valuesMap.get("quantiles").getRepeatedValue(); + for (int i = 0; i < quantilesRaw.size() - 1; i++) { + quantilesBuilder.addBuckets( + Bucket.newBuilder() + .setLowValue(quantilesRaw.get(i).getDoubleValue()) + .setHighValue(quantilesRaw.get(i + 1).getDoubleValue()) + .setSampleCount(quantileCount)); + } + // Build histogram + Histogram.Builder histBuilder = Histogram.newBuilder().setType(HistogramType.STANDARD); + + // Order of histogram 
records is defined in the query hist_stats.sql:L35 + valuesMap + .get("num_hist") + .getRepeatedValue() + .forEach( + v -> { + FieldValueList recordValue = v.getRecordValue(); + histBuilder.addBuckets( + Bucket.newBuilder() + .setHighValue(recordValue.get(2).getDoubleValue()) + .setLowValue(recordValue.get(1).getDoubleValue()) + .setSampleCount(recordValue.get(0).getLongValue())); + }); + + return NumericStatistics.newBuilder() + .setMax(valuesMap.get("max").getDoubleValue()) + .setMin(valuesMap.get("min").getDoubleValue()) + .setMedian(quantilesRaw.get(5).getDoubleValue()) + .setNumZeros(valuesMap.get("zeroes").getLongValue()) + .setStdDev(valuesMap.get("stdev").getDoubleValue()) + .setMean(valuesMap.get("mean").getDoubleValue()) + .setCommonStats( + CommonStatistics.newBuilder() + .setNumMissing(valuesMap.get("missing_count").getLongValue()) + .setNumNonMissing(valuesMap.get("feature_count").getLongValue()) + .setMinNumValues(1) + .setMaxNumValues(1) + .setAvgNumValues(1) + .setTotNumValues(valuesMap.get("feature_count").getLongValue())) + .addHistograms(histBuilder) + .addHistograms(quantilesBuilder) + .build(); + } + + private StructStatistics getStructStatistics(Map valuesMap) { + if (valuesMap.get("total_count").getLongValue() == 0) { + return StructStatistics.getDefaultInstance(); + } + + return StructStatistics.newBuilder() + .setCommonStats( + CommonStatistics.newBuilder() + .setNumMissing(valuesMap.get("missing_count").getLongValue()) + .setNumNonMissing(valuesMap.get("feature_count").getLongValue()) + .setMinNumValues(valuesMap.get("min").getLongValue()) + .setMaxNumValues(valuesMap.get("max").getLongValue()) + .setAvgNumValues(valuesMap.get("mean").getLongValue()) + .setTotNumValues( + valuesMap.get("feature_count").getLongValue() + * valuesMap.get("mean").getLongValue())) + .build(); + } +} diff --git a/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryTemplater.java 
b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryTemplater.java new file mode 100644 index 00000000000..8103b2cb48a --- /dev/null +++ b/storage/connectors/bigquery/src/main/java/feast/storage/connectors/bigquery/statistics/StatsQueryTemplater.java @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.storage.connectors.bigquery.statistics; + +import com.mitchellbosecke.pebble.PebbleEngine; +import com.mitchellbosecke.pebble.template.PebbleTemplate; +import java.io.IOException; +import java.io.StringWriter; +import java.io.Writer; +import java.util.HashMap; +import java.util.Map; + +public class StatsQueryTemplater { + + private static final PebbleEngine engine = new PebbleEngine.Builder().build(); + private static final String BASIC_STATS_TEMPLATE_NAME = "templates/basic_stats.sql"; + private static final String HIST_STATS_TEMPLATE_NAME = "templates/hist_stats.sql"; + + /** + * Generate the query for getting basic statistics about a given feature set + * + * @param featureSetInfo Information about the feature set necessary for the query templating + * @param projectId google project ID + * @param datasetId feast bigquery dataset ID + * @return point in time correctness join BQ SQL query + */ + public static String createGetFeatureSetStatsQuery( + FeatureSetStatisticsQueryInfo featureSetInfo, 
String projectId, String datasetId) + throws IOException { + + PebbleTemplate template = engine.getTemplate(BASIC_STATS_TEMPLATE_NAME); + Map context = new HashMap<>(); + context.put("featureSet", featureSetInfo); + context.put("projectId", projectId); + context.put("datasetId", datasetId); + + Writer writer = new StringWriter(); + template.evaluate(writer, context); + return writer.toString(); + } + + /** + * Generate the query for getting histograms for features in a given feature set + * + * @param featureSetInfo Information about the feature set necessary for the query templating + * @param projectId google project ID + * @param datasetId feast bigquery dataset ID + * @return point in time correctness join BQ SQL query + */ + public static String createGetFeatureSetHistQuery( + FeatureSetStatisticsQueryInfo featureSetInfo, String projectId, String datasetId) + throws IOException { + + PebbleTemplate template = engine.getTemplate(HIST_STATS_TEMPLATE_NAME); + Map context = new HashMap<>(); + context.put("featureSet", featureSetInfo); + context.put("projectId", projectId); + context.put("datasetId", datasetId); + + Writer writer = new StringWriter(); + template.evaluate(writer, context); + return writer.toString(); + } +} diff --git a/storage/connectors/bigquery/src/main/resources/templates/basic_stats.sql b/storage/connectors/bigquery/src/main/resources/templates/basic_stats.sql new file mode 100644 index 00000000000..dfc931720bb --- /dev/null +++ b/storage/connectors/bigquery/src/main/resources/templates/basic_stats.sql @@ -0,0 +1,85 @@ +WITH subset AS ( +SELECT * FROM `{{ projectId }}.{{ datasetId }}.{{ featureSet.project }}_{{ featureSet.name }}` +{% if featureSet.ingestionId == "" %} +WHERE DATE(event_timestamp) = '{{ featureSet.date }}' +{% else %} +WHERE ingestion_id='{{ featureSet.ingestionId }}' +{% endif %} +) +{% for feature in featureSet.features %} +SELECT + "{{ feature.name }}" as feature_name, + -- total count + COUNT(*) AS total_count, + -- count + 
COUNT({{ feature.name }}) as feature_count, + -- missing + COUNT(*) - COUNT({{ feature.name }}) as missing_count, + {% if feature.type equals "NUMERIC" %} + -- mean + AVG({{ feature.name }}) as mean, + -- stdev + STDDEV({{ feature.name }}) as stdev, + -- zeroes + COUNTIF({{ feature.name }} = 0) as zeroes, + -- min + MIN({{ feature.name }}) as min, + -- max + MAX({{ feature.name }}) as max, + -- hist will have to be called separately + -- quantiles + APPROX_QUANTILES(CAST({{ feature.name }} AS FLOAT64), 10) AS quantiles, + -- unique + null as unique + {% elseif feature.type equals "CATEGORICAL" %} + -- mean + AVG(LENGTH({{ feature.name }})) as mean, + -- stdev + null as stdev, + -- zeroes + null as zeroes, + -- min + null as min, + -- max + null as max, + -- quantiles + ARRAY[] AS quantiles, + -- unique + COUNT(DISTINCT({{ feature.name }})) as unique + {% elseif feature.type equals "BYTES" %} + -- mean + AVG(BIT_COUNT({{ feature.name }})) as mean, + -- stdev + null as stdev, + -- zeroes + null as zeroes, + -- min + MIN(BIT_COUNT({{ feature.name }})) as min, + -- max + MAX(BIT_COUNT({{ feature.name }})) as max, + -- hist will have to be called separately + -- quantiles + ARRAY[] AS quantiles, + -- unique + COUNT(DISTINCT({{ feature.name }})) as unique + {% elseif feature.type equals "LIST" %} + -- mean + AVG(ARRAY_LENGTH({{ feature.name }})) as mean, + -- stdev + null as stdev, + -- zeroes + null as zeroes, + -- min + MIN(ARRAY_LENGTH({{ feature.name }})) as min, + -- max + MAX(ARRAY_LENGTH({{ feature.name }})) as max, + -- hist will have to be called separately + -- quantiles + ARRAY[] AS quantiles, + -- unique + null as unique + {% endif %} +FROM subset +{% if loop.last %}{% else %}UNION ALL {% endif %} +{% endfor %} + diff --git a/storage/connectors/bigquery/src/main/resources/templates/hist_stats.sql b/storage/connectors/bigquery/src/main/resources/templates/hist_stats.sql new file mode 100644 index 00000000000..1f0a7605faa --- /dev/null +++ 
b/storage/connectors/bigquery/src/main/resources/templates/hist_stats.sql @@ -0,0 +1,41 @@ +WITH subset AS ( +SELECT * FROM `{{ projectId }}.{{ datasetId }}.{{ featureSet.project }}_{{ featureSet.name }}` +{% if featureSet.ingestionId == "" %} +WHERE DATE(event_timestamp) = '{{ featureSet.date }}' +{% else %} +WHERE ingestion_id='{{ featureSet.ingestionId }}' +{% endif %} +) +{% for feature in featureSet.features %} +, {{ feature.name }}_stats AS ( +{% if feature.type == 'NUMERIC' %} + WITH stats AS ( + SELECT min+step*i as min, min+step*(i+1) as max + FROM ( + SELECT MIN({{ feature.name }}) as min, MAX({{ feature.name }}) as max, (MAX({{ feature.name }})-MIN({{ feature.name }}))/10 step, GENERATE_ARRAY(0, 10, 1) i + FROM subset + ), UNNEST(i) i + ), counts as ( + SELECT COUNT(*) as count, min, max, + FROM subset + JOIN stats + ON subset.{{ feature.name }} >= stats.min AND subset.{{ feature.name }}>[] as cat_hist FROM counts +{% elseif feature.type == 'CATEGORICAL' %} + WITH counts AS ( + SELECT {{ feature.name }}, COUNT({{ feature.name }}) AS count FROM subset GROUP BY {{ feature.name }} + ) + SELECT '{{ feature.name }}' as feature, ARRAY>[] as num_hist, ARRAY_AGG(STRUCT({{ feature.name }} as value, count as count)) as cat_hist FROM counts +{% elseif feature.type == 'BYTES' %} + SELECT '{{ feature.name }}' as feature, ARRAY>[] as num_hist, ARRAY>[] as cat_hist +{% elseif feature.type == 'LIST' %} + SELECT '{{ feature.name }}' as feature, ARRAY>[] as num_hist, ARRAY>[] as cat_hist +{% endif %} +) +{% endfor %} +{% for feature in featureSet.features %} +SELECT * FROM {{ feature.name }}_stats +{% if loop.last %}{% else %}UNION ALL {% endif %} +{% endfor %} \ No newline at end of file diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq/bq-batch-retrieval.py similarity index 100% rename from tests/e2e/bq-batch-retrieval.py rename to tests/e2e/bq/bq-batch-retrieval.py diff --git a/tests/e2e/bq/feature-stats.py b/tests/e2e/bq/feature-stats.py new file mode 100644 
index 00000000000..89f871e63b5 --- /dev/null +++ b/tests/e2e/bq/feature-stats.py @@ -0,0 +1,304 @@ +import pandas as pd +import pytest +import pytz +import uuid +import time +import os +from datetime import datetime, timedelta + +from feast.client import Client +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_set import FeatureSet +from feast.type_map import ValueType +from google.protobuf.duration_pb2 import Duration +import tensorflow_data_validation as tfdv +from deepdiff import DeepDiff +from google.protobuf.json_format import MessageToDict + + +pd.set_option("display.max_columns", None) + +PROJECT_NAME = "batch_" + uuid.uuid4().hex.upper()[0:6] +STORE_NAME = "historical" +os.environ['CUDA_VISIBLE_DEVICES'] = "0" + + +@pytest.fixture(scope="module") +def core_url(pytestconfig): + return pytestconfig.getoption("core_url") + + +@pytest.fixture(scope="module") +def serving_url(pytestconfig): + return pytestconfig.getoption("serving_url") + + +@pytest.fixture(scope="module") +def allow_dirty(pytestconfig): + return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False + + +@pytest.fixture(scope="module") +def gcs_path(pytestconfig): + return pytestconfig.getoption("gcs_path") + + +@pytest.fixture(scope="module") +def client(core_url, allow_dirty): + # Get client for core and serving + client = Client(core_url=core_url) + client.create_project(PROJECT_NAME) + + # Ensure Feast core is active, but empty + if not allow_dirty: + feature_sets = client.list_feature_sets() + if len(feature_sets) > 0: + raise Exception( + "Feast cannot have existing feature sets registered. Exiting tests." 
+ ) + + return client + + +@pytest.fixture(scope="module") +def feature_stats_feature_set(client): + fv_fs = FeatureSet( + "feature_stats", + features=[ + Feature("strings", ValueType.STRING), + Feature("ints", ValueType.INT64), + Feature("floats", ValueType.FLOAT), + ], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(fv_fs) + return fv_fs + + +@pytest.fixture(scope="module") +def feature_stats_dataset_basic(client, feature_stats_feature_set): + + N_ROWS = 20 + + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "strings": ["a", "b"] * int(N_ROWS / 2), + "ints": [int(i) for i in range(N_ROWS)], + "floats": [10.5 - i for i in range(N_ROWS)], + } + ) + + expected_stats = tfdv.generate_statistics_from_dataframe( + df[["strings", "ints", "floats"]] + ) + clear_unsupported_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = df[name].std() + feature.num_stats.std_dev = std + + ingestion_id = client.ingest(feature_stats_feature_set, df) + time.sleep(10) + return { + "df": df, + "id": ingestion_id, + "date": datetime(time_offset.year, time_offset.month, time_offset.day).replace( + tzinfo=pytz.utc + ), + "stats": expected_stats, + } + + +@pytest.fixture(scope="module") +def feature_stats_dataset_agg(client, feature_stats_feature_set): + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + start_date = time_offset - timedelta(days=10) + end_date = time_offset - timedelta(days=7) + df1 = pd.DataFrame( + { + "datetime": [start_date] * 5, + "entity_id": [i for i in range(5)], + "strings": ["a", "b", "b", "b", "a"], + "ints": [4, 3, 2, 6, 3], + "floats": [2.1, 5.2, 4.3, 0.6, 0.1], + } + ) + ingestion_id_1 = client.ingest(feature_stats_feature_set, df1) + 
df2 = pd.DataFrame( + { + "datetime": [start_date + timedelta(days=1)] * 3, + "entity_id": [i for i in range(3)], + "strings": ["a", "b", "c"], + "ints": [2, 6, 7], + "floats": [1.6, 2.4, 2], + } + ) + ingestion_id_2 = client.ingest(feature_stats_feature_set, df2) + + combined_df = pd.concat([df1, df2])[["strings", "ints", "floats"]] + expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) + clear_unsupported_agg_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = combined_df[name].std() + feature.num_stats.std_dev = std + + time.sleep(10) + + return { + "ids": [ingestion_id_1, ingestion_id_2], + "start_date": datetime( + start_date.year, start_date.month, start_date.day + ).replace(tzinfo=pytz.utc), + "end_date": datetime(end_date.year, end_date.month, end_date.day).replace( + tzinfo=pytz.utc + ), + "stats": expected_stats, + } + + +def test_feature_stats_retrieval_by_single_dataset(client, feature_stats_dataset_basic): + stats = client.get_statistics( + f"feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + ingestion_ids=[feature_stats_dataset_basic["id"]], + ) + + assert_stats_equal(feature_stats_dataset_basic["stats"], stats) + + +def test_feature_stats_by_date(client, feature_stats_dataset_basic): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + start_date=feature_stats_dataset_basic["date"], + end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), + ) + assert_stats_equal(feature_stats_dataset_basic["stats"], stats) + + +def test_feature_stats_agg_over_datasets(client, feature_stats_dataset_agg): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + ingestion_ids=feature_stats_dataset_agg["ids"], + ) + 
assert_stats_equal(feature_stats_dataset_agg["stats"], stats) + + +def test_feature_stats_agg_over_dates(client, feature_stats_dataset_agg): + stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store=STORE_NAME, + start_date=feature_stats_dataset_agg["start_date"], + end_date=feature_stats_dataset_agg["end_date"], + ) + assert_stats_equal(feature_stats_dataset_agg["stats"], stats) + + +def test_feature_stats_force_refresh( + client, feature_stats_dataset_basic, feature_stats_feature_set +): + df = feature_stats_dataset_basic["df"] + + df2 = pd.DataFrame( + { + "datetime": [df.iloc[0].datetime], + "entity_id": [10], + "strings": ["c"], + "ints": [2], + "floats": [1.3], + } + ) + client.ingest(feature_stats_feature_set, df2) + time.sleep(10) + + actual_stats = client.get_statistics( + "feature_stats", + features=["strings", "ints", "floats"], + store="historical", + start_date=feature_stats_dataset_basic["date"], + end_date=feature_stats_dataset_basic["date"] + timedelta(days=1), + force_refresh=True, + ) + + combined_df = pd.concat([df, df2]) + expected_stats = tfdv.generate_statistics_from_dataframe(combined_df) + + clear_unsupported_fields(expected_stats) + + # Since TFDV computes population std dev + for feature in expected_stats.datasets[0].features: + if feature.HasField("num_stats"): + name = feature.path.step[0] + std = combined_df[name].std() + feature.num_stats.std_dev = std + + assert_stats_equal(expected_stats, actual_stats) + + +def clear_unsupported_fields(datasets): + dataset = datasets.datasets[0] + for feature in dataset.features: + if feature.HasField("num_stats"): + feature.num_stats.common_stats.ClearField("num_values_histogram") + for hist in feature.num_stats.histograms: + sorted_buckets = sorted(hist.buckets, key=lambda k: k["highValue"]) + del hist.buckets[:] + hist.buckets.extend(sorted_buckets) + elif feature.HasField("string_stats"): + 
feature.string_stats.common_stats.ClearField("num_values_histogram") + for bucket in feature.string_stats.rank_histogram.buckets: + bucket.ClearField("low_rank") + bucket.ClearField("high_rank") + elif feature.HasField("struct_stats"): + feature.struct_stats.ClearField("num_values_histogram") + elif feature.HasField("bytes_stats"): + feature.bytes_stats.ClearField("num_values_histogram") + + +def clear_unsupported_agg_fields(datasets): + dataset = datasets.datasets[0] + for feature in dataset.features: + if feature.HasField("num_stats"): + feature.num_stats.common_stats.ClearField("num_values_histogram") + feature.num_stats.ClearField("histograms") + feature.num_stats.ClearField("median") + elif feature.HasField("string_stats"): + feature.string_stats.common_stats.ClearField("num_values_histogram") + feature.string_stats.ClearField("rank_histogram") + feature.string_stats.ClearField("top_values") + feature.string_stats.ClearField("unique") + elif feature.HasField("struct_stats"): + feature.struct_stats.ClearField("num_values_histogram") + elif feature.HasField("bytes_stats"): + feature.bytes_stats.ClearField("num_values_histogram") + feature.bytes_stats.ClearField("unique") + + +def assert_stats_equal(left, right): + left_stats = MessageToDict(left)["datasets"][0] + right_stats = MessageToDict(right)["datasets"][0] + assert ( + left_stats["numExamples"] == right_stats["numExamples"] + ), f"Number of examples do not match. 
Expected {left_stats['numExamples']}, got {right_stats['numExamples']}" + + left_features = sorted(left_stats["features"], key=lambda k: k["path"]["step"][0]) + right_features = sorted(right_stats["features"], key=lambda k: k["path"]["step"][0]) + diff = DeepDiff(left_features, right_features, significant_digits=4) + assert len(diff) == 0, f"Feature statistics do not match: \nwanted: {left_features}\n got: {right_features}" diff --git a/tests/e2e/all_types_parquet/all_types_parquet.yaml b/tests/e2e/redis/all_types_parquet/all_types_parquet.yaml similarity index 100% rename from tests/e2e/all_types_parquet/all_types_parquet.yaml rename to tests/e2e/redis/all_types_parquet/all_types_parquet.yaml diff --git a/tests/e2e/basic-ingest-redis-serving.py b/tests/e2e/redis/basic-ingest-redis-serving.py similarity index 98% rename from tests/e2e/basic-ingest-redis-serving.py rename to tests/e2e/redis/basic-ingest-redis-serving.py index da0967fd764..31a7c8d73d7 100644 --- a/tests/e2e/basic-ingest-redis-serving.py +++ b/tests/e2e/redis/basic-ingest-redis-serving.py @@ -29,6 +29,7 @@ FLOAT_TOLERANCE = 0.00001 PROJECT_NAME = 'basic_' + uuid.uuid4().hex.upper()[0:6] +DIR_PATH = os.path.dirname(os.path.realpath(__file__)) @pytest.fixture(scope='module') @@ -108,8 +109,8 @@ def test_version_returns_results(client): @pytest.mark.run(order=10) def test_basic_register_feature_set_success(client): # Register feature set without project - cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml") - driver_fs_expected = FeatureSet.from_yaml("basic/driver_fs.yaml") + cust_trans_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml") + driver_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/driver_fs.yaml") client.apply(cust_trans_fs_expected) client.apply(driver_fs_expected) cust_trans_fs_actual = client.get_feature_set("customer_transactions") @@ -118,7 +119,7 @@ def test_basic_register_feature_set_success(client): assert driver_fs_actual == 
driver_fs_expected # Register feature set with project - cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml") + cust_trans_fs_expected = FeatureSet.from_yaml(f"{DIR_PATH}/basic/cust_trans_fs.yaml") client.set_project(PROJECT_NAME) client.apply(cust_trans_fs_expected) cust_trans_fs_actual = client.get_feature_set("customer_transactions", @@ -466,7 +467,7 @@ def large_volume_dataframe(): @pytest.mark.run(order=30) def test_large_volume_register_feature_set_success(client): cust_trans_fs_expected = FeatureSet.from_yaml( - "large_volume/cust_trans_large_fs.yaml") + f"{DIR_PATH}/large_volume/cust_trans_large_fs.yaml") # Register feature set client.apply(cust_trans_fs_expected) @@ -604,7 +605,7 @@ def all_types_parquet_file(): def test_all_types_parquet_register_feature_set_success(client): # Load feature set from file all_types_parquet_expected = FeatureSet.from_yaml( - "all_types_parquet/all_types_parquet.yaml") + f"{DIR_PATH}/all_types_parquet/all_types_parquet.yaml") # Register feature set client.apply(all_types_parquet_expected) diff --git a/tests/e2e/basic/cust_trans_fs.yaml b/tests/e2e/redis/basic/cust_trans_fs.yaml similarity index 100% rename from tests/e2e/basic/cust_trans_fs.yaml rename to tests/e2e/redis/basic/cust_trans_fs.yaml diff --git a/tests/e2e/basic/data.csv b/tests/e2e/redis/basic/data.csv similarity index 100% rename from tests/e2e/basic/data.csv rename to tests/e2e/redis/basic/data.csv diff --git a/tests/e2e/basic/driver_fs.yaml b/tests/e2e/redis/basic/driver_fs.yaml similarity index 100% rename from tests/e2e/basic/driver_fs.yaml rename to tests/e2e/redis/basic/driver_fs.yaml diff --git a/tests/e2e/large_volume/cust_trans_large_fs.yaml b/tests/e2e/redis/large_volume/cust_trans_large_fs.yaml similarity index 100% rename from tests/e2e/large_volume/cust_trans_large_fs.yaml rename to tests/e2e/redis/large_volume/cust_trans_large_fs.yaml diff --git a/tests/e2e/requirements.txt b/tests/e2e/requirements.txt index 
0ba345a000f..040febc4983 100644 --- a/tests/e2e/requirements.txt +++ b/tests/e2e/requirements.txt @@ -7,3 +7,7 @@ pytest-benchmark==3.2.2 pytest-mock==1.10.4 pytest-timeout==1.3.3 pytest-ordering==0.6.* +tensorflow-data-validation==0.21.2 +deepdiff==4.3.2 +tensorflow==2.1.0 +tfx-bsl==0.21.* # lock to 0.21 \ No newline at end of file