Skip to content

Commit

Permalink
fix(ava/insight): modify type definition & test cases
Browse files (browse the repository at this point in the history)
  • Loading branch information
LAI-X authored and pddpd committed Jun 20, 2023
1 parent 52bf01c commit 3a7c6f5
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 24 deletions.
28 changes: 28 additions & 0 deletions packages/ava/__tests__/integration/insight/insight.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -460,11 +460,39 @@ describe('test for distribution insight', () => {
});
// Outliers
test('category outlier', async () => {
// Identify outliers with the default IQR method
const result = getInsights(dataWithMajorityAndOutlier, {
insightTypes: ['category_outlier'],
dimensions: [{ fieldName: 'product' }],
});
expect(result.insights).toBeIncludeInsights([
{
measures: [{ fieldName: 'yield', method: 'SUM' }],
dimensions: [{ fieldName: 'product' }],
subspace: [],
patterns: [
{
type: 'category_outlier',
significance: 0.9,
index: 0,
x: 'apple',
y: 160,
},
],
},
]);

// Explicitly select the p-value method to detect outliers
const pValueResult = getInsights(dataWithMajorityAndOutlier, {
insightTypes: ['category_outlier'],
dimensions: [{ fieldName: 'product' }],
algorithmParameter: {
outlier: {
method: 'p-value',
},
},
});
expect(pValueResult.insights).toBeIncludeInsights([
{
measures: [{ fieldName: 'yield', method: 'SUM' }],
dimensions: [{ fieldName: 'product' }],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const data = [

describe('extract category-outlier insight', () => {
test('check outliers result', () => {
const result = extractor(data, ['type'], [{ fieldName: 'sales', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['type'], measures: [{ fieldName: 'sales', method: 'SUM' }] });
const outlierIndexes = result?.map((item) => item.index);
expect(outlierIndexes).toStrictEqual([3]);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const data = [

describe('extract change-point insight', () => {
test('check change-point result', () => {
const result = extractor(data, ['year'], [{ fieldName: 'value', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['year'], measures: [{ fieldName: 'value', method: 'SUM' }] });
expect(result[0]?.index).toEqual(5);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ const data = [

describe('extract correlation insight', () => {
test('check correlation result', () => {
const result = extractor(
const result = extractor({
data,
[],
[
dimensions: [],
measures: [
{ fieldName: 'x', method: 'SUM' },
{ fieldName: 'y', method: 'SUM' },
]
);
],
});
expect(result[0]?.pcorr).toBeGreaterThan(0.8);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const data = [

describe('extract low-variance insight', () => {
test('check low-variance result', () => {
const result = extractor(data, ['type'], [{ fieldName: 'sales', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['type'], measures: [{ fieldName: 'sales', method: 'SUM' }] });
expect(result[0]?.significance).toBeGreaterThan(0.85);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const data = [

describe('extract majority insight', () => {
test('check majority result', () => {
const result = extractor(data, ['type'], [{ fieldName: 'sales', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['type'], measures: [{ fieldName: 'sales', method: 'SUM' }] });
expect(result[0]?.index).toEqual(5);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const data = [

describe('extract time-series-outlier insight', () => {
test('check outliers result', () => {
const result = extractor(data, ['year'], [{ fieldName: 'value', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['year'], measures: [{ fieldName: 'value', method: 'SUM' }] });
const outliers = result?.map((item) => item.index);
expect(outliers).toStrictEqual([7]);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const data = [

describe('extract trend insight', () => {
test('check trend result', () => {
const result = extractor(data, ['year'], [{ fieldName: 'value', method: 'SUM' }]);
const result = extractor({ data, dimensions: ['year'], measures: [{ fieldName: 'value', method: 'SUM' }] });
expect(result[0]?.trend).toEqual('increasing');
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,13 @@ export const findOutliers = (
values: number[],
options?: InsightOptions
): { outliers: OutlierItem[]; thresholds: [number, number] } => {
const {
method,
iqrK,
confidenceInterval = SIGNIFICANCE_BENCHMARK,
} = options?.adjustableAlgorithmParameter?.outlier || {};
const { method, iqrK, confidenceInterval = SIGNIFICANCE_BENCHMARK } = options?.algorithmParameter?.outlier || {};
const outliers: OutlierItem[] = [];
const thresholds = [];
const candidates = values.map((item, index) => {
return { index, value: item };
});
if (method !== 'NormalityTest') {
if (method !== 'p-value') {
const IQRResult = categoryOutlier.IQR(values, { k: iqrK ?? IQR_K });
const lowerOutlierIndexes = IQRResult.lower.indexes;
const upperOutlierIndexes = IQRResult.upper.indexes;
Expand Down
14 changes: 7 additions & 7 deletions packages/ava/src/insight/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,19 +124,19 @@ export type InsightExtractorProp = {
};

/** Key parameters in the algorithm for extracting insights */
export type AdjustableAlgorithmParameter = {
export type AlgorithmParameter = {
/**
* Settings for outlier detection; they apply to both category outliers and time series outliers
* */
outlier?: {
/**
* - IQR: Box and Whisker Plot method which is used by default. A point is considered an outlier when it lies outside of iqrK times the inter quartile range.
* - normalityTest: Assuming that the data follows a normal distribution, a point is considered an outlier if the probability of occurrence is less than 1-confidenceInterval.
* - IQR: Interquartile range method, which is used by default. A point is considered an outlier when it lies more than iqrK times the interquartile range outside the quartiles.
* - p-value: Assuming that the data follows a normal distribution, a point is considered an outlier if the two-sided test p-value is less than 1-confidenceInterval.
* */
method?: 'IQR' | 'NormalityTest';
/** Default value is 1.5. */
method?: 'IQR' | 'p-value';
/** Parameter of Inter Quartile Range method. Default value is 1.5. */
iqrK?: number;
/** Default value is 0.95. */
/** Parameter of p-value method. Default value is 0.95. */
confidenceInterval?: number;
};
};
Expand Down Expand Up @@ -164,7 +164,7 @@ export interface InsightOptions {
/** Parameter passed through to the data frame during data pre-processing */
dataProcessInfo?: Extra;
/** Key parameters in the algorithm for extracting insights */
adjustableAlgorithmParameter?: AdjustableAlgorithmParameter;
algorithmParameter?: AlgorithmParameter;
}

export interface BasePatternInfo<T extends InsightType> {
Expand Down

0 comments on commit 3a7c6f5

Please sign in to comment.