# Merged Incidents Validation

This notebook is a simple reference for performing the statistical tests described in:
https://docs.google.com/document/d/1egVnAQx9SLDSf9lgf-L4RLSyUyfmB48VqPxU59nghm0



# Two Sample Proportion Z Test

The two-proportion z-test is described here: https://vitalflux.com/two-sample-z-test-for-proportions-formula-examples/#:~:text=A%20two%2Dproportion%20Z%2Dtest,the%20two%20proportions%20are%20equal.


In [6]:
from dataclasses import dataclass
import numpy as np
import scipy.stats
from math import sqrt


@dataclass
class ProportionTestSample:
    """One sample's summary statistics, as consumed by the two-proportion z-test."""

    # Proportion of false positives in the sample; used as the sample
    # proportion in the test (presumably a fraction in [0, 1] -- not enforced).
    false_positive_rate: float
    # Number of observations the rate was measured over.
    n: int


@dataclass
class ZTestResult:
    """Outcome of a z-test: the test statistic together with its p-value."""

    # Probability associated with the z-score under the null hypothesis.
    p_value: float
    # The test statistic itself.
    z_score: float


def two_tailed_test_z_score_to_p_value(z_score: float) -> float:
    """Convert a z-score into a two-tailed p-value under the standard normal.

    The sign of ``z_score`` is ignored: the upper-tail probability beyond
    ``abs(z_score)`` is doubled so that both tails are counted.
    """
    magnitude = abs(z_score)
    # Survival function = 1 - CDF, i.e. the mass in the upper tail.
    upper_tail_probability = scipy.stats.norm.sf(magnitude)
    return upper_tail_probability * 2


def two_proportion_z_test(
    observed_sample: ProportionTestSample, merged_sample: ProportionTestSample
) -> ZTestResult:
    """Run a two-tailed, pooled two-proportion z-test on two false positive rates.

    Tests the null hypothesis that both samples share the same underlying
    false positive rate. The z-score is positive when the observed sample's
    rate is the larger of the two.

    Args:
        observed_sample: Rate and size of the first sample.
        merged_sample: Rate and size of the second sample.

    Returns:
        A ``ZTestResult`` holding the z-score and its two-tailed p-value.
        If both rates are exactly 0 (or exactly 1) the statistic is undefined
        (0/0); by convention this returns ``z_score=0.0`` and ``p_value=1.0``.

    Raises:
        ZeroDivisionError: If either sample has ``n == 0``.
    """
    p1_hat = observed_sample.false_positive_rate
    p2_hat = merged_sample.false_positive_rate

    n1 = observed_sample.n
    n2 = merged_sample.n

    # Pooled proportion: the size-weighted average of the two sample rates.
    p_hat = (p1_hat * n1 + p2_hat * n2) / (n1 + n2)
    pooled_variance = p_hat * (1 - p_hat) * (1 / n1 + 1 / n2)

    # When both samples have a rate of exactly 0 (or exactly 1) the pooled
    # variance is 0 and the statistic is 0/0. Identical rates carry no
    # evidence of a difference, so report z = 0 and p = 1 instead of crashing.
    if pooled_variance == 0 and p1_hat == p2_hat:
        return ZTestResult(p_value=1.0, z_score=0.0)

    z_score = (p1_hat - p2_hat) / sqrt(pooled_variance)
    return ZTestResult(
        p_value=two_tailed_test_z_score_to_p_value(z_score), z_score=z_score
    )

In [8]:
# Example 1: nearly identical false positive rates (0.50 vs 0.49).
# The recorded output shows p ~ 0.66, far above conventional significance thresholds.
example_sample_1 = ProportionTestSample(n=1000, false_positive_rate=0.5)
example_sample_2 = ProportionTestSample(n=950, false_positive_rate=0.49)

print(two_proportion_z_test(example_sample_1, example_sample_2))

ZTestResult(p_value=0.6588772543001226, z_score=0.4414638119680947)


In [9]:
# Example 2: clearly different false positive rates (0.50 vs 0.30).
# The recorded output shows z ~ 9.0 with a p-value on the order of 1e-19.
example_sample_1 = ProportionTestSample(n=1000, false_positive_rate=0.5)
example_sample_2 = ProportionTestSample(n=950, false_positive_rate=0.3)

print(two_proportion_z_test(example_sample_1, example_sample_2))

ZTestResult(p_value=2.2280473322885796e-19, z_score=9.001425946102922)
