In [1]:
from hypothesis import given
import hypothesis.strategies as st

# Finding a majority element in a list

## Is a particular element a majority element?

First, note that checking whether a given element is a majority element can be done in $O(n)$ time where $n$ is the size of the input list.

In [2]:
def is_majority_element(element, a):
    """Tell whether ``element`` occupies strictly more than half of ``a``.

    Parameters
    ----------
    element : object
        Value to look for; compared through ``a.count``.
    a : sequence
        Any object that provides ``count`` and ``len`` (e.g. a list).

    Returns
    -------
    bool
        ``True`` when the number of occurrences exceeds ``len(a) // 2``.
    """
    occurrences = a.count(element)
    threshold = len(a) // 2
    return occurrences > threshold

## Naive solution

The naive solution iterates over the elements of the list and checks whether each one is a majority element; as soon as one is found, the algorithm returns it. Since there are $n$ elements in the input list and checking whether an element is a majority element is $O(n)$, the naive solution is $O(n^2)$.

In [38]:
def naive_algorithm(a):
    """Find a majority element by testing every element in turn.

    Each element of ``a`` is passed to ``is_majority_element``; the first
    one that qualifies is returned.

    Returns
    -------
    The majority element, or ``-1`` when no element qualifies. Because
    ``-1`` is also the "not found" marker, a list whose majority element
    is literally ``-1`` yields the same value as a list without one.
    """
    # The generator is lazy, so scanning stops at the first hit.
    return next(
        (item for item in a if is_majority_element(item, a)),
        -1,
    )

## Divide and Conquer solution

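The key insight is that if a list of length $n$ contains a majority element, then that same element is also a majority element of at least one of its two halves. It therefore suffices to find the majority element of each half recursively and then check both candidates against the whole list.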

In [39]:
def _divide_and_conquer(a, left, right):
    """Divide and Conquer algorithm for finding a majority element."""
    if left == right:
        majority_element = -1
    elif left + 1 == right:
        majority_element = a[left]
    else:
        m = left + (right - left) // 2
        left_element = _get_majority_element(a, left, m)
        right_element = _get_majority_element(a, m, right)

        # merge the left and right elements...
        b = a[left:right]
        if left_element == -1 and right_element != -1:
            majority_element = right_element if b.count(right_element) > len(b) // 2 else -1
        elif left_element != -1 and right_element == -1:
            majority_element = left_element if b.count(left_element) > len(b) // 2 else -1
        elif left_element != -1 and right_element != -1:
            if b.count(left_element) > len(b) // 2:
                majority_element = left_element
            elif b.count(right_element) > len(b) // 2:
                majority_element = right_element
            else:
                majority_element = -1
        else:
            majority_element = -1

    return majority_element


def divide_and_conquer_algorithm(a):
    return _get_majority_element(a, 0, len(a))

In [40]:
@given(a=st.lists(st.integers()))
def test_divide_and_conquer_algorithm(a):
    """Property: divide and conquer agrees with the naive search.

    ``a`` is a list of integers generated by hypothesis; the naive
    implementation serves as the reference result.
    """
    expected, actual = naive_algorithm(a), divide_and_conquer_algorithm(a)
    failure_message = "Expected result {}; actual result {}.".format(
        expected, actual
    )
    assert expected == actual, failure_message

In [41]:
# Run the property-based test directly from the notebook.
test_divide_and_conquer_algorithm()

## Boyer-Moore Algorithm

The [Boyer-Moore majority vote algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_majority_vote_algorithm) is an $O(n)$ algorithm for finding a majority element in a sequence (if one exists).  The algorithm first passes through the data to select a candidate. If a majority element exists, it will be the candidate selected by this pass. However, if there is no majority element, the first pass still returns a candidate, which is a false positive.  Thus a second pass through the data is required to make certain that the candidate returned by the first pass is in fact a majority element. Since both passes of the Boyer-Moore algorithm are $O(n)$, the overall algorithm is $O(n)$.

In [35]:
def _candidate_majority_element(a):
    candidate, count = a[0], 1
    for element in a:
        if candidate == element:
            count += 1
        else:
            count -= 1
        if count == 0:
            candidate, count = element, 1
    return candidate


def boyer_moore_algorithm(a):
    """Return the majority element of ``a`` using two linear passes.

    The first pass (``_candidate_majority_element``) selects a candidate;
    the second (``is_majority_element``) confirms it.

    Returns
    -------
    The majority element, or ``-1`` when ``a`` is empty or the candidate
    fails verification.
    """
    if len(a) == 0:
        # The candidate pass reads a[0], so empty input is handled here.
        return -1
    candidate = _candidate_majority_element(a)
    return candidate if is_majority_element(candidate, a) else -1


In [36]:
@given(a=st.lists(st.integers()))
def test_boyer_moore_algorithm(a):
    """Property: Boyer-Moore agrees with the naive search.

    ``a`` is a list of integers generated by hypothesis; the naive
    implementation serves as the reference result.
    """
    # Use the reference implementation under its current name,
    # ``naive_algorithm``; the name previously called here is not defined in
    # this notebook and raised NameError on a fresh kernel.
    expected = naive_algorithm(a)
    actual = boyer_moore_algorithm(a)
    msg = "Expected result {}; actual result {}.".format(expected, actual)
    assert expected == actual, msg

In [37]:
# Run the property-based test directly from the notebook.
test_boyer_moore_algorithm()