In [1]:
import numpy as np

def gini(array):
    """Calculate the Gini coefficient of a collection of numeric values.

    Based on the bottom equation of
    http://www.statsdirect.com/help/generatedimages/equations/equation154.svg
    from
    http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

    Parameters
    ----------
    array : array_like
        Numeric values of any shape (numpy array, list, tuple, ...).
        All values are treated equally: the input is flattened to 1d.
        The computation runs on a float copy, so the caller's data is
        never modified and integer input is accepted.

    Returns
    -------
    numpy.float64
        The Gini coefficient of the (shifted, see below) values.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.

    Notes
    -----
    If any value is negative, all values are shifted so the minimum
    becomes 0; afterwards a small constant (1e-7) is added to every value
    so that none is exactly 0. Both adjustments slightly alter the result
    compared with the textbook formula applied to the raw data.
    """
    # Flat float64 copy: in-place float arithmetic below would raise on an
    # integer array, and plain Python sequences have no .flatten().
    values = np.asarray(array, dtype=float).flatten()
    # Number of array elements:
    n = values.shape[0]
    if n == 0:
        raise ValueError("gini() requires at least one value")
    lowest = np.amin(values)
    if lowest < 0:
        # Values cannot be negative:
        values -= lowest
    # Values cannot be 0:
    values += 0.0000001
    # Values must be sorted:
    values = np.sort(values)
    # Index per array element (1-based rank after sorting):
    index = np.arange(1, n + 1)
    # Gini coefficient:
    return ((np.sum((2 * index - n - 1) * values)) / (n * np.sum(values)))

In [3]:
# DATA
# Four hard-coded lists of integer amounts. The "09"/"19" suffixes presumably
# denote two periods (e.g. 2009 and 2019) and "fac"/"cola" two groups of
# amounts -- TODO confirm meaning, units and provenance of these figures.
# Later in the notebook each "cola" list is appended to the matching "fac" list.

fac09 = [89750, 83011, 88732, 74024, 94361, 96760, 96568, 81916, 76567, 77573, 83383, 65496, 58478, 52485, 55666, 65973, 69229, 58029, 62859, 74651, 49444, 43000, 49057, 57480, 46640, 36821, 56947, 49000, 43580, 57837, 32000, 35726, 38277, 32209, 32000, 31500, 32000, 47761, 37952, 32000, 32000, 44492, 36617, 42654, 35867, 32000]
fac19 = [77558, 88903, 67930, 79258, 73205, 102080, 85693, 88661, 84962, 86423, 70169, 59268, 58464, 59653, 57131, 58564, 53318, 60627, 67775, 50000, 52233, 52782, 50983, 48000, 51000, 50099, 49000, 49000, 48247, 50839, 47018, 54547, 47199, 44283, 40842, 37912, 39953, 37763, 31182, 36136, 39997, 36046, 36106, 38942]
cola09 = [138363, 88850]
cola19 = [170416, 154447]

In [9]:
# Combined lists: the "fac" values followed by the matching "cola" values.
overhead09 = fac09 + cola09
overhead19 = fac19 + cola19

# Float arrays for gini(): one pair without and one pair with the "cola" values.
fac09_array = np.array(fac09, dtype=float)
fac19_array = np.array(fac19, dtype=float)
oh09 = np.array(overhead09, dtype=float)
oh19 = np.array(overhead19, dtype=float)

In [11]:
# Gini coefficients of the "fac"-only arrays, printed in 09, 19 order.
print(*(gini(values) for values in (fac09_array, fac19_array)))

0.20472553490613568 0.16608426017383915


In [12]:
# Gini coefficients of the combined ("fac" + "cola") arrays, printed in 09, 19 order.
print(*(gini(values) for values in (oh09, oh19)))

0.22050864853040802 0.21224822377388544
