In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
def gini(array):
    """Calculate the Gini coefficient of a pandas array."""
    # based on Olivia Guest's https://github.com/oliviaguest/gini
    # modified to take the values from a series/pandas array before flatteningb
    # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
    # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    array = array.values.flatten() #all values are treated equally, arrays must be 1d
    if np.amin(array) < 0:
        array -= np.amin(array) #values cannot be negative
    array += 0.0000001 #values cannot be 0
    array = np.sort(array) #values must be sorted
    index = np.arange(1,array.shape[0]+1) #index per array element
    n = array.shape[0]#number of array elements
    return ((np.sum((2 * index - n  - 1) * array)) / (n * np.sum(array))) #Gini coefficient

In [3]:
# Here's the 7 x 10 array
scores = np.random.randint(100, size= (7,10))

# Here are the names of the texts/observations
names = ['alpha','bravo','charlie','delta','echo','golf','hotel']

# And here are the observation names
years = [f'year{item}' for item in range(1,11)]

In [4]:
df = pd.DataFrame(data=scores, index=names, columns=years)

In [5]:
df.head(7)
cm = sns.light_palette("red", as_cmap=True)
df.style.background_gradient(cmap='viridis')

Unnamed: 0,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10
alpha,93,9,15,41,42,27,18,79,79,16
bravo,2,64,49,55,75,12,91,6,35,42
charlie,38,42,77,78,0,51,61,74,21,60
delta,71,86,2,60,33,71,10,54,29,81
echo,2,87,18,16,5,97,35,16,5,0
golf,89,26,76,3,15,11,48,69,48,47
hotel,67,90,24,95,68,75,98,49,64,41


Ahead of turning things over to the wizardy of **pandas** I want to establish the math in my own head:

In [6]:
year1 = df.year1.tolist()
print(year1)

[93, 2, 38, 71, 2, 89, 67]


In [7]:
for i in year1:
    print(i/sum(year1))

0.2569060773480663
0.0055248618784530384
0.10497237569060773
0.19613259668508287
0.0055248618784530384
0.24585635359116023
0.1850828729281768


Okay, those are our outcomes for `year1`, and when we wave the **pandas** wand we get the same results:

In [8]:
df = df / df.sum()
df.head(7)

Unnamed: 0,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10
alpha,0.256906,0.022277,0.057471,0.117816,0.176471,0.078488,0.049861,0.227666,0.281139,0.055749
bravo,0.005525,0.158416,0.187739,0.158046,0.315126,0.034884,0.252078,0.017291,0.124555,0.146341
charlie,0.104972,0.10396,0.295019,0.224138,0.0,0.148256,0.168975,0.213256,0.074733,0.209059
delta,0.196133,0.212871,0.007663,0.172414,0.138655,0.206395,0.027701,0.15562,0.103203,0.28223
echo,0.005525,0.215347,0.068966,0.045977,0.021008,0.281977,0.096953,0.04611,0.017794,0.0
golf,0.245856,0.064356,0.291188,0.008621,0.063025,0.031977,0.132964,0.198847,0.170819,0.163763
hotel,0.185083,0.222772,0.091954,0.272989,0.285714,0.218023,0.271468,0.14121,0.227758,0.142857


In [9]:
df['STD'] = df.std(axis=1)
df.head(7)

Unnamed: 0,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10,STD
alpha,0.256906,0.022277,0.057471,0.117816,0.176471,0.078488,0.049861,0.227666,0.281139,0.055749,0.095563
bravo,0.005525,0.158416,0.187739,0.158046,0.315126,0.034884,0.252078,0.017291,0.124555,0.146341,0.100357
charlie,0.104972,0.10396,0.295019,0.224138,0.0,0.148256,0.168975,0.213256,0.074733,0.209059,0.085967
delta,0.196133,0.212871,0.007663,0.172414,0.138655,0.206395,0.027701,0.15562,0.103203,0.28223,0.084863
echo,0.005525,0.215347,0.068966,0.045977,0.021008,0.281977,0.096953,0.04611,0.017794,0.0,0.094941
golf,0.245856,0.064356,0.291188,0.008621,0.063025,0.031977,0.132964,0.198847,0.170819,0.163763,0.094036
hotel,0.185083,0.222772,0.091954,0.272989,0.285714,0.218023,0.271468,0.14121,0.227758,0.142857,0.064684


In [10]:
df['Gini'] = df.apply(gini, axis=1)

# Be careful here. I think your Gini computation is including the St. Dev. column. 

In [11]:
df.head(7)

Unnamed: 0,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10,STD,Gini
alpha,0.256906,0.022277,0.057471,0.117816,0.176471,0.078488,0.049861,0.227666,0.281139,0.055749,0.095563,0.373467
bravo,0.005525,0.158416,0.187739,0.158046,0.315126,0.034884,0.252078,0.017291,0.124555,0.146341,0.100357,0.375101
charlie,0.104972,0.10396,0.295019,0.224138,0.0,0.148256,0.168975,0.213256,0.074733,0.209059,0.085967,0.304693
delta,0.196133,0.212871,0.007663,0.172414,0.138655,0.206395,0.027701,0.15562,0.103203,0.28223,0.084863,0.308926
echo,0.005525,0.215347,0.068966,0.045977,0.021008,0.281977,0.096953,0.04611,0.017794,0.0,0.094941,0.540111
golf,0.245856,0.064356,0.291188,0.008621,0.063025,0.031977,0.132964,0.198847,0.170819,0.163763,0.094036,0.367056
hotel,0.185083,0.222772,0.091954,0.272989,0.285714,0.218023,0.271468,0.14121,0.227758,0.142857,0.064684,0.207753


In [12]:
# df.sort_values(by=['STD'], ascending=True)
df.style.bar(subset=['STD'], color='#d65f5f')

Unnamed: 0,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10,STD,Gini
alpha,0.256906,0.0222772,0.0574713,0.117816,0.176471,0.0784884,0.0498615,0.227666,0.281139,0.0557491,0.0955634,0.373467
bravo,0.00552486,0.158416,0.187739,0.158046,0.315126,0.0348837,0.252078,0.0172911,0.124555,0.146341,0.100357,0.375101
charlie,0.104972,0.10396,0.295019,0.224138,0.0,0.148256,0.168975,0.213256,0.0747331,0.209059,0.0859665,0.304693
delta,0.196133,0.212871,0.00766284,0.172414,0.138655,0.206395,0.0277008,0.15562,0.103203,0.28223,0.0848626,0.308926
echo,0.00552486,0.215347,0.0689655,0.045977,0.0210084,0.281977,0.0969529,0.0461095,0.0177936,0.0,0.0949414,0.540111
golf,0.245856,0.0643564,0.291188,0.00862069,0.0630252,0.0319767,0.132964,0.198847,0.170819,0.163763,0.0940362,0.367056
hotel,0.185083,0.222772,0.091954,0.272989,0.285714,0.218023,0.271468,0.14121,0.227758,0.142857,0.0646838,0.207753
