In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Location of the exported rule inventory, named so it is easy to repoint.
INVENTORY_CSV = "Config Rule Inventory.csv"
df = pd.read_csv(INVENTORY_CSV)

We're not really interested in anything that is not considered in-scope, and we also don't want anything that was assigned 55 points, because that was John's way of flagging work that was too big to estimate.

We only actually care about the values of the estimated points and the risk score.

In [2]:
# Inspect the distinct labels used in the 'In scope?' column before filtering on them.
df['In scope?'].unique()

array(['No', 'Potential Add', 'Yes', nan], dtype=object)

In [118]:
# Keep rows marked 'Yes' or 'Potential Add', then drop the rows carrying the
# 55-point "too big to estimate" marker. Rows with no estimate (NaN) are kept.
scope_mask = df['In scope?'].isin(['Yes', 'Potential Add'])
estimable_mask = df['Pts to Automate Enforcement'].ne(55)
ndf = df.loc[scope_mask & estimable_mask]
ndf

Unnamed: 0,Risk,Pts to Automate Enforcement,Feature / Rule Name,In scope?
19,,,Mathilde Moreno,Potential Add
20,,,Konrad Prentice,Potential Add
21,,,Paolo Burnett,Potential Add
22,,,Connah Burt,Potential Add
23,1.0,2.0,Jaskaran Guerrero,Yes
24,3.0,5.0,Alfie-James Tomlinson,Yes
26,,8.0,Layla-Rose Johnson,Yes
27,2.0,5.0,Dina Sawyer,Yes
28,2.0,5.0,Marguerite Novak,Yes
29,1.0,2.0,Maurice Foster,Yes


Create an x and y to build the plot from. We're going to plot story points by risk score.

In [119]:
# Story points are passed to the swarm plot as x. Rows without an estimate
# (NaN) are filled with 0, so they land in the 0-point column.
x = ndf['Pts to Automate Enforcement'].fillna(0)
x

19     0.0
20     0.0
21     0.0
22     0.0
23     2.0
24     5.0
26     8.0
27     5.0
28     5.0
29     2.0
30     5.0
31     5.0
32     3.0
37     8.0
38     3.0
39     2.0
40     0.0
41     0.0
42     2.0
43     2.0
45     5.0
46     3.0
47     3.0
48     2.0
49     8.0
50     5.0
51     5.0
52     2.0
53     2.0
54     5.0
55     5.0
57     0.0
58    13.0
59     8.0
60     0.0
61     0.0
62     5.0
63     5.0
64     5.0
65     5.0
66     8.0
67     2.0
68     5.0
69    10.0
70     3.0
Name: Pts to Automate Enforcement, dtype: float64

In [120]:
# Risk scores are passed to the swarm plot as y. Rows without a risk score
# (NaN) are filled with 0.
y = ndf['Risk'].fillna(0)
y

19    0.0
20    0.0
21    0.0
22    0.0
23    1.0
24    3.0
26    0.0
27    2.0
28    2.0
29    1.0
30    1.0
31    1.0
32    1.0
37    3.0
38    3.0
39    1.0
40    3.0
41    3.0
42    1.0
43    1.0
45    2.0
46    3.0
47    2.0
48    1.0
49    3.0
50    3.0
51    1.0
52    1.0
53    1.0
54    1.0
55    2.0
57    0.0
58    3.0
59    1.0
60    0.0
61    0.0
62    2.0
63    2.0
64    2.0
65    2.0
66    2.0
67    3.0
68    1.0
69    1.0
70    0.0
Name: Risk, dtype: float64

To create the annotations, we'll need to assign a name to each dot. Since we're using a swarm plot, we really just have buckets... as many buckets as there are unique story point values (one per column of the plot). So it doesn't matter which name goes to which dot as long as it's in the right bucket and has the right risk score.

To get the list of names with Risk == 3 (these can be spread across several buckets):

In [121]:
# Names of every rule whose risk score is 3.
high_risk = ndf['Risk'] == 3.0
ndf.loc[high_risk, ['Feature / Rule Name']]

Unnamed: 0,Feature / Rule Name
24,Alfie-James Tomlinson
37,Emaan Yoder
38,Mohamed Keith
40,Alicja Casey
41,Millie-Rose Black
46,Evie-May Page
49,Naomi Kennedy
50,Harun Fox
58,Kiera Howarth
67,Demi-Lee Blundell


The "buckets" I'm referring to are actually called [collections](https://matplotlib.org/3.2.1/api/collections_api.html) in matplotlib. 

In [122]:
%matplotlib ipympl


sw = sns.swarmplot(x, y)
sw.collections

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.collections.PathCollection at 0x1a266ae790>,
 <matplotlib.collections.PathCollection at 0x1a266ae810>,
 <matplotlib.collections.PathCollection at 0x1a266a9650>,
 <matplotlib.collections.PathCollection at 0x1a266a9390>,
 <matplotlib.collections.PathCollection at 0x1a266a9910>,
 <matplotlib.collections.PathCollection at 0x1a266a9990>,
 <matplotlib.collections.PathCollection at 0x10a9b6fd0>]

We get the specific coordinates of each point in the bucket by calling get_offsets().

In [123]:
# Sorted unique story-point values present in x.
pts = np.sort(x.unique())

In [124]:
# Show the sorted unique point values.
pts

array([ 0.,  2.,  3.,  5.,  8., 10., 13.])

In [125]:
# Position of the 13-point value within pts.
np.argwhere(pts==13)

array([[6]])

Now, if we had a story point value of n, it would go to bucket...

In [126]:
def ptstobucket(n, x=x, sw=sw):
    """Return the swarm bucket position for the story-point value ``n``.

    The sorted unique values of ``x`` are spread evenly over the positions
    ``0 .. len(sw.collections) - 1``.

    Parameters
    ----------
    n : number
        Story-point value to look up.
    x : pandas.Series
        Plotted point values (defaults to the notebook's ``x`` at definition time).
    sw : matplotlib Axes
        Axes holding the swarm collections (defaults to the notebook's ``sw``
        at definition time).

    Returns
    -------
    numpy.ndarray or None
        A one-element array holding the bucket position, or ``None`` when
        ``n`` is not one of the values in ``x``.
    """
    unique_pts = np.sort(x.unique())
    last_bucket = len(sw.collections) - 1
    bucket_positions = np.linspace(0, last_bucket, len(unique_pts))
    matches = np.argwhere(unique_pts == n)
    if len(matches) == 0:
        return None
    # matches[0] is itself a length-1 index array, hence the array result.
    return bucket_positions[matches[0]]

In [127]:
# Print the bucket each known point value maps to.
for n in pts:
    bucket = ptstobucket(n)
    print(f"{n}: {bucket}")

0.0: [0.]
2.0: [1.]
3.0: [2.]
5.0: [3.]
8.0: [4.]
10.0: [5.]
13.0: [6.]


In [128]:
# Dump the (x, y) coordinates of every dot, one collection at a time.
for c in sw.collections:
    offsets = c.get_offsets()
    print(offsets.astype(np.float16))

[[ 0.      0.    ]
 [-0.1029  0.    ]
 [ 0.1029  0.    ]
 [-0.2058  0.    ]
 [ 0.2058  0.    ]
 [ 0.3088  0.    ]
 [-0.3088  0.    ]
 [ 0.      3.    ]
 [-0.1029  3.    ]]
[[1.     1.    ]
 [0.897  1.    ]
 [1.103  1.    ]
 [0.794  1.    ]
 [1.206  1.    ]
 [0.6914 1.    ]
 [1.309  1.    ]
 [0.6    1.    ]
 [1.     3.    ]]
[[2.    0.   ]
 [2.    1.   ]
 [2.    2.   ]
 [2.    3.   ]
 [1.897 3.   ]]
[[3.    1.   ]
 [2.896 1.   ]
 [3.104 1.   ]
 [2.795 1.   ]
 [3.205 1.   ]
 [3.    2.   ]
 [2.896 2.   ]
 [3.104 2.   ]
 [2.795 2.   ]
 [3.205 2.   ]
 [2.691 2.   ]
 [3.309 2.   ]
 [2.6   2.   ]
 [3.    3.   ]
 [2.896 3.   ]]
[[4.    0.   ]
 [4.    1.   ]
 [4.    2.   ]
 [4.    3.   ]
 [3.896 3.   ]]
[[5. 1.]]
[[6. 3.]]


Create a translation function that will accurately convert offsets into pts values.

Determine the bucket that is closest to the given value by calculating the distance between each bucket and the value and then taking the lowest distance.

In [129]:
def offsettopts(n, point_values=None, n_buckets=None):
    """Map a dot's x offset to the story-point value of the nearest bucket.

    Buckets sit at the integer positions ``0 .. n_buckets - 1``. The offset is
    assigned to whichever position is closest (the lowest position wins a tie)
    and the point value stored at that index is returned.

    Parameters
    ----------
    n : float
        x coordinate of a dot, as returned by ``get_offsets()``.
    point_values : sequence, optional
        Point value for each bucket, indexed by bucket position. Defaults to
        the notebook-level ``pts``.
    n_buckets : int, optional
        Number of buckets. Defaults to ``len(sw.collections)`` from the
        notebook-level ``sw``.

    Returns
    -------
    The element of ``point_values`` at the nearest bucket's index.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if point_values is None:
        point_values = pts
    if n_buckets is None:
        n_buckets = len(sw.collections)

    bucket_centers = np.arange(n_buckets)
    nearest = np.abs(bucket_centers - n).argmin()
    return point_values[nearest]

In [130]:
# Spot check: an offset of 0 should map to the smallest point value.
offsettopts(0)

0.0

In [131]:
# Spot check: an offset of 6 should map to the largest point value.
offsettopts(6)

13.0

In [132]:
# Check the translation on every real dot: print each x offset next to the
# point value it resolves to.
for c in sw.collections:
    for (i, j) in c.get_offsets():
        mapped = offsettopts(i)
        print(f"{i}: {mapped}")

0.0: 0.0
-0.10290658602150549: 0.0
0.10290658602150526: 0.0
-0.20581317204301097: 0.0
0.20581317204301075: 0.0
0.308719758064516: 0.0
-0.30871975806451646: 0.0
0.0: 0.0
-0.10290658602150549: 0.0
1.0: 2.0
0.8970934139784945: 2.0
1.102906586021505: 2.0
0.7941868279569895: 2.0
1.2058131720430105: 2.0
0.6912802419354844: 2.0
1.3087197580645156: 2.0
0.6: 2.0
1.0: 2.0
1.9999999999999996: 3.0
1.9999999999999996: 3.0
1.9999999999999996: 3.0
1.9999999999999996: 3.0
1.8970934139784945: 3.0
3.0: 5.0
2.897093413978494: 5.0
3.102906586021506: 5.0
2.794186827956989: 5.0
3.205813172043011: 5.0
3.0: 5.0
2.897093413978494: 5.0
3.102906586021506: 5.0
2.794186827956989: 5.0
3.205813172043011: 5.0
2.691280241935483: 5.0
3.308719758064517: 5.0
2.6: 5.0
3.0: 5.0
2.897093413978494: 5.0
3.999999999999999: 8.0
3.999999999999999: 8.0
3.999999999999999: 8.0
3.999999999999999: 8.0
3.897093413978493: 8.0
4.999999999999999: 10.0
6.0: 13.0


So, for each offset, we can assign the first name that fits the criteria and then remove it from the list. We'll take the first one by using the head() method and remove it by using drop().

The criteria are... given that (i, j) = offset:
* `offsettopts(i) = pts`
* `j = risk`

In [133]:
# Example dot: risk 1, x offset just right of bucket 1.
j = 1
i = 1.102906586021505
# Rows whose point value matches the dot's bucket and whose risk matches its y.
pts_match = ndf['Pts to Automate Enforcement'] == offsettopts(i)
risk_match = ndf['Risk'] == j
names = ndf.loc[pts_match & risk_match, ['Feature / Rule Name']]
names

Unnamed: 0,Feature / Rule Name
23,Jaskaran Guerrero
29,Maurice Foster
39,Dolly Fuentes
42,Elwood Beattie
43,Ciaran Velasquez
48,Connagh Murillo
52,Nancy Wynn
53,Karl Manning


In [134]:
# Take the first candidate name. `.iloc[0, 0]` reads the cell directly; the
# previous `.head(1).values.all()` only worked through object-array reduction
# and evaluates to True when there are no rows.
names.iloc[0, 0]

'Jaskaran Guerrero'

We'll call our lookup table, which will be keyed by point coordinate, nlookup.

In [135]:
nlookup = {}
# NOTE: this cell consumes ndf (every matched row is dropped below), so it is
# not idempotent. Re-create ndf before running it a second time.
ndf = ndf.fillna(0)
for c in sw.collections:
    # Keys are float16-rounded coordinates; any later lookup must round the same way.
    for (i, j) in c.get_offsets().astype(np.float16):
        # Unassigned rows in this dot's point bucket that share its risk score.
        candidates = ndf.loc[
            (ndf['Pts to Automate Enforcement'] == offsettopts(i)) & (ndf['Risk'] == j),
            'Feature / Rule Name',
        ]
        if candidates.empty:
            # The old `.values.all()` on an empty selection stored True as the
            # name; report the unmatched dot instead of recording a bogus label.
            print(f"No unassigned rule left for {i},{j}")
            continue
        name = candidates.iloc[0]
        print(f"Adding {name} for {i},{j}")
        nlookup[(i,j)] = name
        # Remove the row just used so it cannot be given to another dot.
        ndf = ndf.drop(candidates.index[:1])


Adding Mathilde Moreno for 0.0,0.0
Adding Konrad Prentice for -0.1029052734375,0.0
Adding Paolo Burnett for 0.1029052734375,0.0
Adding Connah Burt for -0.205810546875,0.0
Adding Marius Kirby for 0.205810546875,0.0
Adding Ellenor Adkins for 0.308837890625,0.0
Adding Lylah Lucas for -0.308837890625,0.0
Adding Alicja Casey for 0.0,3.0
Adding Millie-Rose Black for -0.1029052734375,3.0
Adding Jaskaran Guerrero for 1.0,1.0
Adding Maurice Foster for 0.89697265625,1.0
Adding Dolly Fuentes for 1.1025390625,1.0
Adding Elwood Beattie for 0.7939453125,1.0
Adding Ciaran Velasquez for 1.2060546875,1.0
Adding Connagh Murillo for 0.69140625,1.0
Adding Nancy Wynn for 1.30859375,1.0
Adding Karl Manning for 0.60009765625,1.0
Adding Demi-Lee Blundell for 1.0,3.0
Adding Rhian Floyd for 2.0,0.0
Adding Damien Bull for 2.0,1.0
Adding Wallace Lawrence for 2.0,2.0
Adding Mohamed Keith for 2.0,3.0
Adding Evie-May Page for 1.8974609375,3.0
Adding Kody Vaughan for 3.0,1.0
Adding Imogen Todd for 2.896484375,1.0
Add

At this point, the DataFrame should be empty. 

In [136]:
# Every row should have been dropped while building nlookup.
ndf

Unnamed: 0,Risk,Pts to Automate Enforcement,Feature / Rule Name,In scope?


The nlookup dict should have an entry for every record that was in ndf (the filtered, in-scope rows), not the full df.

In [137]:
# Number of dots that received a name.
len(nlookup)

45

Now we need to define our annotation function to look up the name for the dot being hovered over by the mouse pointer. This code was adapted from the great example given by [ImportanceOfBeingErnest](https://stackoverflow.com/users/4124317/importanceofbeingernest), who is apparently a member of the matplotlib dev team, in [this Stack Overflow post](https://stackoverflow.com/questions/7908636/possible-to-make-labels-appear-when-hovering-over-a-point-in-matplotlib).

In [138]:
%matplotlib ipympl

sw = sns.swarmplot(x, y)
annot = sw.annotate("", xy=(0,0), xytext=(20,20),textcoords="offset points",
                    bbox=dict(boxstyle="round", fc="w"),
                    arrowprops=dict(arrowstyle="->"))
annot.set_visible(False)
curdot = None

def update_annot(pc, ind):
    """Attach the annotation to the hovered dot and set its text from nlookup.

    Parameters
    ----------
    pc : matplotlib collection
        The collection that reported the hit.
    ind : dict
        Hit details; ``ind["ind"]`` holds the indices of the dots that were
        hit. Only the first one is used.
    """
    hit_index = ind["ind"][0]
    pos = pc.get_offsets()[hit_index]
    annot.xy = pos
    # Look the name up under the dot's coordinates rounded to float16.
    key = tuple(pos.astype(np.float16))
    annot.set_text(nlookup[key])

def hover(event):
    """Show the annotation for the dot under the pointer, or hide it.

    Parameters
    ----------
    event : matplotlib event
        The event delivered for the "motion_notify_event" connection below.
    """
    was_visible = annot.get_visible()
    for pc in sw.collections:
        hit, ind = pc.contains(event)
        # Truthiness instead of `is True`, so any boolean-like result counts.
        if hit:
            update_annot(pc, ind)
            annot.set_visible(True)
            sw.figure.canvas.draw_idle()
            return

    # No dot under the pointer (this also covers an Axes with no collections,
    # which previously raised NameError on the unbound `status`).
    if was_visible:
        annot.set_visible(False)
        sw.figure.canvas.draw_idle()

sw.figure.canvas.mpl_connect("motion_notify_event", hover)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

7