In [1]:
#packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
from math import pi
from itertools import combinations_with_replacement
import pickle
from IPython.display import display
pd.set_option('display.max_rows', 500)
pd.options.display.max_colwidth=100
from natsort import natsorted
import seaborn as sns
import scipy.stats as stats 
from collections import Counter
from itertools import groupby
from operator import itemgetter
from itertools import combinations
import time
import datetime
import choix
import plotly.graph_objects as go
from tqdm import tqdm

In [2]:
#
# @author: Ritesh Agrawal
# @Date: 13 Feb 2013
# @Description: This is an implementation of the average overlap measure for
# comparing two ranked lists
# (Reference: http://www.umiacs.umd.edu/~wew/papers/wmz10_tois.pdf).
# This is a modified implementation of https://github.com/maslinych/linis-scripts/blob/master/rbo_calc.py
# It is a linear implementation of the RBO and assumes there are no
# duplicates and does not handle ties.
#

def RBOscore(l1, l2, depth):
    """
        Calculates the Average Overlap score of two ranked lists.

        Despite the function name this is the unweighted average-overlap
        measure: for every prefix length d = 1..depth the agreement
        2 * (items common to both prefixes) / (len(S[:d]) + len(L[:d]))
        is computed, and the mean of those agreements is returned.

        l1 -- Ranked List 1 (None is treated as an empty list)
        l2 -- Ranked List 2 (None is treated as an empty list)
        depth -- number of leading ranks to evaluate; capped at the
                 length of the longer list

        Returns a float, or 0 when either list is empty or depth is not
        positive.

        Assumes no duplicates within a list and does not handle ties.
        None entries inside the lists are not supported: None is used
        internally to mark ranks past the end of the shorter list.
    """
    if l1 is None: l1 = []
    if l2 is None: l2 = []

    # Order by length only. Sorting (len, list) tuples falls back to
    # comparing the lists element-wise when the lengths tie, which raises
    # TypeError for items that cannot be ordered (e.g. str vs None).
    S, L = sorted([l1, l2], key=len)  # S = shorter list, L = longer list
    s, l = len(S), len(L)
    # sanity checks
    if s == 0:
        return 0
    depth = min(depth, l)
    if depth <= 0:
        # nothing to evaluate; also avoids dividing by zero below
        return 0

    seen_short = set()  # items of S met so far at non-matching ranks
    seen_long = set()   # items of L met so far at non-matching ranks
    overlap = 0         # 2 * number of common items in the current prefixes
    total = 0.0         # running sum of the per-depth agreements

    for i in range(depth):
        x = L[i]
        y = S[i] if i < s else None
        d = i + 1
        if x == y:
            # same item at the same rank: counted once for each list
            overlap += 2
        else:
            seen_long.add(x)
            if y is not None:
                seen_short.add(y)
            overlap += (2 if x in seen_short else 0) + (2 if y in seen_long else 0)
        # min(s, d) is the length of the shorter list's prefix at depth d
        total += float(overlap) / (min(s, d) + d)

    return total / depth
def radarPlotDF(countsdf,maxpt,title,**kwargs):
    """
    Draw a two-series radar (spider) chart on a polar matplotlib axis.

    countsdf: dataframe whose first column is drawn as 'Pre' and whose
              second column is drawn as 'Post'; its index supplies the
              axis labels
    maxpt: upper limit of the radial axis
    title: title placed on the plot

    optional-
    colorcode: pass 'on' to add a ring of 16 hsv-colored bars
    """
    show_ring = kwargs.get('colorcode', None) == 'on'

    # geometry and colors of the optional ring (always 16 slices)
    n_slices = 16
    ring_hues = [float('%1.2f' % h) for h in np.linspace(0, 1, 16, endpoint=False)]
    ring_colors = plt.cm.hsv(ring_hues)
    ring_theta = np.linspace(0.0, 2 * np.pi, n_slices, endpoint=False)
    ring_height = np.repeat(1, n_slices)
    ring_width = np.repeat(0.25, n_slices)

    # data series and their labels
    pre = countsdf.iloc[:, 0]
    post = countsdf.iloc[:, 1]
    labels = [str(label) for label in pre.index]
    n_axes = len(labels)

    # repeat the first point at the end so each outline closes
    pre_values = list(pre.values)
    pre_values = pre_values + pre_values[:1]
    post_values = list(post.values)
    post_values = post_values + post_values[:1]

    # one spoke per label, evenly spaced around the circle
    angles = [k / float(n_axes) * 2 * pi for k in range(n_axes)]
    angles = angles + angles[:1]

    ax = plt.subplot(111, polar=True)
    # spoke labels
    plt.xticks(angles[:-1], labels, color='grey', size=8)
    # radial ticks are drawn without labels
    ax.set_rlabel_position(0)
    plt.yticks(list(np.arange(0, maxpt)), [], color="black", size=5)
    plt.ylim(0, maxpt)

    # outlines first, then the translucent fills
    for series in (pre_values, post_values):
        ax.plot(angles, series, linewidth=1, linestyle='solid')
    for series, shade in ((pre_values, 'b'), (post_values, 'r')):
        ax.fill(angles, series, shade, alpha=0.1)

    if show_ring:
        ax.bar(ring_theta, ring_height, width=ring_width, bottom=15, color=ring_colors)
    ax.legend(['Pre','Post'])
    plt.title(title)
    
def interactiveRadar(countsDF,subname,category):
    """
    Show an interactive plotly radar chart of the first three columns of
    countsDF, named 'Pre', 'Post ' and '3 Day ' in that order.

    countsDF: dataframe with at least three columns; its index entries,
              cut to 7 characters, label the angular axis
    subname, category: joined with a space to form the figure title
    """
    trace_names = ('Pre', 'Post ', '3 Day ')
    series = [countsDF.iloc[:, k] for k in range(3)]
    axis_labels = [str(label)[0:7] for label in series[0].index]

    # Repeat the first value at the end of every series.
    # NOTE(review): theta is not extended the same way, so r is one entry
    # longer than theta - confirm this is intended.
    radii_per_trace = []
    for column in series:
        radii = list(column.values)
        radii_per_trace.append(radii + radii[:1])

    fig = go.Figure()
    for trace_name, radii in zip(trace_names, radii_per_trace):
        fig.add_trace(
            go.Scatterpolar(
                opacity=0.75,
                r=radii,
                theta=axis_labels,
                fill='toself',
                name=trace_name,
            )
        )

    radial_axis = dict(visible=True, showline=False, range=[0, 16])
    fig.update_layout(
        title=subname + ' ' + category,
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )

    fig.show()


def getPercentile(data):
    """
    Return the 1st-5th and 95th-99th percentiles of data.

    The result is a one-row DataFrame whose column labels are the
    percentile ranks 1, 2, 3, 4, 5, 95, 96, 97, 98, 99.
    """
    ranks = np.concatenate((np.arange(1, 6, 1), np.arange(95, 100, 1)))
    row = np.percentile(data, ranks)
    return pd.DataFrame([row], columns=ranks)
def cdfTable(data,binz,spacing):
    """
    Tabulate the empirical cumulative distribution of data over fixed bins.

    data: 1-D sequence of numbers
    binz: exclusive upper limit for the bin edges
    spacing: step between consecutive bin edges (edges start at 1)

    Returns a one-row DataFrame; each column is labelled with the left
    edge of a bin and holds the cumulative fraction of the binned data up
    to and including that bin.
    """
    edges = np.arange(1, binz, spacing)
    # Same numbers as plt.hist(..., density=True, cumulative=True), but
    # computed without drawing on (and then closing) the current figure.
    density, _ = np.histogram(data, bins=edges, density=True)
    cdf = np.cumsum(density * np.diff(edges))
    # Label with the edges actually used so the index always has the same
    # length as cdf, including for non-integer spacing.
    cdftable = pd.DataFrame(cdf, index=edges[:-1])
    return cdftable.T
def mycdf(data):
    """
    Plot the empirical CDF of data with pyplot (plt.show() is not called).

    One marker is drawn per distinct value in data, at the cumulative
    fraction of observations up to and including that value.
    """
    n_obs = len(data)
    # one bin per distinct value; the extra edge closes the last bin
    distinct = sorted(set(data))
    edges = np.append(distinct, distinct[-1] + 1)
    counts, bin_edges = np.histogram(data, bins=edges, density=False)
    # per-value fractions, accumulated into the CDF
    cdf = np.cumsum(counts.astype(float) / n_obs)
    plt.plot(bin_edges[0:-1], cdf, linestyle='--', marker="o", color='b')
    plt.ylim((0, 1))
    plt.ylabel("CDF")
    plt.grid(True)
def colorcode(value):
    """
    Return the CSS color rule for one dataframe element.

    green when value >= 1 - 0.01/3 or value <= 0.01/3, black otherwise
    (presumably a 0.01 level split over 3 comparisons - confirm)
    """
    ncomp = 3
    tail = 0.01 / ncomp
    in_tail = value >= 1 - tail or value <= tail
    return 'color: ' + ('green' if in_tail else 'black')
def getPscore(data,score):
    """
    Return the percentile rank of score within data as a fraction.

    Takes scipy's percentileofscore with kind='mean' (a 0-100 value)
    and divides it by 100.
    """
    return stats.percentileofscore(data, score, kind='mean') / 100


In [18]:
# Aneesha's ordered list of 24 strength labels; it becomes the 'Aneesha'
# row of `All`, and its item order is what RBOscore compares.
Aneesha = [
    'Love',
    'Gratitude',
    'Social Intelligence',
    'Spirituality',
    'Kindness',
    'Teamwork',
    'Zest',
    'Humor',
    'Hope',
    'Leadership',
    'Appreciation of Beauty & Excellence',
    'Love of learning',
    'Honesty',
    'Curiosity',
    'Judgement',
    'Perspective',
    'Fairness',
    'Humility',
    'Bravery',
    'Prudence',
    'Perseverance',
    'Forgiveness',
    'Self-Regulation',
    'Creativity',
]

In [47]:
# Ryan's ordered list of 24 strength labels; it becomes the 'Ryan' row of
# `All`, and its item order is what RBOscore compares.
Ryan = [
    'Hope',
    'Judgement',
    'Love of learning',
    'Curiosity',
    'Zest',
    'Honesty',
    'Gratitude',
    'Self-Regulation',
    'Prudence',
    'Creativity',
    'Perseverance',
    'Humility',
    'Humor',
    'Fairness',
    'Appreciation of Beauty & Excellence',
    'Kindness',
    'Bravery',
    'Perspective',
    'Social Intelligence',
    'Teamwork',
    'Leadership',
    'Love',
    'Forgiveness',
    'Spirituality',
]

In [5]:
# Raisa's ordered list of 24 strength labels; it becomes the 'Raisa' row
# of `All`, and its item order is what RBOscore compares.
Raisa = [
    'Love',
    'Appreciation of Beauty & Excellence',
    'Leadership',
    'Teamwork',
    'Zest',
    'Kindness',
    'Forgiveness',
    'Curiosity',
    'Prudence',
    'Honesty',
    'Spirituality',
    'Gratitude',
    'Hope',
    'Social Intelligence',
    'Fairness',
    'Judgement',
    'Perspective',
    'Humor',
    'Love of learning',
    'Perseverance',
    'Bravery',
    'Creativity',
    'Humility',
    'Self-Regulation',
]

In [6]:
# Hassan's ordered list of 24 strength labels; it becomes the 'Hassan' row
# of `All`, and its item order is what RBOscore compares.
Hassan = [
    'Humor',
    'Creativity',
    'Love of learning',
    'Appreciation of Beauty & Excellence',
    'Curiosity',
    'Kindness',
    'Social Intelligence',
    'Fairness',
    'Perspective',
    'Teamwork',
    'Honesty',
    'Spirituality',
    'Judgement',
    'Humility',
    'Bravery',
    'Leadership',
    'Forgiveness',
    'Prudence',
    'Gratitude',
    'Hope',
    'Self-Regulation',
    'Perseverance',
    'Zest',
    'Love'
]

In [21]:
# Zarah's ordered list of 24 strength labels; it becomes the 'Zarah' row
# of `All`, and its item order is what RBOscore compares.
Zarah = [
    'Love of learning',
    'Kindness',
    'Love',
    'Curiosity',
    'Zest',
    'Judgement',
    'Social Intelligence',
    'Honesty',
    'Gratitude',
    'Forgiveness',
    'Teamwork',
    'Fairness',
    'Humility',
    'Prudence',
    'Spirituality',
    'Appreciation of Beauty & Excellence',
    'Creativity',
    'Perseverance',
    'Hope',
    'Self-Regulation',
    'Leadership',
    'Humor',
    'Bravery',
    'Perspective',
]

In [63]:
# Maisoon's ordered list of 24 strength labels; it becomes the 'Maisoon'
# row of `All`, and its item order is what RBOscore compares.
Maisoon = [
    'Humility',
    'Teamwork',
    'Love of learning',
    'Fairness',
    'Curiosity',
    'Appreciation of Beauty & Excellence',
    'Love',
    'Gratitude',
    'Kindness',
    'Creativity',
    'Perspective',
    'Social Intelligence',
    'Spirituality',
    'Hope',
    'Forgiveness',
    'Bravery',
    'Leadership',
    'Honesty',
    'Humor',
    'Judgement',
    'Zest',
    'Self-Regulation',
    'Prudence',
    'Perseverance'
]

In [64]:
# Build the rankings table from a single name -> list mapping so a row
# label can never drift away from its data.
rankings = {
    'Ryan': Ryan,
    'Maisoon': Maisoon,
    'Aneesha': Aneesha,
    'Hassan': Hassan,
    'Zarah': Zarah,
    'Raisa': Raisa,
}

# Every person must rank exactly the same items. A ragged or misspelled
# list would otherwise be padded with None by pandas and silently skew the
# overlap scores computed from `All`.
reference_items = sorted(Ryan)
for person, ranking in rankings.items():
    assert sorted(ranking) == reference_items, person + ' does not rank the same items as Ryan'

# One row per person, one column per rank position (0 = first item).
All = pd.DataFrame(list(rankings.values()), index=list(rankings))


In [65]:
All

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
Ryan,Hope,Judgement,Love of learning,Curiosity,Zest,Honesty,Gratitude,Self-Regulation,Prudence,Creativity,...,Appreciation of Beauty & Excellence,Kindness,Bravery,Perspective,Social Intelligence,Teamwork,Leadership,Love,Forgiveness,Spirituality
Maisoon,Humility,Teamwork,Love of learning,Fairness,Curiosity,Appreciation of Beauty & Excellence,Love,Gratitude,Kindness,Creativity,...,Forgiveness,Bravery,Leadership,Honesty,Humor,Judgement,Zest,Self-Regulation,Prudence,Perseverance
Aneesha,Love,Gratitude,Social Intelligence,Spirituality,Kindness,Teamwork,Zest,Humor,Hope,Leadership,...,Judgement,Perspective,Fairness,Humility,Bravery,Prudence,Perseverance,Forgiveness,Self-Regulation,Creativity
Hassan,Humor,Creativity,Love of learning,Appreciation of Beauty & Excellence,Curiosity,Kindness,Social Intelligence,Fairness,Perspective,Teamwork,...,Bravery,Leadership,Forgiveness,Prudence,Gratitude,Hope,Self-Regulation,Perseverance,Zest,Love
Zarah,Love of learning,Kindness,Love,Curiosity,Zest,Judgement,Social intelligence,Honesty,Gratitude,Forgiveness,...,Spirituality,Appreciation of Beauty & Excellence,Creativity,Perseverance,Hope,Self-Regulation,Leadership,Humor,Bravery,Perspective
Raisa,Love,Appreciation of Beauty & Excellence,Leadership,Teamwork,Zest,Kindness,Forgiveness,Curiosity,Prudence,Honesty,...,Fairness,Judgement,Perspective,Humor,Love of learning,Perseverance,Bravery,Creativity,Humility,Self-Regulation


# Rank-Biased Overlap (computed here as Average Overlap)

In [43]:
# Hassan's row as a plain Python list.
# NOTE(review): the saved output of this cell ends with None, so it was
# produced while `All` had a 25th column - re-run after Restart & Run All.
All.loc['Hassan'].tolist()

['Humor',
 'Creativity',
 'Love of learning',
 'Appreciation of Beauty & Excellence',
 'Curiosity',
 'Kindness',
 'Social Intelligence',
 'Fairness',
 'Perspective',
 'Teamwork',
 'Honesty',
 'Spirituality',
 'Judgement',
 'Humility',
 'Bravery',
 'Leadership',
 'Forgiveness',
 'Prudence',
 'Gratitude',
 'Hope',
 'Self-Regulation',
 'Perseverance',
 'Zest',
 'Love',
 None]

In [55]:
# Row labels of `All`, in row order. Derived rather than retyped so the
# names used for All.loc[...] lookups cannot drift from the table.
People = list(All.index)

In [69]:
# every unordered pair of people, as (first, second) tuples
Combos = list(combinations(People, 2))

In [89]:
# Overlap score of each pair's rankings, in the same order as Combos;
# 24 is the depth passed to RBOscore.
temp = [
    RBOscore(All.loc[first].tolist(), All.loc[second].tolist(), 24)
    for first, second in Combos
]
    

In [94]:
# one row per pair of people
RBOscores = pd.DataFrame(data=Combos, columns=['Person 1', 'Person 2'])

In [95]:
# attach the pairwise scores (same order as the rows)
RBOscores = RBOscores.assign(Overlap=temp)

In [97]:
# pairs ordered from least to most overlap
RBOscores.sort_values(by=['Overlap'], ascending=True)

Unnamed: 0,Person 1,Person 2,Overlap
1,Ryan,Aneesha,0.447532
4,Ryan,Raisa,0.451504
0,Ryan,Maisoon,0.499415
2,Ryan,Hassan,0.504528
13,Hassan,Raisa,0.506263
9,Aneesha,Hassan,0.511702
5,Maisoon,Aneesha,0.523195
12,Hassan,Zarah,0.527882
10,Aneesha,Zarah,0.535115
7,Maisoon,Zarah,0.554377


In [99]:
# Same pairwise score as `temp`, but with every ranking reversed so the
# comparison starts from the last-placed items.
rev_temp = [
    RBOscore(All.loc[first].tolist()[::-1], All.loc[second].tolist()[::-1], 24)
    for first, second in Combos
]
    

In [101]:
# reversed-ranking scores per pair, ordered from least to most overlap
(
    pd.DataFrame({'names': Combos, 'Reverse Overlap': rev_temp})
    .sort_values(by='Reverse Overlap')
)

Unnamed: 0,names,Reverse Overlap
4,"(Ryan, Raisa)",0.433
0,"(Ryan, Maisoon)",0.441801
1,"(Ryan, Aneesha)",0.475622
2,"(Ryan, Hassan)",0.493299
13,"(Hassan, Raisa)",0.506174
12,"(Hassan, Zarah)",0.53319
3,"(Ryan, Zarah)",0.539523
7,"(Maisoon, Zarah)",0.548672
9,"(Aneesha, Hassan)",0.55501
10,"(Aneesha, Zarah)",0.555042
