In [2]:
!pip install plotly

Collecting plotly
  Downloading plotly-5.11.0-py2.py3-none-any.whl (15.3 MB)
Collecting tenacity>=6.2.0
  Downloading tenacity-8.1.0-py3-none-any.whl (23 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.11.0 tenacity-8.1.0


In [3]:
import pandas as pd
import plotly.graph_objects as go

from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [4]:
df = pd.read_csv('new.csv')
df.head()

Unnamed: 0,source,target,weight
0,about to sleep,champions,17
1,about to sleep,hibernating,1434
2,about to sleep,new customers,693
3,about to sleep,potential loyalist,94
4,about to sleep,promising,365


In [5]:
def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'):
    colorPalette = ['#4B8BBE','#306998','#FFE873','#FFD43B','#646464']
    labelList = []
    colorNumList = []
    for catCol in cat_cols:
        labelListTemp =  list(set(df[catCol].values))
        colorNumList.append(len(labelListTemp))
        labelList = labelList + labelListTemp
        
    labelList = list(dict.fromkeys(labelList))
    
    colorList = []
    for idx, colorNum in enumerate(colorNumList):
        colorList = colorList + [colorPalette[idx]]*colorNum
        
    for i in range(len(cat_cols)-1):
        if i==0:
            sourceTargetDf = df[[cat_cols[i],cat_cols[i+1],value_cols]]
            sourceTargetDf.columns = ['source','target','count']
        else:
            tempDf = df[[cat_cols[i],cat_cols[i+1],value_cols]]
            tempDf.columns = ['source','target','count']
            sourceTargetDf = pd.concat([sourceTargetDf,tempDf])
        sourceTargetDf = sourceTargetDf.groupby(['source','target']).agg({'count':'sum'}).reset_index()
        
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].apply(lambda x: labelList.index(x))
    sourceTargetDf['targetID'] = sourceTargetDf['target'].apply(lambda x: labelList.index(x))
    
    data = dict(
        type='sankey',
        node = dict(
          pad = 15,
          thickness = 20,
          line = dict(
            color = "black",
            width = 0.5
          ),
          label = labelList,
          color = colorList
        ),
        link = dict(
          source = sourceTargetDf['sourceID'],
          target = sourceTargetDf['targetID'],
          value = sourceTargetDf['count']
        )
      )
    
    layout =  dict(
        title = title,
        font = dict(
          size = 10
        )
    )
       
    fig = dict(data=[data], layout=layout)
    return fig

In [10]:
sank = genSankey(df,cat_cols=['source','target'],value_cols='weight',title='Transactions')
fig = go.Figure(sank)
iplot(fig)

In [12]:
atl = genSankey(df[df['source']=='about to sleep'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
c = genSankey(df[df['source']=='champions'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
lc = genSankey(df[df['source']=='loyal customers'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
na = genSankey(df[df['source']=='needing attention'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
nc = genSankey(df[df['source']=='new customers'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
pl = genSankey(df[df['source']=='potential loyalist'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
p = genSankey(df[df['source']=='promising'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
h = genSankey(df[df['source']=='hibernating'],cat_cols=['source','target'],value_cols='weight',title='Transactions')
all = genSankey(df,cat_cols=['source','target'],value_cols='weight',title='Transactions')

updatemenus = [{'buttons': [{'method': 'animate',
                             'label': 'source',
                             'args': [all]
                              },
                            {'method': 'animate',
                             'label': 'new customers',
                             'args': [nc]
                              },
                            {'method': 'animate',
                             'label': 'about to sleep',
                             'args': [atl]
                              },
                            {'method': 'animate',
                             'label': 'champions',
                             'args': [c]
                             },
                            {'method': 'animate',
                             'label': 'loyal customers',
                             'args': [lc]
                             },
                            {'method': 'animate',
                             'label': 'needing attention',
                             'args': [na]
                             },
                            {'method': 'animate',
                             'label': 'potential loyalist',
                             'args': [pl]
                              },
                            {'method': 'animate',
                             'label': 'promising',
                             'args': [p]
                              },
                            {'method': 'animate',
                             'label': 'hibernating',
                             'args': [h]
                             }
                            ] } ]

sank = genSankey(df,cat_cols=['source','target'],value_cols='weight',title='Transactions')
fig = go.Figure(sank)
fig.update_layout(updatemenus=updatemenus)
iplot(fig)