In [1]:
!jupyter nbextension enable --py --sys-prefix ipysankeywidget
!jupyter nbextension enable --py --sys-prefix widgetsnbextension

Enabling notebook extension jupyter-sankey-widget/extension...
      - Validating: ok
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok


In [None]:
jupyter nbextension enable --py --sys-prefix ipysankeywidget
jupyter nbextension enable --py --sys-prefix widgetsnbextension

In [1]:
import random
import datetime
#from ipysankeywidget import SankeyWidget
#from ipywidgets import Layout

import ipysankeywidget as sk
import ipywidgets as ipw
import pandas as pd

In [2]:
# Generate sample data

locations = ['Produce', 'Dairy', 'Freezer', 'Chips', 'Cookies', 'Cereal', 'Produce', 'Dairy','Produce', 'Dairy','Cereal' ]

def generate_customers(num_custs=200):
    
    customer_list = []
    while len(customer_list) <= num_custs:
        customer_list.append('CUST-' + str(random.randrange(1,101)))
    print('Unique Customers = ', len(set(customer_list)))
    
    return customer_list

def generate_sample_data(sample_size=10000, num_custs=200):
    sample_data = []
    
    customers = generate_customers(num_custs)
    
    while len(sample_data) < sample_size:
        month = random.randrange(1,7)
        day = random.randrange(1,29)
        hour = random.randrange(8,22)
        
        rand_customer = customers[random.randrange(0,200)]
        
        for i in range(random.randrange(8)):

            minute = random.randrange(1,60)
            seconds = random.randrange(1,60)
            rand_location = locations[random.randrange(0,(len(locations)))]
            rand_timestamp = datetime.datetime(2019, month, day, hour, minute, seconds)

            item = {'timestamp':str(rand_timestamp), 'customer':rand_customer, 'location': rand_location}
            sample_data.append(item)

    print('Data Points Generated = ', len(sample_data))
    return sample_data

In [3]:
raw_data = generate_sample_data()

Unique Customers =  85
Data Points Generated =  10000


In [4]:
df = pd.DataFrame(raw_data)
df['timestamp']= pd.to_datetime(df['timestamp'])
df.sort_values(['customer', 'timestamp'], inplace=True)
df.head(10)

Unnamed: 0,timestamp,customer,location
6488,2019-01-06 08:09:52,CUST-1,Dairy
6486,2019-01-06 08:10:19,CUST-1,Freezer
6487,2019-01-06 08:15:52,CUST-1,Dairy
2617,2019-01-06 12:15:23,CUST-1,Dairy
2618,2019-01-06 12:20:22,CUST-1,Produce
2616,2019-01-06 12:48:32,CUST-1,Produce
2619,2019-01-06 12:52:48,CUST-1,Freezer
1078,2019-01-25 13:34:26,CUST-1,Produce
1079,2019-01-25 13:38:44,CUST-1,Cookies
9171,2019-02-08 15:52:33,CUST-1,Freezer


In [5]:
accounts = list(df['customer'].unique())
area_types = list(df['location'].unique())

# tag the step number of each customer, regardless of visit
all_steps = [] # for total customer tracking over time

day_steps = [] # for daily trip tracking

next_steps = [] # for single area transversal
for account in accounts:
    row = {}
    row['account_num'] = account
    single_account = df[(df['customer'] == account)== True]
    
    steps = list(single_account['location']) 
    
    for count, locs in enumerate(steps, 1):
    
        row[str('col' + str(count))] = locs
        
        if count == 1:
            source = locs
            next
        
        
        next_steps.append({'source':source, 'target': locs+'1', 'value': 1})
        
        source = locs
        
    row['value'] = 1
    all_steps.append(row)
    
    

df_next = pd.DataFrame(next_steps)

In [13]:
# Everything Colors





def custom_colors(categories, opacity='100', exclude=None):


    num_cats = len(categories)

 
    cat_colors = ["1f77b4",
                  "ff7f0e", 
                  "2ca02c", 
                  "d62728", 
                  "9467bd", 
                  "8c564b", 
                  "e377c2", 
                  "7f7f7f", 
                  "bcbd22",
                  "17becf",
                 ]

    cat_colors = ['#' + color for color in cat_colors]


    full_color_map = dict(zip(categories[:num_cats], cat_colors[:num_cats]))

    if exclude in categories:
        full_color_map[exclude] = full_color_map[exclude].replace(str('#'+tranparency[opacity]), "#FF")
        

    return full_color_map


In [14]:
colormap = transparent(locations, opacity='100', exclude=None)

#df_next['color']=df_next['source'].apply(lambda x: colormap[x])
df_next.head(50)

next_grouped = df_next.groupby(['source','target']).sum().reset_index()

In [15]:
next_grouped['color']=next_grouped['source'].apply(lambda x: colormap[x])
links = next_grouped.to_dict(orient='records')

In [16]:
next_grouped

Unnamed: 0,source,target,value,color
0,Cereal,Cereal1,317,#8c564b
1,Cereal,Chips1,150,#8c564b
2,Cereal,Cookies1,174,#8c564b
3,Cereal,Dairy1,528,#8c564b
4,Cereal,Freezer1,167,#8c564b
5,Cereal,Produce1,469,#8c564b
6,Chips,Cereal1,174,#d62728
7,Chips,Chips1,78,#d62728
8,Chips,Cookies1,90,#d62728
9,Chips,Dairy1,244,#d62728


In [19]:
layout = ipw.Layout(width="300", height="200")
def sankey(margin_top=10, **value):
    """Show SankeyWidget with default values for size and margins"""
    return sk.SankeyWidget(layout=layout,
                        margins=dict(top=margin_top, bottom=0, left=90, right=90),
                        **value)





In [20]:
'''links = [
    {'source': 'A', 'target': 'B', 'value': 1},
    {'source': 'B', 'target': 'C', 'value': 1},
    {'source': 'A', 'target': 'D', 'value': 1},
]'''
sankey(links=links)

SankeyWidget(layout=Layout(height='200', width='300'), links=[{'source': 'Cereal', 'target': 'Cereal1', 'value…

In [21]:
transparency = {"100":"FF",
                "99":"FC",
                "98":"FA",
                "97":"F7",
                "96":"F5",
                "95":"F2",
                "94":"F0",
                "93":"ED",
                "92":"EB",
                "91":"E8",
                "90":"E6",
                "89":"E3",
                "88":"E0",
                "87":"DE",
                "86":"DB",
                "85":"D9",
                "84":"D6",
                "83":"D4",
                "82":"D1",
                "81":"CF",
                "80":"CC",
                "79":"C9",
                "78":"C7",
                "77":"C4",
                "76":"C2",
                "75":"BF",
                "74":"BD",
                "73":"BA",
                "72":"B8",
                "71":"B5",
                "70":"B3",
                "69":"B0",
                "68":"AD",
                "67":"AB",
                "66":"A8",
                "65":"A6",
                "64":"A3",
                "63":"A1",
                "62":"9E",
                "61":"9C",
                "60":"99",
                "59":"96",
                "58":"94",
                "57":"91",
                "56":"8F",
                "55":"8C",
                "54":"8A",
                "53":"87",
                "52":"85",
                "51":"82",
                "50":"80",
                "49":"7D",
                "48":"7A",
                "47":"78",
                "46":"75",
                "45":"73",
                "44":"70",
                "43":"6E",
                "42":"6B",
                "41":"69",
                "40":"66",
                "39":"63",
                "38":"61",
                "37":"5E",
                "36":"5C",
                "35":"59",
                "34":"57",
                "33":"54",
                "32":"52",
                "31":"4F",
                "30":"4D",
                "29":"4A",
                "28":"47",
                "27":"45",
                "26":"42",
                "25":"40",
                "24":"3D",
                "23":"3B",
                "22":"38",
                "21":"36",
                "20":"33",
                "19":"30",
                "18":"2E",
                "17":"2B",
                "16":"29",
                "15":"26",
                "14":"24",
                "13":"21",
                "12":"1F",
                "11":"1C",
                "10":"1A",
                "9":"17",
                "8":"14",
                "7":"12",
                "6":"0F",
                "5":"0D",
                "4":"0A",
                "3":"08",
                "2":"05",
                "1":"03",
                "0":"00",
              }

In [12]:
raw_data

[{'timestamp': '2019-01-25 20:53:35',
  'customer': 'CUST-3',
  'location': 'Chips'},
 {'timestamp': '2019-01-25 20:41:12',
  'customer': 'CUST-3',
  'location': 'Chips'},
 {'timestamp': '2019-01-25 20:33:22',
  'customer': 'CUST-3',
  'location': 'Produce'},
 {'timestamp': '2019-05-03 12:30:59',
  'customer': 'CUST-30',
  'location': 'Produce'},
 {'timestamp': '2019-05-03 12:54:22',
  'customer': 'CUST-30',
  'location': 'Freezer'},
 {'timestamp': '2019-05-03 12:41:46',
  'customer': 'CUST-30',
  'location': 'Chips'},
 {'timestamp': '2019-05-03 12:51:48',
  'customer': 'CUST-30',
  'location': 'Cookies'},
 {'timestamp': '2019-06-22 09:17:33',
  'customer': 'CUST-19',
  'location': 'Chips'},
 {'timestamp': '2019-06-22 09:46:12',
  'customer': 'CUST-19',
  'location': 'Chips'},
 {'timestamp': '2019-06-22 09:52:57',
  'customer': 'CUST-19',
  'location': 'Cereal'},
 {'timestamp': '2019-06-22 09:30:51',
  'customer': 'CUST-19',
  'location': 'Dairy'},
 {'timestamp': '2019-04-27 11:54:36',