# This notebook accompanies a Medium blog post introducing users to floWeaver

In [1]:
# import necessary packages
import pandas as pd
from floweaver import *

## A Simple Sankey Diagram: Flow of Refugees from the Syrian Civil War

Note: make sure your values are positive!

In [36]:
# Build the data set in a single step. It is easiest to use the column names
# "source", "target" and "value"; any additional columns (here "region") can
# be used later to partition the diagram.
refugees = pd.DataFrame({
    'source': 'Syria',
    'target': ['Turkey', 'Lebanon', 'Jordan', 'Iraq', 'Egypt', 'Europe', 'USA'],
    'value': [3600000, 950000, 670000, 250000, 130000, 1300000, 18000],
    'region': ['Middle East'] * 5 + ['Europe', 'North America'],
})

# Keep a copy of the table on disk, next to the notebook.
refugees.to_csv('refugees.csv')

In [89]:
# Display the whole table (7 rows) to check the columns before plotting.
refugees

Unnamed: 0,source,target,value,region
0,Syria,Turkey,3600000,Middle East
1,Syria,Lebanon,950000,Middle East
2,Syria,Jordan,670000,Middle East
3,Syria,Iraq,250000,Middle East
4,Syria,Egypt,130000,Middle East
5,Syria,Europe,1300000,Europe
6,Syria,USA,18000,North America


In [37]:
# Nodes are the grouping spots of the diagram. The dictionary keys are just
# labels; `ordering` and `bundles` below refer to the nodes by these keys.
nodes = dict(
    start=ProcessGroup(['Syria']),                   # one (Syria) at the start
    end=ProcessGroup(refugees['target'].tolist()),   # 7 at the end
)

# Order of the nodes from left to right.
ordering = [
    ['start'],
    ['end'],
]

# The "bundle" of connections to show: flows going from 'start' to 'end'.
bundles = [
    Bundle('start', 'end'),
]

In [44]:
# Partition each group for display:
#   Partition.Simple(<column to split on>, <list of unique values to show>)
nodes['start'].partition = Partition.Simple('source', ['Syria'])
nodes['end'].partition = Partition.Simple(
    'target',
    ['Turkey', 'Lebanon', 'Jordan', 'Iraq', 'Egypt', 'Europe', 'USA'],
)

In [45]:
# Assemble the Sankey definition from the nodes, bundles and ordering above.
sdd = SankeyDefinition(nodes, bundles, ordering)

# Render the diagram as a widget and save it as a PNG.
(
    weave(sdd, refugees)
    .to_widget()
    .auto_save_png('./images/syria1.png')
)

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'end', …

## Bundling smaller contributors into "other"

In [40]:
# Replace the partition on the 'end' node: the four largest destinations keep
# their own entry, while Iraq, Egypt and USA are merged into one group
# labelled "other" (written as a (label, [values]) tuple).
nodes['end'].partition = Partition.Simple(
    'target',
    [
        'Turkey',
        'Lebanon',
        'Jordan',
        'Europe',
        ('other', ['Iraq', 'Egypt', 'USA']),
    ],
)

In [41]:
# Rebuild the definition with the re-partitioned nodes, then render and save.
sdd = SankeyDefinition(nodes, bundles, ordering)
(
    weave(sdd, refugees)
    .to_widget()
    .auto_save_png('./images/syria_other.png')
)

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'end', …

## Visualizing/Partitioning on a different feature

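Change the partition on the `end` node so that flows are grouped by the `region` column instead of by individual `target`.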

In [51]:
# Split the 'end' node by the "region" column, one entry per distinct region
# found in the data.
nodes['end'].partition = Partition.Simple(
    'region',
    refugees['region'].unique(),
)

In [52]:
# Rebuild the definition with the region partition, then render and save.
# The image gets its own file name so that it does not overwrite
# './images/syria_other.png', which the previous section writes.
sdd = SankeyDefinition(nodes, bundles, ordering)
weave(sdd, refugees).to_widget().auto_save_png('./images/syria_region.png')

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'end', …

## Showing multiple types of flows

In [54]:
# Extend the data set with a "type" column (men / women / children), giving
# three flows per destination. This gender/age data is completely fabricated.
# NOTE(review): the three Egypt values sum to 140,000, whereas the first table
# lists 130,000 for Egypt; every other destination adds up to its earlier total.
refugees_gender = pd.DataFrame({
    'source': 'Syria',
    # the same seven destinations, repeated once per type
    'target': ['Turkey', 'Lebanon', 'Jordan', 'Iraq', 'Egypt', 'Europe', 'USA'] * 3,
    'value': [
        1000000, 200000, 150000, 70000, 35000, 300000, 3000,     # men
        1100000, 220000, 180000, 75000, 40000, 330000, 5000,     # women
        1500000, 530000, 340000, 105000, 65000, 670000, 10000,   # children
    ],
    'region': (['Middle East'] * 5 + ['Europe', 'North America']) * 3,
    'type': ['men'] * 7 + ['women'] * 7 + ['children'] * 7,
})

In [90]:
# Display the table to check the new "type" column.
refugees_gender

Unnamed: 0,source,target,value,region,type
0,Syria,Turkey,1000000,Middle East,men
1,Syria,Lebanon,200000,Middle East,men
2,Syria,Jordan,150000,Middle East,men
3,Syria,Iraq,70000,Middle East,men
4,Syria,Egypt,35000,Middle East,men
5,Syria,Europe,300000,Europe,men
6,Syria,USA,3000,North America,men
7,Syria,Turkey,1100000,Middle East,women
8,Syria,Lebanon,220000,Middle East,women
9,Syria,Jordan,180000,Middle East,women


In [67]:
# Set the default size to fit the documentation better.
# NOTE: the render at the bottom of this cell calls to_widget() without it.
size = dict(width=570, height=300)

# set the "nodes" - aka grouping spots. (Node names here aren't important)
nodes = {
    'start': ProcessGroup(['Syria']),  # one (Syria) at the start
    # Every destination occurs three times in refugees_gender (once per type),
    # so de-duplicate the column to pass each of the 7 destinations only once.
    'end': ProcessGroup(refugees_gender['target'].unique().tolist()),
}

# set the order of the nodes left to right
ordering = [['start'], ['end']]

# set the "bundle" of connections you want to show
bundles = [Bundle('start', 'end')]

# partition the groups for display: Partition.Simple(column name, list of unique values)
nodes['start'].partition = Partition.Simple('source', ['Syria'])
nodes['end'].partition = Partition.Simple('target', ['Turkey', 'Lebanon', 'Jordan', 'Iraq', 'Egypt', 'Europe', 'USA'])

# extra partition on the "type" column, with one colour per type
gender = Partition.Simple('type', ['men', 'women', 'children'])
palette = {'men': 'teal', 'women': 'purple', 'children': 'gray'}

# New sankey diagram with the flow_partition set for gender/type
sdd = SankeyDefinition(nodes, bundles, ordering,
                       flow_partition=gender)

# render with the palette above and save as png
weave(sdd, refugees_gender, palette=palette).to_widget().auto_save_png('./images/syria_mwc.png')

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'end', …

## Adjusting features of the display

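**TODO:** Add a legend for the flow colours. I am not sure how to do this yet; it may have to be added manually.

**TODO:** Find out how to adjust the font size.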

In [66]:
# Explicit widget dimensions, passed to to_widget() as keyword arguments.
size = {'width': 570, 'height': 300}

# Same diagram as before, rendered at that size and saved as the "_small" image.
(
    weave(sdd, refugees_gender, palette=palette)
    .to_widget(**size)
    .auto_save_png('./images/syria_mwc_small.png')
)

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'end', …

## Adding a Waypoint

In [None]:
# NOTE(review): `sankey` is not defined anywhere in the visible notebook and this
# cell has no execution count, so it raises NameError on a fresh
# Restart & Run All. The keyword arguments used here (flows, labels,
# orientations, trunklength, pathlengths, prior, connect) look like
# matplotlib's `matplotlib.sankey.Sankey.add` rather than floWeaver -
# TODO confirm, then either create the object first or remove this cell.

# First diagram: an inflow of 100 ('household budget') and three outflows.
sankey.add(flows=[100, -50, -30, -20],
           labels=['household budget', 'necessities', 'fun', 
                   'saving'],
           orientations=[0, 0, 1, -1],
           trunklength = 10,
           edgecolor = '#027368',
           facecolor = '#027368')
# Second diagram: an inflow of 50 and three outflows, attached to the first
# diagram through `prior` / `connect`.
sankey.add(flows=[50, -30, -10, -10], 
           labels=['','rent', 'groceries', 'other'],
           trunklength = 2,
           pathlengths = [3,3,3,3],
           orientations=[0, 1, 0, -1], 
           prior=0, #which sankey are you connecting to (0-indexed)
           connect=(1, 0), #flow number to connect: (prior, this)
           edgecolor = '#58A4B0',
           facecolor = '#58A4B0')

In [79]:
# Load new dataset
# NOTE: 'household.csv' is read from the current working directory; nothing in
# the visible notebook writes this file, so it must already exist there.
household = pd.read_csv('household.csv')
# display the loaded table
household

Unnamed: 0,source,target,value,category
0,partner 1,movies,5,fun
1,partner 1,restaurants,25,fun
2,partner 1,rent,30,necessities
3,partner 1,groceries,10,necessities
4,partner 1,other,10,necessities
5,partner 1,retirement,15,saving
6,partner 1,vacation,5,saving
7,partner 2,movies,10,fun
8,partner 2,restaurants,20,fun
9,partner 2,rent,30,necessities


In [88]:
# set the "nodes" - aka grouping spots. (Node names here aren't important)
# The 'source' and 'target' columns repeat the same ids on many rows, so they
# are de-duplicated before being handed to ProcessGroup (consistent with the
# .unique() calls used for the partitions below).
nodes = {
    'start': ProcessGroup(household['source'].unique().tolist()),
    # intermediate node that splits the flows by the "category" column
    'category': Waypoint(Partition.Simple('category', household['category'].unique())),
    'end': ProcessGroup(household['target'].unique().tolist()),
}

# set the order of the nodes left to right
ordering = [['start'],
            ['category'],
            ['end']]

# set the "bundle" of connections you want to show, routed through the waypoint
bundles = [Bundle('start', 'end', waypoints=['category'])]

# add the partitions
nodes['start'].partition = Partition.Simple('source', ['partner 1', 'partner 2'])
nodes['end'].partition = Partition.Simple('target', household['target'].unique())
# flow partition on "source", with one colour per partner
partner = Partition.Simple('source', household['source'].unique())
palette = {'partner 1': '#027368', 'partner 2': '#58A4B0'}

# create the sankey diagram
sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=partner)

# display the sankey diagram and save as png
weave(sdd, household, palette=palette).to_widget().auto_save_png('./images/household.png')

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^partner 1', 'start^partn…