In [None]:
#!pip install folium
import math
import time
from textwrap import wrap

import folium  # plot maps
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.graph_objs as go
import yaml
from branca.element import Figure, MacroElement, Template
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
from matplotlib.ticker import FormatStrFormatter
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from pytz import timezone


ModuleNotFoundError: No module named 'folium'

# Load data

In [None]:
# Read the CSV that feeds the Sankey analysis below.
df_processed = pd.read_csv("sankey_plot.csv")

# Quick sanity check: print the frame's dimensions, then its column index.
for overview in (df_processed.shape, df_processed.columns):
    print(overview)

# Sankey

Resources:
- official documentation: https://plotly.com/python/sankey-diagram/
- stackoverflow example: https://stackoverflow.com/questions/50486767/plotly-how-to-draw-a-sankey-diagram-from-a-dataframe

Questions from Phase 2 (miro board: https://miro.com/app/board/uXjVOERypao=/)

In [None]:
# find weight of links between nodes
# Each entry: (heading to print, columns to group by, column whose non-null
# entries are counted within each group).
link_queries = [
    ("q0 -> q1", ["ws_category", "ws_value"], "ws_value"),
    ("q1 -> q2", ["ws_value", "ws_initiation"], "ws_initiation"),
    ("q2 -> q3", ["ws_initiation", "ws_focus"], "ws_initiation"),
]

for position, (heading, group_columns, counted_column) in enumerate(link_queries):
    if position > 0:
        # Visual separator between consecutive tables.
        print("\n")
    print(heading)
    print(df_processed.groupby(group_columns)[counted_column].count())


In [None]:
# colors
mustard = "#e0ad58"
grey = "rgba(211, 211, 211, 0.5)"  # light grey at 50% opacity

# Initialise plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=True)

In [None]:
# Nodes
# One group per question; every node in a group shares the group's colour.
# A node's ID is its position in the flattened sequence of all groups.
node_groups = [
    ("#4994CE", ["Collaboration", "Distraction", "Wanted Socialization"]),  # q0 ws_category
    ("#8A5988", ["Yes", "No"]),  # q1 ws_value
    ("#7FC241", ["Yes", "No"]),  # q2 ws_initiation
    # NOTE(review): the first "Yes" below (ID 7) duplicates the label of ID 8 and
    # the links table only targets IDs 8-10 -- confirm whether ID 7 is intentional.
    ("#e28743", ["Yes", "Yes", "Partially", "No"]),  # q3 ws_focus
]
flat_nodes = [(label, color) for color, labels in node_groups for label in labels]

# First row is the header; the remaining rows are [ID, Label, Color].
nodes = [["ID", "Label", "Color"]] + [
    [node_id, label, color] for node_id, (label, color) in enumerate(flat_nodes)
]

In [None]:
# Links
# Flow weights as (source node ID, target node ID, value) triples; the trailing
# comment on each triple names the target node's label.
link_weights = [
    # q0 -> q1
    # (the original notes listed a "None" category here; it has no links)
    # Collaboration
    (0, 3, 87),  # Yes
    (0, 4, 10),  # No
    # Distraction
    (1, 3, 23),  # Yes
    (1, 4, 63),  # No
    # Wanted Socialization
    (2, 3, 52),  # Yes
    (2, 4, 28),  # No
    # q1 -> q2
    # Yes
    (3, 5, 85),  # Yes
    (3, 6, 77),  # No
    # No
    (4, 5, 30),  # Yes
    (4, 6, 71),  # No
    # q2 -> q3
    # Yes
    (5, 8, 41),  # Yes
    (5, 9, 16),  # Partially
    (5, 10, 58),  # No
    # No
    (6, 8, 42),  # Yes
    (6, 9, 44),  # Partially
    (6, 10, 62),  # No
]

# First row is the header; every link is drawn in the same grey.
links = [["Source", "Target", "Value", "Link Color"]]
links.extend([source, target, weight, grey] for source, target, weight in link_weights)

In [None]:
# plotting
# Split each table into its header row and data rows WITHOUT mutating `nodes`
# or `links`. Popping the header in place made this cell fail when re-run,
# because the second run consumed the first data row as the header.
nodes_headers, *node_rows = nodes
links_headers, *link_rows = links
df_nodes = pd.DataFrame(node_rows, columns=nodes_headers)
# Drop incomplete links row-wise so Source/Target/Value/Link Color stay aligned;
# dropping NaNs column by column could remove different rows from each column.
df_links = pd.DataFrame(link_rows, columns=links_headers).dropna(axis=0, how="any")

# Sankey plot setup
data_trace = dict(
    type="sankey",
    domain=dict(x=[0, 1], y=[0, 1]),
    orientation="h",
    valueformat=".0f",
    node=dict(
        pad=10,
        # thickness is not set, so plotly's default is used
        line=dict(color="black", width=0),
        # Node rows are passed through unfiltered: links refer to nodes by
        # position, so removing a row would shift every later index.
        label=df_nodes["Label"],
        color=df_nodes["Color"],
    ),
    link=dict(
        source=df_links["Source"],
        target=df_links["Target"],
        value=df_links["Value"],
        color=df_links["Link Color"],
    ),
)

# Fixed-size canvas (autosize disabled) with a large font.
layout = dict(
    font=dict(size=20),
    autosize=False,
    width=800,
    height=750,
)

fig = go.Figure(
    data=[data_trace],
    layout=layout,
)
fig.show()
# Static export of the figure to a PDF file in the working directory.
# NOTE(review): presumably requires plotly's image-export backend (e.g. kaleido)
# to be installed -- confirm in the target environment.
fig.write_image("sankey.pdf")
