# Active shooter Sankey

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension (presumably auto-formats cells with Black — confirm).
%load_ext lab_black
# Use the interactive "widget" matplotlib backend for figures in this notebook.
%matplotlib widget

In [2]:
import pandas as pd
from pySankey.sankey import sankey
import plotly.graph_objects as go
from floweaver import *

In [3]:
# Raise pandas' display limits: up to 100 columns, 1000 rows, and
# no truncation of long cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

---

#### Read data

In [4]:
# CSV export of a published Google Sheets document (first tab, gid=0).
SHEET_CSV_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRlTYYGmxOebAIVNoJGcK9B-gka167q-wgHHWPq5gcUvNg0LA0Bdewd9epwtbsxBa-C6b-1-UA2XnYy/pub?gid=0&single=true&output=csv"

# Load the whole sheet into a DataFrame; requires network access.
df = pd.read_csv(SHEET_CSV_URL)

In [5]:
# Split the rows on whether the "police" column is exactly "yes".
# Anything else (other strings, missing values) lands in `no_police`.
police_is_yes = df["police"] == "yes"
police = df[police_is_yes]
no_police = df[~police_is_yes]

In [6]:
# Give every row the same "source" label; the start node's partition in a
# later cell is built on this column with the value "Active shooter".
# NOTE: `police` / `no_police` were sliced from `df` before this column was
# added, so those two frames do not carry it.
df["source"] = "Active shooter"

In [7]:
# Example flow table: one row per (type, destination) pair, all flowing out of
# "Syria". Rows are ordered by type (men, women, children) and, within each
# type, by the destination order below.
destinations = ["Turkey", "Lebanon", "Jordan", "Iraq", "Egypt", "Europe", "USA"]
destination_regions = ["Middle East"] * 5 + ["Europe", "North America"]

# Counts per type, aligned position-by-position with `destinations`.
counts_by_type = {
    "men": [1000000, 200000, 150000, 70000, 35000, 300000, 3000],
    "women": [1100000, 220000, 180000, 75000, 40000, 330000, 5000],
    "children": [1500000, 530000, 340000, 105000, 65000, 670000, 10000],
}

refugees_gender = pd.DataFrame(
    {
        "source": "Syria",  # scalar is broadcast to every row
        "target": destinations * len(counts_by_type),
        "value": [count for counts in counts_by_type.values() for count in counts],
        "region": destination_regions * len(counts_by_type),
        "type": [kind for kind in counts_by_type for _ in destinations],
    },
    columns=["source", "target", "value", "region", "type"],
)

In [8]:
# Partition on the "type" column; used both for the middle waypoint and for
# splitting the drawn flows.
gender = Partition.Simple("type", ["men", "women", "children"])

# set color palette
# palette = {'men': 'teal', 'women': 'purple', 'children': 'gray'}

# Labels shown at the right-hand side, in display order.
destination_names = ["Turkey", "Lebanon", "Jordan", "Iraq", "Egypt", "Europe", "USA"]

# Grouping spots of the diagram (the dict keys are only internal handles).
# Each process group gets its display partition directly at construction.
nodes = {
    # single origin on the left
    "start": ProcessGroup(["Syria"], partition=Partition.Simple("source", ["Syria"])),
    # middle column split by type
    "type": Waypoint(gender),
    # destinations on the right
    "end": ProcessGroup(
        list(refugees_gender["target"]),
        partition=Partition.Simple("target", destination_names),
    ),
}

# Layers from left to right.
ordering = [["start"], ["type"], ["end"]]

# One bundle: everything from "start" to "end", routed through the waypoint.
bundles = [Bundle("start", "end", waypoints=["type"])]

# Sankey definition with the flows themselves partitioned by gender/type.
sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=gender)

# Render as a widget and save a PNG copy; the final expression is the cell output.
sankey_widget = weave(sdd, refugees_gender).to_widget()
sankey_widget.auto_save_png("test.png")

SankeyWidget(groups=[{'id': 'start', 'type': 'process', 'title': '', 'nodes': ['start^Syria']}, {'id': 'type',…

In [9]:
# Node groups for the active-shooter diagram (replaces the refugee example's
# `nodes`).
nodes = {
    # Every row's "source" was set to "Active shooter" in an earlier cell, and
    # the start partition is built on that same value, so that is the process
    # id to select here. "police" is a column name, not a source value, and
    # would match no rows.
    "start": ProcessGroup(["Active shooter"]),
    # Select every distinct value found in the "outcome" column.
    "end": ProcessGroup(list(df["outcome"].unique())),
}

In [10]:
# Two layers, left to right: the start group, then the end group.
ordering = [
    ["start"],
    ["end"],
]

# A single bundle carrying every flow from "start" to "end" (no waypoints).
bundles = [Bundle(source="start", target="end")]

In [11]:
# Display partitions: the start group shows the single "Active shooter"
# source; the end group shows one entry per distinct "outcome" value.
start_group = nodes["start"]
end_group = nodes["end"]

start_group.partition = Partition.Simple("source", ["Active shooter"])
end_group.partition = Partition.Simple("target", df["outcome"].unique())