In [1]:
from plotlywaterfall.waterfall import Waterfall
import pandas as pd

# Register a white-background template named "custom" with a fixed
# 1200x600 figure size, then make it the default template.
import plotly.io as pio

pio.templates["custom"] = pio.templates["plotly_white"]
pio.templates["custom"].layout.update(autosize=False, width=1200, height=600)
pio.templates.default = "custom"

# Simple Examples

The simplest plot only needs some `X` and `Y` values. Note that values with the same `X` are summed:

In [2]:
# "Product Sales" and "Product R&D" each appear twice on purpose:
# rows that share an X label are summed.
df = pd.DataFrame(
    {
        "X": [
            "Product Sales",
            "Product Sales",
            "Services Sales",
            "Product R&D",
            "Product R&D",
            "Services COGS",
        ],
        "Y": [3, 4, 1, -1, -2, -0.5],
    }
)

chart = Waterfall(df, x="X", y="Y")
fig = chart.get_fig()
fig.update_yaxes(title="Cashflow [MUSD]")
fig

Now let's add the total sum:

In [3]:
# Same cashflow rows as in the simplest example.
df = pd.DataFrame(
    {
        "X": [
            "Product Sales",
            "Product Sales",
            "Services Sales",
            "Product R&D",
            "Product R&D",
            "Services COGS",
        ],
        "Y": [3, 4, 1, -1, -2, -0.5],
    }
)

# total=True requests the total sum in addition to the individual X values.
chart = Waterfall(df, x="X", y="Y", total=True)
fig = chart.get_fig()
fig.update_yaxes(title="Cashflow [MUSD]")
fig

More useful is to add the category:

In [4]:
# Each cashflow row now also carries the category it belongs to.
df = pd.DataFrame(
    {
        "X": [
            "Product Sales",
            "Product Sales",
            "Services Sales",
            "Product R&D",
            "Product R&D",
            "Services COGS",
        ],
        "Y": [3, 4, 1, -1, -2, -0.5],
        "category": [
            "Product A",
            "Product B",
            "Plugins",
            "Product A",
            "Product B",
            "Plugins",
        ],
    }
)

chart = Waterfall(df, x="X", y="Y", category="category", total=True)
fig = chart.get_fig()
fig.update_yaxes(title="Cashflow [MUSD]")
fig

The total can also be negative, as long as the sign per `X` value is always the same. E.g. here both *Product R&D* values are negative:

In [5]:
# Costs outweigh sales here (the Y values sum to -3), so the total is negative.
# Within each X label all values still share the same sign.
df = pd.DataFrame(
    {
        "X": [
            "Product Sales",
            "Product Sales",
            "Licenses",
            "Product R&D",
            "Product R&D",
            "License Mgmt",
        ],
        "Y": [3, 4, 1, -4, -5, -2],
        "category": [
            "Product A",
            "Product B",
            "Plugins",
            "Product A",
            "Product B",
            "Plugins",
        ],
    }
)

chart = Waterfall(df, x="X", y="Y", category="category", total=True)
fig = chart.get_fig()
fig.update_yaxes(title="Cashflow [MUSD]")
fig

Mixed signs are currently not supported. E.g. if we have mixed signs per `X`, an exception is raised:

In [6]:
# The categories end up with totals of different signs
# ("Product Sales"/"Licenses" positive, "R&D"/"Sales" negative).
df = pd.DataFrame({
    "X": ["Product A", "Product B", "Plugins", "R&D", "Sales"],
    "Y": [3, 4, 1, -4, -3],
    "category": ["Product Sales", "Product Sales", "Licenses", "R&D", "Sales"]
})

# The exception is the point of this cell: catch it and show its type and
# message so the notebook still runs from top to bottom.
try:
    # Pass the boolean True like every other cell does; the previous string
    # "True" only worked because a non-empty string is truthy.
    Waterfall(df, x="X", y="Y", category="category", total=True).get_fig()
except NotImplementedError as exc:
    print(type(exc), exc)


<class 'NotImplementedError'> Currently mixed signed categories in totals are not supported (yet)


## Grouping

One can also create multiple groups. Grouping might be useful to compare data. Let's use the following dataframe:

In [7]:
# Group1 and Group2 share the same A/B/C layout across the categories
# "one" and "two"; the two trailing "D" rows belong to Group2 and Group3.
df = pd.DataFrame(
    {
        "X": ["A", "B", "C"] * 4 + ["D"] * 2,
        "Y": (
            [4, 1, 8, 7, 3, 2]      # Group1
            + [3, 0, 7, 6, 2, 1]    # Group2: the Group1 values minus one
            + [8, 5]                # the two "D" rows
        ),
        "category": (["one"] * 3 + ["two"] * 3) * 2 + ["two", "three"],
        "group": ["Group1"] * 6 + ["Group2"] * 7 + ["Group3"],
    }
)
df

Unnamed: 0,X,Y,category,group
0,A,4,one,Group1
1,B,1,one,Group1
2,C,8,one,Group1
3,A,7,two,Group1
4,B,3,two,Group1
5,C,2,two,Group1
6,A,3,one,Group2
7,B,0,one,Group2
8,C,7,one,Group2
9,A,6,two,Group2


In [8]:
# Pass the "group" column in addition to the category; a total is requested too.
c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    group="group",
    total=True,
)
fig = c.get_fig()
fig

## Coloring

One can easily change the coloring by specifying the *colors* keyword argument. It expects a *dict*. For example, if we want to define colors per group and category:

In [9]:
# Outer keys are group labels. A group maps either to a dict of
# per-category colors or directly to a single color.
colors = {
    "Group1": {
        "one": "red",
        "two": "blue",
    },
    "Group2": {
        "one": "salmon",
        "two": "lightskyblue",
    },
    "Group3": "green",
}

c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    colors=colors,
    group="group",
    total=True,
)
fig = c.get_fig()
fig

One can also leave out the *group* layer and directly specify the *category* colors:

In [10]:
# Flat mapping: the keys are category labels, with no group layer.
colors = {
    "one": "red",
    "two": "blue",
    "three": "green",
}

c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    colors=colors,
    group="group",
    total=True,
    subtotals={"B": "Subtotal"},
)
fig = c.get_fig()
fig

# Subtotals

It is possible to define where subtotals should be calculated and added:

In [11]:
# Colors per group and, where given as a dict, per category within the group.
colors = {
    "Group1": {
        "one": "red",
        "two": "blue",
    },
    "Group2": {
        "one": "salmon",
        "two": "lightskyblue",
    },
    "Group3": "green",
}

# subtotals maps an X value ("C") to the label used for its subtotal ("Subtotal").
c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    colors=colors,
    group="group",
    total=True,
    subtotals={"C": "Subtotal"},
)
fig = c.get_fig()
fig

# Caveats

Usually one can simply give only the colors for the *category* in the *colors* dict. These colors will then be applied to all groups. 

This only works reliably if the labels for the *group* differ from the ones for the *category*. Otherwise the group takes precedence:

In [12]:
# The group labels deliberately reuse the category labels
# ("one", "two", "three") to demonstrate the caveat.
df = pd.DataFrame(
    {
        "X": ["A", "B", "C"] * 4 + ["D"] * 2,
        "Y": (
            [4, 1, 8, 7, 3, 2]      # group "one"
            + [3, 0, 7, 6, 2, 1]    # group "two": the values above minus one
            + [8, 5]                # the two "D" rows
        ),
        "category": (["one"] * 3 + ["two"] * 3) * 2 + ["two", "three"],
        "group": ["one"] * 6 + ["two"] * 7 + ["three"],
    }
)

# Flat mapping whose keys match both the category labels and the group labels.
colors = {
    "one": "red",
    "two": "blue",
    "three": "green",
}

c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    colors=colors,
    group="group",
    total=True,
    subtotals={"C": "Subtotal"},
)
fig = c.get_fig()
fig

If it is absolutely necessary to use the same label texts, then one has to specify the colors explicitly:

In [13]:
# Spell the colors out per group (outer keys) and per category (inner keys),
# since both layers use the same labels.
colors = {
    "one": {
        "one": "red",
        "two": "blue",
    },
    "two": {
        "one": "salmon",
        "two": "lightskyblue",
    },
    "three": "green",
}

c = Waterfall(
    df,
    x="X",
    y="Y",
    category="category",
    colors=colors,
    group="group",
    total=True,
    subtotals={"C": "Subtotal"},
)
fig = c.get_fig()
fig