In [1]:
import pandas as pd
import plotly.graph_objects as go

# Use Plotly's "iframe" renderer by default so that figures display properly
# in JupyterLab.
import plotly.io as pio
pio.renderers.default = "iframe"

In [24]:
def plot_sankey(filename, year1, year2):
    """Show a Sankey diagram of voter transitions between two elections.

    The CSV file must contain a "Previous vote" column (the choice made in
    the earlier election) and a "Total" column (the weight of that group,
    treated as a percentage). Every other column is treated as a choice in
    the later election and holds the percentage of the group that moved to
    it. If the file has no "Others" column, one is derived as the remainder
    up to 100.

    Parameters
    ----------
    filename : str or path-like
        CSV file to load; passed straight to ``pandas.read_csv``.
    year1, year2 : int or str
        Labels of the earlier and the later election, used in the title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    votes = pd.read_csv(filename)

    # Select the transition columns by name, consistently with how
    # "Previous vote" and "Total" are addressed, instead of assuming they
    # are the first two columns of the file.
    meta_columns = ("Previous vote", "Total")
    shares = votes[[col for col in votes.columns if col not in meta_columns]].copy()
    if "Others" not in shares.columns:
        shares["Others"] = 100 - shares.sum(axis=1)

    # Calculate absolute percentages for each voter transition group; we do
    # this because Plotly does not allow us to specify individual bar heights.
    # A vectorised multiply returns a new float frame, so nothing is written
    # in place into integer columns.
    shares = shares.mul(votes["Total"] / 100.0, axis=0)

    # One row per (previous vote, new vote) pair. The explicit dropna() keeps
    # missing transitions out regardless of the stack() implementation in use.
    stacked = (
        shares.set_index(votes["Previous vote"])
        .stack()
        .dropna()
        .reset_index()
    )
    # Fixed internal names avoid duplicate columns when year1 == year2.
    stacked.columns = ["source", "target", "percentage"]

    source_codes, source_labels = pd.factorize(stacked["source"])
    target_codes, target_labels = pd.factorize(stacked["target"])
    # Target nodes are numbered right after the source nodes. The offset has
    # to be the number of source *labels*, not the number of CSV rows: the
    # two differ when a row is dropped or a previous vote appears twice.
    target_codes = target_codes + len(source_labels)

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            label=list(source_labels) + list(target_labels),
            pad=0,
        ),
        link=dict(
            source=source_codes,
            target=target_codes,
            value=stacked["percentage"],
        ),
    )])

    fig.update_layout(
        title_text=(
            "Voter transition between the Bulgarian parliamentary elections "
            f"of {year1} and {year2}"
        ),
        font_size=10,
    )
    fig.show()

In [3]:
# Voter transitions from the 2013 election to the 2014 election.
plot_sankey(filename="2013-2014.csv", year1=2013, year2=2014)

In [4]:
# Voter transitions from the 2014 election to the 2017 election.
plot_sankey(filename="2014-2017.csv", year1=2014, year2=2017)

In [5]:
# Voter transitions from the 2017 election to the 2021 election.
plot_sankey(filename="2017-2021.csv", year1=2017, year2=2021)

In [25]:
# Both labels fall in 2021, so they carry the month as well to keep the two
# election periods distinguishable.
plot_sankey(filename="2021-2021.csv", year1="2021-04", year2="2021-07")