<a href="https://colab.research.google.com/github/aleks-haksly/VIZRO/blob/main/demos/butterfly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install vizro -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.2/79.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m857.0/857.0 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m500.8/500.8 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.5/222.5 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from sqlalchemy import text
from sqlalchemy import create_engine
import pandas as pd
from google.colab import userdata


# The connection URL is read from the Colab user secret named "supabase".
engine = create_engine(
    userdata.get("supabase"),
    client_encoding="utf8",
)

def select(sql):
    """Execute a raw SQL string on the module-level ``engine``.

    The string is wrapped in a SQLAlchemy ``text`` clause and the result of
    ``pd.read_sql`` is returned unchanged.
    """
    statement = text(sql)
    frame = pd.read_sql(statement, engine)
    return frame

In [None]:
# Select, for every row of vizro.yandex_data: the date, the hour extracted
# from ts (cast to int), the platform and the query.
sql = """
SELECT date, date_part('hour', ts)::int as hour, platform, query
FROM vizro.yandex_data
"""
# Run the statement through select() and keep the resulting DataFrame.
query_df = select(sql)

In [None]:
def butterfly_transform_data(df: pd.DataFrame, top_n: int = 10) -> pd.DataFrame:
    """Build the per-query table that feeds the butterfly chart.

    For every platform the ``top_n`` most frequent queries are selected; the
    union of those queries forms the rows of the result. For each platform the
    result holds a count column (named after the platform) and a share column
    (``"<platform>_%"``) equal to the count divided by the number of rows with
    a non-null date for that platform in the whole input.

    Args:
        df: Frame with at least ``date``, ``platform`` and ``query`` columns.
            It is not modified.
        top_n: How many of the most frequent queries to keep per platform.

    Returns:
        A frame with a ``query`` column followed by one count column and one
        ``_%`` share column per platform.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    data = df.copy()
    # %Y is the four-digit year; %y (two digits) rejects ISO dates such as
    # "2024-01-05".
    data["date"] = pd.to_datetime(data["date"], format="%Y-%m-%d")

    # Denominators: number of dated rows per platform over the whole input.
    totals = data.groupby("platform")["date"].count()

    # Queries that rank in the top_n by frequency for at least one platform.
    pair_counts = data.groupby(["platform", "query"], as_index=False)["date"].count()
    top_rows = (
        pair_counts.sort_values(by="date", ascending=False)
        .groupby("platform")
        .head(top_n)
    )
    top_queries = set(top_rows["query"])

    selected = data[data["query"].isin(top_queries)]
    # fill_value=0: a query that never occurs on a platform has a count of
    # zero there, not a missing value.
    pivot = (
        selected.groupby(["platform", "query"])["date"]
        .count()
        .unstack(level=0, fill_value=0)
        .reset_index()
    )

    for platform, total in totals.items():
        if platform not in pivot.columns:
            # The platform has rows but none of them matched a selected query.
            pivot[platform] = 0
        pivot[f"{platform}_%"] = pivot[platform] / total
    return pivot

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import vizro.models as vm
from vizro import Vizro
from vizro.models.types import capture


@capture("graph")
def butterfly(data_frame: pd.DataFrame, **kwargs) -> go.Figure:
    """Draw a two-sided ("butterfly") bar chart from two bar traces.

    The figure is created with ``px.bar(data_frame, **kwargs)``. The second
    trace is moved to a secondary value axis occupying the right/top half of
    the plot, while the first value axis is reversed and confined to the other
    half, so the two series grow away from a shared centre line.

    The hover template reads ``hovertext`` and ``customdata``, so the call is
    expected to pass ``hover_name`` and a ``hover_data`` whose i-th kept column
    holds the raw quantity for the i-th trace.

    Args:
        data_frame: Data forwarded to ``px.bar``.
        **kwargs: Remaining ``px.bar`` keyword arguments.

    Returns:
        The configured figure.

    Raises:
        IndexError: If ``px.bar`` produced fewer than two traces.
    """
    fig = px.bar(data_frame, **kwargs)

    horizontal = fig.data[0].orientation == "h"
    # Letter of the axis that carries the bar lengths.
    x_or_y = "x" if horizontal else "y"

    # Put the second trace on its own value axis, cloned from the first one,
    # then split the plot area in two halves with the first half mirrored.
    fig.update_traces({f"{x_or_y}axis": f"{x_or_y}2"}, selector=1)
    fig.update_layout({f"{x_or_y}axis2": fig.layout[f"{x_or_y}axis"]})
    fig.update_layout(
        {
            f"{x_or_y}axis": {"autorange": "reversed", "domain": [0, 0.5]},
            f"{x_or_y}axis2": {"domain": [0.5, 1]},
        }
    )

    # Centre line and category ordering both depend on the orientation: the
    # categories live on the axis opposite to the value axis.
    if horizontal:
        fig.add_vline(x=0, line_width=2, line_color="grey")
        fig.update_yaxes(categoryorder="min ascending")
    else:
        fig.add_hline(y=0, line_width=2, line_color="grey")
        fig.update_xaxes(categoryorder="min ascending")

    share_suffix = "_%"
    for position in range(2):
        trace = fig.data[position]
        # Show the share from the value axis and the raw quantity from the
        # customdata column that belongs to this trace.
        trace.hovertemplate = (
            f"<b>%{{hovertext}}</b><br>% of all=%{{{x_or_y}:.2%}}"
            f"<br>qty=%{{customdata[{position}]}}<extra></extra>"
        )
        # Legend label: the share column name without its "_%" suffix.
        if trace.name and trace.name.endswith(share_suffix):
            trace.name = trace.name[: -len(share_suffix)]

    return fig



# Reshape the raw query log into per-query counts and shares per platform.
butterfly_df = butterfly_transform_data(query_df)

# Share columns drive the bar lengths; the raw counts are exposed to the hover.
fig = butterfly(
    butterfly_df,
    x=["desktop_%", "touch_%"],
    y="query",
    labels={"value": "% of all", "variable": "platform:"},
    hover_name="query",
    hover_data={"query": False, "desktop": True, "touch": True},
)

# Single-page dashboard holding the chart; build it and start the app.
graph = vm.Graph(figure=fig)
page = vm.Page(title="My page", components=[graph])
dashboard = vm.Dashboard(pages=[page])
app = Vizro().build(dashboard)
app.run()

<IPython.core.display.Javascript object>