<a href="https://colab.research.google.com/github/aleks-haksly/VIZRO/blob/main/demos/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sqlalchemy import text
from sqlalchemy import create_engine
import pandas as pd
from google.colab import userdata


# Shared SQLAlchemy engine for the whole notebook. The connection URL is read
# from the Colab secret named "supabase", so no credentials live in the file.
engine = create_engine(
    userdata.get("supabase"),
    client_encoding="utf8",
)

def select(sql):
    """Run a SQL query on the notebook-wide ``engine`` and return the result.

    Args:
        sql: The query as a plain string; it is wrapped in ``text()`` before
            being handed to pandas.

    Returns:
        A ``pandas.DataFrame`` with the rows produced by the query.
    """
    statement = text(sql)
    result_frame = pd.read_sql(statement, con=engine)
    return result_frame

In [2]:
# One row per (platform, query) pair with its row count `cnt`, plus two window
# totals computed over the grouped rows:
#   platform_total    - sum of per-group counts within the same platform
#   query_count_total - sum of per-group counts for the same query across platforms
# NOTE(review): `cnt` uses count(*) while both totals use count(query); these
# differ if `query` can be NULL - confirm which one is intended.
SQL = """
SELECT
    platform,
	query,
	count(*) AS cnt,
	sum(count(query)) over(PARTITION BY platform) AS platform_total,
	sum(count(query)) over(PARTITION BY query) AS query_count_total

FROM
	vizro.yandex_data yd
GROUP BY
	platform,
	query
"""

# Execute the aggregation in the database and load the result as a DataFrame.
df = select(SQL)

In [3]:
# Minimum number of occurrences (summed over all platforms) a query needs in
# order to be kept for the analysis.
min_query_cnt = 50

# Boolean mask over the per-query total; rows below the threshold are dropped.
is_frequent_query = df["query_count_total"] >= min_query_cnt
df = df[is_frequent_query]

In [4]:
# Long -> wide: one row per query, with a (value, platform) column pair for each
# of `cnt` and `platform_total`. The two-level column labels are then flattened
# to "<value>_<platform>"; the `query` column has an empty second level, hence
# the trailing-underscore strip.
df_pivoted = (
    df.pivot(index="query", columns=["platform"], values=["cnt", "platform_total"])
    .reset_index()
    .pipe(
        lambda wide: wide.set_axis(
            ["_".join(levels).rstrip("_") for levels in wide.columns.to_flat_index()],
            axis=1,
        )
    )
)

In [5]:
# Inspect the wide table: one row per query with per-platform count and total columns.
df_pivoted

Unnamed: 0,query,cnt_desktop,cnt_touch,platform_total_desktop,platform_total_touch
0,02 аниме,34.0,30.0,411566.0,792258.0
1,1 сентября,277.0,997.0,411566.0,792258.0
2,1 сентября 2021,19.0,71.0,411566.0,792258.0
3,1 сентября день знаний,69.0,137.0,411566.0,792258.0
4,1 сентября картинки,223.0,653.0,411566.0,792258.0
...,...,...,...,...,...
5083,ячмень на глазу лечение,12.0,56.0,411566.0,792258.0
5084,яэ мико,48.0,43.0,411566.0,792258.0
5085,ёжики,107.0,123.0,411566.0,792258.0
5086,سكس,6.0,77.0,411566.0,792258.0


In [6]:
# After the pivot, a query that is missing on one platform has NaN in that
# platform's columns. A missing count means zero occurrences; a missing platform
# total is replaced by the column maximum (the SQL computes that total per
# platform, so every non-missing value in the column is expected to be the same).
nan_fill_values = {
    "cnt_touch": 0,
    "cnt_desktop": 0,
    "platform_total_desktop": df_pivoted["platform_total_desktop"].max(),
    "platform_total_touch": df_pivoted["platform_total_touch"].max(),
}
df_pivoted.fillna(value=nan_fill_values, inplace=True)

In [7]:
# Inspect the table again after the missing values have been filled in.
df_pivoted

Unnamed: 0,query,cnt_desktop,cnt_touch,platform_total_desktop,platform_total_touch
0,02 аниме,34.0,30.0,411566.0,792258.0
1,1 сентября,277.0,997.0,411566.0,792258.0
2,1 сентября 2021,19.0,71.0,411566.0,792258.0
3,1 сентября день знаний,69.0,137.0,411566.0,792258.0
4,1 сентября картинки,223.0,653.0,411566.0,792258.0
...,...,...,...,...,...
5083,ячмень на глазу лечение,12.0,56.0,411566.0,792258.0
5084,яэ мико,48.0,43.0,411566.0,792258.0
5085,ёжики,107.0,123.0,411566.0,792258.0
5086,سكس,6.0,77.0,411566.0,792258.0


In [8]:
from statsmodels.stats.proportion import proportions_chisquare

In [9]:
from statsmodels.stats.proportion import proportions_chisquare

def proportions_chi2(df: pd.DataFrame):
  """Return the p-value of a chi-square test comparing two platform shares.

  The desktop and touch counts are treated as successes and the corresponding
  platform totals as the numbers of observations; both pairs are passed to
  statsmodels' ``proportions_chisquare``.

  Args:
    df: Anything indexable by the keys ``cnt_desktop``, ``cnt_touch``,
      ``platform_total_desktop`` and ``platform_total_touch``. In this notebook
      it is applied with ``axis=1``, i.e. it receives one row at a time.

  Returns:
    The second element (the p-value) of the result of ``proportions_chisquare``.
  """
  successes = [df['cnt_desktop'], df['cnt_touch']]
  observations = [df['platform_total_desktop'], df['platform_total_touch']]

  _chi2_stat, p_value, _tables = proportions_chisquare(count=successes, nobs=observations)
  return p_value

In [10]:
# Run the proportions test once per row (axis=1) and store the resulting p-value.
df_pivoted["pval"] = df_pivoted.apply(proportions_chi2, axis=1)

In [12]:
# Share of each query within its platform: query count divided by the platform total.
desktop_share = df_pivoted["cnt_desktop"].div(df_pivoted["platform_total_desktop"])
touch_share = df_pivoted["cnt_touch"].div(df_pivoted["platform_total_touch"])

df_pivoted["pct_desktop"] = desktop_share
df_pivoted["pct_touch"] = touch_share

In [13]:
!pip install vizro -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.2/79.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m857.0/857.0 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m500.8/500.8 kB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.5/222.5 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [14]:
import vizro.models as vm
import vizro.plotly.express as px
from vizro import Vizro
from vizro.tables import dash_ag_grid

# Frame handed to the grid. reset_index() also turns the previous index into an
# "index" column; it is not displayed because columnDefs lists its fields explicitly.
df = df_pivoted.reset_index()

# Conditional formatting: cells whose value satisfies the condition get the
# given background colour.
# NOTE(review): 0.5 is a very loose cut-off for a p-value (0.05 is customary) -
# confirm the intended threshold.
cellStyle = {
    "styleConditions": [
        {
            "condition": "params.value < 0.5",
            "style": {"backgroundColor": "#89ff22"},
        }
    ]
}

# Columns shown in the grid and how their values are formatted (d3-format strings).
columnDefs = [
    {"field": "query"},
    {"field": "cnt_touch", "valueFormatter": {"function": "d3.format(',.0f')(params.value)"}},
    {"field": "pct_touch", "valueFormatter": {"function": "d3.format(',.3%')(params.value)"}},
    {"field": "cnt_desktop", "valueFormatter": {"function": "d3.format(',.0f')(params.value)"}},
    {"field": "pct_desktop", "valueFormatter": {"function": "d3.format(',.3%')(params.value)"}},
    # "cellStyle" and "cellDataType" are column-level properties. They used to be
    # nested inside the valueFormatter dict, where the grid does not look for
    # them, so the p-value highlighting was never applied.
    {
        "field": "pval",
        "valueFormatter": {"function": "d3.format(',.3f')(params.value)"},
        "cellStyle": cellStyle,
        "cellDataType": "numeric",
    },
]

# Single-page dashboard with one AG Grid table.
page = vm.Page(
    title="Statistical significance in query counts",
    components=[
        vm.AgGrid(
            title="Modified Dash AG Grid",
            figure=dash_ag_grid(
                data_frame=df,
                columnDefs=columnDefs,
                defaultColDef={"resizable": False, "filter": False, "editable": False},
                dashGridOptions={"pagination": True}
            ),
        )
    ],
)

dashboard = vm.Dashboard(pages=[page])

# Build the app and start the server (renders inline in Colab).
Vizro().build(dashboard).run()

<IPython.core.display.Javascript object>