In [None]:
from pathlib import Path

import pandas as pd
import altair as alt
import numpy as np

import ideafast_deviceselection as ifds

# Scoring workbook, located in the 'local' folder two directory levels above
# the ideafast_deviceselection module file.
source = Path(ifds.__file__).parent.parent.absolute() / 'local/DeviceSelectionScoring.xlsx'

# Read the 'MASTER SCORES' sheet. The first two rows form a two-level column
# header and the first column becomes the row index.
df = pd.read_excel(
    source,
    index_col=0, header=[0, 1], nrows=63,
    sheet_name='MASTER SCORES')

# Trim the sheet by position. Every step indexes into the result of the
# previous step, so the order of these drops matters.
df = df.drop(columns=df.columns[2:12])
df = df.drop(columns=df.columns[0:1])
df = df.drop(index=df.index[0])
df = df.drop(columns=df.columns[30:])

# Extend empty headers to use the MultiIndex: top-level header cells that
# pandas loaded as 'Unnamed...' are blanked out and then forward-filled from
# the nearest named header to their left.
device_level = (
    df.columns.get_level_values(0)
    .to_series()
    .mask(lambda labels: labels.str.startswith('Unnamed'))
    .ffill()
)
# Label the first column's top level explicitly. `.iloc` is required here:
# the Series is indexed by the header strings, so a plain `[0]` would be an
# integer *label* lookup (the positional fallback is deprecated in pandas).
device_level.iloc[0] = 'relevance'
type_level = df.columns.get_level_values(1)
df.columns = [device_level, type_level]

df.index.names = ['criteria']
df.columns.names = ['device', 'type']

# Remove every second-level 'RELEVANCE * SCORE' column.
df = df.drop(columns='RELEVANCE * SCORE', level=1)

# Turn the 'criteria' index into a regular column for the reshaping below.
df = df.reset_index()

df

In [None]:
# Category names; a criterion is assigned to categories[n - 1], where n is the
# first character of its 'criteria' label read as an integer.
categories = [
    "Data Quality, Reliability & Analytics",
    "Data Access, Transparency & Handling",
    "Accessibility, Usability & User Experience",
    "Regulatory Concerns",
    "Scalability & Practicality",
    "Track Record & Data Availability",
]

# Reshape to long form, keeping the criterion and its relevance score as
# identifier columns.
long = pd.melt(df, id_vars=['criteria', ('relevance', 'relevance score')])

# Insert the category name as the second column.
long.insert(1, 'group', [categories[int(x) - 1] for x in long['criteria'].str[0]])

# The relevance column (third position) still carries the tuple name taken
# from the MultiIndex; replace it with a plain string. The rename is done by
# position so it does not depend on how the tuple label is represented.
renames = long.columns.to_list()
renames[2] = 'relevance'
long.columns = renames

long


In [None]:
# grouped = long.groupby(['group','device','type'])
# grouped

In [None]:
# Keep only the rows whose 'type' equals "SCORE".
is_score = long['type'] == "SCORE"
scored = long.loc[is_score]

scored

In [None]:
from altair import datum

# Both layers share the same data and the same x/y encodings.
base = alt.Chart(scored).encode(
    x='device:O',
    y='value:Q',
)

# Rows with a relevance above 4 and a value below 0.5.
low_value_high_relevance = (datum.relevance > 4) & (datum.value < .5)

# Layer 1: box plot of the values per device.
box = base.mark_boxplot()

# Layer 2: red points for the rows selected by the filter above.
dot = (
    base
    .mark_point()
    .encode(color=alt.value('red'))
    .transform_filter(low_value_high_relevance)
)

# One column of the layered chart per 'group'.
alt.layer(box, dot).facet(column='group')


In [None]:
# Experiment: violin plot of the values per device, for a single group.
# Author's result: thicker is more important; BUT - it doesn't show all
# individual points (they could be hidden).
# NOTE(review): the density transform writes its estimate into a field named
# 'relevance', so the x encoding below plots that estimate, not the
# 'relevance' column of `scored` - confirm this naming is intended.

ux_scores = scored.loc[scored['group'] == "Accessibility, Usability & User Experience"]

# Horizontal extent of each violin: centred stack, no labels, no grid.
density_x = alt.X(
    'relevance:Q',
    stack='center',
    impute=None,
    title=None,
    axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
)

# One narrow column per device, labelled underneath.
device_column = alt.Column(
    'device:N',
    header=alt.Header(
        titleOrient='bottom',
        labelOrient='bottom',
        labelPadding=0,
    ),
)

violin = (
    alt.Chart(ux_scores)
    .transform_density(
        'value',
        as_=['value', 'relevance'],
        extent=[0, 1],
        groupby=['device'],
    )
    .mark_area(orient='horizontal')
    .encode(y='value:Q', x=density_x, column=device_column)
    .properties(width=75)
    .configure_facet(spacing=0)
    .configure_view(stroke=None)
)

violin

In [None]:
# Heatmap: number of scores per device and binned value.
rect = alt.Chart(scored).mark_rect().encode(
    alt.X('device:O'),
    alt.Y('value:Q', bin=True),
    alt.Color('count()',
        scale=alt.Scale(scheme='greenblue'),
        legend=alt.Legend(title='Total Scores')
    )
)

# Overlay: red circles sized by the number of rows with a relevance above 4
# and a value below 0.5. `alt.datum` is used so this cell only needs the
# top-level `alt` import.
# NOTE(review): `bin=True` is inherited from `rect` but applied to the
# filtered rows here - confirm the circles line up with the heatmap bins.
circ = rect.mark_circle().encode(
    alt.ColorValue('red'),
    alt.Size('count()',
        legend=alt.Legend(title='Stoppers present', tickMinStep=1),
    ),
).transform_filter(
    (alt.datum.relevance > 4) & (alt.datum.value < .5)
)

# One column of the layered chart per 'group'.
(rect + circ).facet(
    column='group'
)

# Heatmap only, without the overlay:
# rect.facet(
#     column='group'
# )

