In [None]:
import os

import altair as alt
import pandas as pd
# alt.data_transformers.enable("vegafusion")
# !pip install -U altair_viewer

In [None]:
# Define custom theme to be applied to all plots
def theme():
    return {
        "config": {
            "title": {
                "dy": 1,
                "fontSize": 24,
                "fontWeight": 400,
                "align": "center",
                "anchor": "middle",
                "subtitleColor": "grey",
                "subtitleFontSize": 18
            },
            "view": {
                "fill": "#FCFCFC",
            },
            "header": {
                "titleFontSize": 18,
                "labelFontSize": 18,
                "labelFontWeight": 300,
            },
            "axis": {
                "titleFontSize": 18,
                "titleFontWeight": 400,
                "labelFontSize": 18,
                "labelFontWeight": 400,
                "labelLimit": 1000,
                "domainWidth": 1.5,
                "domainColor": "black",
                "tickColor": "white",
                "domain": False,
                "zindex": 1
            },
            # "axisY": {
            #     "domain": False,
            #     "zindex": 100
            # },
            "legend": {
                "titleFontSize": 18,
                "titleFontWeight": 400,
                "labelFontSize": 18,
                "labelLimit": 1000,
                "strokeColor": '#F4F6F7',
                "padding": 15
            }
        }
    }

alt.themes.register("theme", theme)
alt.themes.enable("theme")

In [None]:
"""
The name of the folder you created under `../data/`
"""
TIME_STAMP_FOLDER_NAME = '08-01-2024'

In [None]:
COLORS = {
    'Data Portals': '#56B4E9',
    'Journal Websites': '#CC79A7',
    'government': '#009E73'
}

In [None]:
df_pages = pd.read_csv(f'../data/{TIME_STAMP_FOLDER_NAME}/results/reports-by-page.csv')

In [None]:
df_pages.resource_category = df_pages.resource_category.apply(lambda x: 'Data Portals' if x == 'data-portal' else 'Journal Websites' if x == 'journal' else x)

df = df_pages.groupby('resource_category').agg({'website_id': 'count'}).reset_index()
df.rename(columns={'website_id': 'count'}, inplace=True)

In [None]:
base = alt.Chart(
    df[df.resource_category != 'government']
).mark_bar(size=80, cornerRadiusEnd=6).encode(
    alt.X('count', title='The Number of Pages Evaluated').axis(tickCount=5, orient='bottom'),
    alt.Y('resource_category:N', title=None).axis(None),
    alt.Color('resource_category:N', legend=None).scale(domain=list(COLORS.keys()), range=list(COLORS.values())),
    tooltip=[
        alt.Tooltip('resource_category:N', title='Resource Category'),
        alt.Tooltip('count', format=',', title='Number of Pages Evaluated'),
    ]
)

text = base.mark_text(
    font='SF Pro Text',
    align='right',
    dx=-10,
    size=28,
    fontWeight=600
).encode(
    alt.Text('count', format=','),
    color=alt.value('white')
)

text_resource = text.mark_text(dy=-60, dx=4, size=24, align='left').encode(
    alt.Text('resource_category:N'),
    color=alt.value('black'),
    x=alt.value(0),
)

plot = (base + text + text_resource).configure_axis(
    zindex=1
).properties(    
    # width='container',
    # height='container'
    width=500,
    height=500,
)
# .properties(
# )
# .properties(
#     title={
#         'text': 'The Number of Pages Evaluated',
#         'dy': -10
#     }
# )

"""
Create a `vega-lite` folder first!
"""
plot.save(f'../data/{TIME_STAMP_FOLDER_NAME}/website/statistics-num-pages.json')
plot