In [1]:
import pandas as pd
import numpy as np
import eco_style
import altair as alt
import pycountry

alt.themes.enable('light')

ThemeRegistry.enable('light')

# Where has national service

In [2]:
# let's fetch from wikipedia, filter for Europe and manually verify
dfs = pd.read_html("https://en.wikipedia.org/wiki/Conscription")

In [3]:
df = dfs[2].copy()
# remove all (bracketed) bits in the Country column
df['Country'] = df['Country'].str.replace(r'\[.*?\]|\(.*?\)', '', regex=True)
def get_iso3(country):
    return 

# Timeline Chart

In [5]:
df = pd.read_csv("eur_conscription.csv")
df = df[['iso3', 'started_year', 'ended_year']]
df.columns = ['iso3', 'start', 'end']
df = df.query('end > start')

exclude = [
    "FRA" # France suspended conscription in 1997 but only officially ended it in 2001 but let's exclude it because it's a bit of a mess and lots of countries have suspended it

]

def get_name(x):
    try:
        res = pycountry.countries.get(alpha_3=x)
        # if it has a common_name, use that
        try:
            return res.common_name
        except AttributeError:
            return res.name
    except LookupError:
        return None


df['name'] = df['iso3'].apply(lambda x: get_name(x))

df['start'] = pd.to_datetime(df['start'], format='%Y')
df['end'] = pd.to_datetime(df['end'], format='%Y')

df = df.sort_values('end')
df['rank'] = np.arange(len(df))

df['start_label'] = 'Start'
df['end_label'] = 'End'

base = alt.Chart(df).encode(
    y=alt.X('name:N', sort="-x", 
            axis=alt.Axis(domain=False,
                          labelFontSize=13,
                           labelColor="#1d1d1d"),
            title=None),
    x=alt.X('start:T',
            axis=alt.Axis(
                labelFontSize=13,
            ),
            scale=alt.Scale(zero=False,
                            domain=(pd.Timestamp('2000'),
                                    pd.Timestamp('2024-12-31')),
                                    clamp=True),
                            ),

    x2='end:T'

)

rules = base.mark_rule()

# add filled start and end points
start_points = base.transform_filter(
    'year(datum.start) >1999').mark_circle(
        size=75,
        opacity=1,
        color="#36b7b4")
end_points = base.transform_filter('year(datum.end) <2025').transform_calculate(
    start='datum.end' # this is a hack - if we use the end in the encoding, it'll break the ordering
).mark_circle(color="#e6224b", size=75, opacity=1)

# text on Sweden to annotate the start and end points
swe_start_text = base.transform_filter('datum.iso3 == "SWE" && year(datum.start) > 2005').mark_text(
    align='right',
    baseline='middle',
    dx=-8,
    dy=0,
    fontSize=13,
    color="#36b7b4",
    text='Start'
).encode(
    text=alt.Text('start_label')
)

swe_end_text = base.transform_filter('datum.iso3 == "SWE" && year(datum.end) < 2020').transform_calculate(
    start='datum.end' # same hack as above
).mark_text(
        align='left',
    baseline='middle',
    dx=8,
    dy=0,
    fontSize=13,
    color="#e6224b",
    text='End'
).encode(
    text=alt.Text('end_label')
)
chart = rules + start_points+end_points + swe_start_text + swe_end_text

chart = chart.properties(
    height=600,
    width=400,
    title = alt.TitleParams(
        text="National Service in the 21st Century",
        subtitle=["European countries with national service since 2000", "Various Sources", ""],
        fontSize=16,
        subtitleFontSize=12,
        anchor="start",
        dx=140
    )
)

chart.save("timeline.json")


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Neet

In [8]:
dfs = pd.read_html("oecd_neet.html")
df = dfs[0]

df.columns = [c.replace("▾", "").strip() for c in df.columns]
df = df.melt(id_vars=['Location'], var_name='date', value_name='value')
df = df.rename(columns={'Location': 'country'})

iso3s = {}
for country in df['country'].unique():
    try:
        result = pycountry.countries.search_fuzzy(country)
        iso3 = result[0].alpha_3
        iso3s[country] = iso3
    except LookupError:
        print(f"Could not find {country}")
        iso3s[country] = None

df['iso3'] = df['country'].apply(lambda x: iso3s[x])

df = df.dropna(subset=['value'])
OECD_iso3 = [  "AUS",  "AUT",  "BEL",  "CAN",  "CHL",  "COL",  "CRI",  "CZE",  "DNK",  "EST",  "FIN",  "FRA",  "DEU",  "GRC",  "HUN",  "ISL",  "IRL",  "ISR",  "ITA",  "JPN",  "KOR",  "LVA",  "LTU",  "LUX",  "MEX",  "NLD",  "NZL",  "NOR",  "POL",  "PRT",  "SVK",  "SVN",  "ESP",  "SWE",  "CHE",  "TUR",  "GBR",  "USA"]

df['value'] = df['value']/100

df = df.query('iso3 in @OECD_iso3')
# get the low and high 10pctiles
# corp_df['low'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
df['low'] = df.groupby(by=["date"])['value'].transform(lambda x: x.quantile(0.1))
df['high'] = df.groupby(by=["date"])['value'].transform(lambda x: x.quantile(0.9))
df['median'] = df.groupby(by=["date"])['value'].transform('median')
df['rank'] = df.groupby(by=["date"])['value'].rank(ascending=False)

df

#      "category" : ["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"],
highlight_countries = {
    "USA": "#36B7B4",
    "GBR": "#E6224B",
    "FRA": "#F4C245",
    "DEU": "#0063AF",
    "other": "rgba(0,0,0,0.1)"
}

df['color'] = df['iso3'].apply(lambda x: highlight_countries.get(x, highlight_countries['other']))
# add a label to the latest value for each country
df['label'] = df.apply(lambda x: x['country'] if x['date'] == '2022' and x['iso3'] in highlight_countries else '', axis=1)
df['highlighted'] = df['iso3'].apply(lambda x: x in highlight_countries)

df = df.sort_values('date')
df['date'] = pd.to_numeric(df['date'])

df = df.query('iso3 in @highlight_countries.keys() and date > 1999') 

df['date'] = pd.to_datetime(df['date'], format='%Y')

base = alt.Chart(df).encode(
    x=alt.X('date:T', axis=alt.Axis(), scale=alt.Scale()),
    y=alt.Y('value:Q', axis=alt.Axis(title=None, format="%")),
    color=alt.Color('color:N', scale=None),
    detail='country:N',
)

lines = base.mark_line().encode(
    size=alt.condition(alt.datum.iso3 == 'GBR', alt.value(2), alt.value(1))
)

end_labels = base.transform_filter('datum.highlighted').mark_text(
    align='left',
    baseline='middle',
    dx=5,
    dy=0,
    fontSize=alt.expr("datum.iso3 == 'GBR' ? 13 : 11"),
).encode(
    text=alt.Text('label'),
)

# shade the end area
shade = base.mark_area(opacity=1, color="rgba(24, 42, 56, 1)").encode(
    x=alt.X('date:T'),
    y=alt.Y('low:Q'),
    y2=alt.Y2('high:Q'),
    color=alt.value("rgb(232, 232, 232)")
)

chart = shade + lines + end_labels

chart = chart.properties(
    width=400,
    height=300,
    title=alt.TitleParams(
        text="NEET Rates in OECD Countries",
        subtitle=["15-19 year-olds not in education, employment or training", "OECD coutries, 10th-90th percentiles shaded", ""],
        anchor="start",
        fontSize=16,
        color="#808080",
        subtitleColor="#676A86",
        subtitleFontSize=12
    )
)

chart.save("neet.json")


Could not find OECD - Average
Could not find Türkiye


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
