In [13]:
import pandas as pd
import numpy as np
import eco_style
import altair as alt
import pycountry
import requests

alt.themes.enable('light')

ThemeRegistry.enable('light')

# Where has national service

In [298]:
# let's fetch from wikipedia, filter for Europe and manually verify
dfs = pd.read_html("https://en.wikipedia.org/wiki/Conscription")

In [299]:
df = dfs[2].copy()
# remove all (bracketed) bits in the Country column
df['Country'] = df['Country'].str.replace(r'\[.*?\]|\(.*?\)', '', regex=True)

# get the iso3s and numeric ids
def get_country_iso3(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
        iso3 = result[0].alpha_3
        numeric = result[0].numeric
        iso2 = result[0].alpha_2
        return iso3, numeric, iso2
    except LookupError:
        print(f"Could not find {country}")
        return None, None, None
    
df['iso3_numeric'] = df['Country'].apply(get_country_iso3)
df['iso3'] = df['iso3_numeric'].apply(lambda x: x[0])
df['numeric'] = df['iso3_numeric'].apply(lambda x: x[1])
df['id'] = df['iso3_numeric'].apply(lambda x: x[2])

In [300]:
req = requests.get("https://raw.githubusercontent.com/jhellingsdata/RADataHub/main/e4e/ch8_Trends/data/chart8_x_europe.topojson")
geo = req.json()
eur_countries = geo['objects']['europe']['geometries']
eur_iso2s = [country['id'] for country in eur_countries]


In [301]:
# filter the df for Europe
df = df[df['id'].isin(eur_iso2s)]
eur_df = df.copy()

In [302]:
df.columns

Index(['Country', 'Conscription[112]', 'Conscription sex', 'iso3_numeric',
       'iso3', 'numeric', 'id'],
      dtype='object')

In [303]:
df = eur_df.copy().query("iso3 != 'ISR'")
df = df.rename(columns={'Conscription[112]': 'Conscription'})
# all the Conscriptions start with No or Yes
df['value'] = df['Conscription'].apply(lambda x: x.split()[0])
df['value'] = df['value'].apply(lambda x: 'Yes' in x)
df = df[['Country', 'value', 'iso3', 'numeric', 'id', 'Conscription']]

manual_additions = [
    {
        "Country": "Belarus",
        "value": True,
        "iso3": "BLR",
        "numeric": 112,
        "id": "BY",
    },
    {
        "Country" : "Montenegro",
        "value": False,
        "iso3": "MNE",
        "numeric": 499,
        "id": "ME",
    }
]


df = pd.concat([df, pd.DataFrame(manual_additions)], ignore_index=True)

df[['iso3', 'id', 'value']].to_csv("conscription.csv", index=False)

In [259]:
len(df.query("value == True"))

18

In [304]:
df

Unnamed: 0,Country,value,iso3,numeric,id,Conscription
0,Albania,False,ALB,8,AL,No (abolished in 2010)[113]
1,Armenia,True,ARM,51,AM,Yes
2,Austria,True,AUT,40,AT,Yes (alternative service available)[116]
3,Azerbaijan,True,AZE,31,AZ,Yes
4,Belgium,False,BEL,56,BE,No (suspended in 1992; service not required of...
5,Bosnia and Herzegovina,False,BIH,70,BA,No (abolished on 1 January 2006)[120]
6,Bulgaria,False,BGR,100,BG,No (abolished by law on 1 January 2008)[123]
7,Croatia,False,HRV,191,HR,No (abolished by law in 2008)[128]
8,Cyprus,True,CYP,196,CY,Yes (alternative service available)
9,Czech Republic,False,CZE,203,CZ,No (abolished in 2005)[129]


# Europe Timeline

In [279]:
df = pd.read_csv("eur_conscription.csv")
df = df[['iso3', 'started_year', 'ended_year']]
df.columns = ['iso3', 'start', 'end']
df = df.query('end > start')

exclude = [
    "FRA" # France suspended conscription in 1997 but only officially ended it in 2001 but let's exclude it because it's a bit of a mess and lots of countries have suspended it

]

def get_name(x):
    if x == 'RUS':
        return "Russia"
    if x == 'TUR':
        return "Türkiye"
    try:
        res = pycountry.countries.get(alpha_3=x)
        # if it has a common_name, use that
        try:
            return res.common_name
        except AttributeError:
            return res.name
    except LookupError:
        return None


df['name'] = df['iso3'].apply(lambda x: get_name(x))

df['start'] = pd.to_datetime(df['start'], format='%Y')
df['end'] = pd.to_datetime(df['end'], format='%Y')

df = df.sort_values('end')
df['rank'] = np.arange(len(df))

df['start_label'] = 'Start'
df['end_label'] = 'End'

base = alt.Chart(df).encode(
    y=alt.X('name:N', sort="-x", 
            axis=alt.Axis(domain=False,
                          labelFontSize=13,
                           labelColor="#1d1d1d"),
            title=None),
    x=alt.X('start:T',
            axis=alt.Axis(
                labelFontSize=13,
            ),
            scale=alt.Scale(zero=False,
                            domain=(pd.Timestamp('2000'),
                                    pd.Timestamp('2024-12-31')),
                                    clamp=True),
                            ),

    x2='end:T'

)

rules = base.mark_rule()

# add filled start and end points
start_points = base.transform_filter(
    'year(datum.start) >1999').mark_circle(
        size=75,
        opacity=1,
        color="#36b7b4")
end_points = base.transform_filter('year(datum.end) <2025').transform_calculate(
    start='datum.end' # this is a hack - if we use the end in the encoding, it'll break the ordering
).mark_circle(color="#e6224b", size=75, opacity=1)

# text on Sweden to annotate the start and end points
swe_start_text = base.transform_filter('datum.iso3 == "SWE" && year(datum.start) > 2005').mark_text(
    align='right',
    baseline='middle',
    dx=-8,
    dy=0,
    fontSize=13,
    color="#36b7b4",
    text='Start'
).encode(
    text=alt.Text('start_label')
)

swe_end_text = base.transform_filter('datum.iso3 == "SWE" && year(datum.end) < 2020').transform_calculate(
    start='datum.end' # same hack as above
).mark_text(
        align='left',
    baseline='middle',
    dx=8,
    dy=0,
    fontSize=13,
    color="#e6224b",
    text='End'
).encode(
    text=alt.Text('end_label')
)
chart = rules + start_points+end_points + swe_start_text + swe_end_text

chart = chart.properties(
    height=600,
    width=400,
    title = alt.TitleParams(
        text="National Service in the 21st Century",
        subtitle=["European countries with national service since 2000", "Various sources", ""],
        fontSize=16,
        subtitleFontSize=12,
        anchor="start",
        dx=140
    )
)

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [278]:
df

Unnamed: 0,iso3,start,end,name,rank,start_label,end_label
31,ESP,1999-01-01,2001-01-01,Spain,0,Start,End
13,FRA,1999-01-01,2001-01-01,France,1,Start,End
26,PRT,1999-01-01,2004-01-01,Portugal,2,Start,End
16,HUN,1999-01-01,2004-01-01,Hungary,3,Start,End
9,CZE,1999-01-01,2005-01-01,Czechia,4,Start,End
17,ITA,1999-01-01,2005-01-01,Italy,5,Start,End
39,MNE,1999-01-01,2006-01-01,Montenegro,6,Start,End
30,SVK,1999-01-01,2006-01-01,Slovakia,7,Start,End
5,BIH,1999-01-01,2006-01-01,Bosnia and Herzegovina,8,Start,End
23,MKD,1999-01-01,2006-01-01,North Macedonia,9,Start,End


In [280]:
chart.save("conscription_chart.png", scale_factor=3.0)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
WARN x2 dropped as it is incompatible with "circle".
WARN x2 dropped as it is incompatible with "circle".
WARN x2 dropped as it is incompatible with "text".
WARN x2 dropped as it is incompatible with "text".
WARN Conflicting scale property "domain" ([{"signal":"{data: datetime(\"2000-01-01T00:00:00\")}"},{"signal":"{data: datetime(\"2024-12-31T00:00:00\")}"}] and [["2000-01-01T00:00:00","2024-12-31T00:00:00"]]). Using the union of the two domains.
WARN Conflicting scale property "domains" ([{"signal":"{data: datetime(\"2000-01-01T00:00:00\")}"},{"signal":"{data: datetime(\"2024-12-31T00:00:00\")}"},["2000-01-01T00:00:00","2024-12-31T00:00:00"]] and [{"signal":"{data: datetime(\"2000-01-01T00:00:00\")}"},{"signal":"{data: datetime(\"2024-12-31T00:00:00\")}"}]). Using the union of the two doma

In [274]:
len(df.query("end < 2010"))

15

In [273]:
df.query("end > 2010 and end < 2025")

Unnamed: 0,iso3,start,end,name,rank,start_label,end_label
14,DEU,1999-01-01,2011-01-01,Germany,17,Start,End
29,SRB,1999-01-01,2011-01-01,Serbia,18,Start,End
36,UKR,1999-01-01,2013-01-01,Ukraine,19,Start,End


In [271]:
df.query("end < 2010")

Unnamed: 0,iso3,start,end,name,rank,start_label,end_label
31,ESP,1999-01-01,2001-01-01,Spain,0,Start,End
13,FRA,1999-01-01,2001-01-01,France,1,Start,End
26,PRT,1999-01-01,2004-01-01,Portugal,2,Start,End
16,HUN,1999-01-01,2004-01-01,Hungary,3,Start,End
9,CZE,1999-01-01,2005-01-01,Czechia,4,Start,End
17,ITA,1999-01-01,2005-01-01,Italy,5,Start,End
39,MNE,1999-01-01,2006-01-01,Montenegro,6,Start,End
30,SVK,1999-01-01,2006-01-01,Slovakia,7,Start,End
5,BIH,1999-01-01,2006-01-01,Bosnia and Herzegovina,8,Start,End
23,MKD,1999-01-01,2006-01-01,North Macedonia,9,Start,End


In [268]:
df

Unnamed: 0,iso3,start,end,name,rank,start_label,end_label
31,ESP,1999-01-01,2001-01-01,Spain,0,Start,End
13,FRA,1999-01-01,2001-01-01,France,1,Start,End
26,PRT,1999-01-01,2004-01-01,Portugal,2,Start,End
16,HUN,1999-01-01,2004-01-01,Hungary,3,Start,End
9,CZE,1999-01-01,2005-01-01,Czechia,4,Start,End
17,ITA,1999-01-01,2005-01-01,Italy,5,Start,End
39,MNE,1999-01-01,2006-01-01,Montenegro,6,Start,End
30,SVK,1999-01-01,2006-01-01,Slovakia,7,Start,End
5,BIH,1999-01-01,2006-01-01,Bosnia and Herzegovina,8,Start,End
23,MKD,1999-01-01,2006-01-01,North Macedonia,9,Start,End


# NEET Young People by Country

Could not find OECD - Average
Could not find Türkiye


In [296]:
dfs = pd.read_html("oecd_neet.html")
df = dfs[0]

df.columns = [c.replace("▾", "").strip() for c in df.columns]
df = df.melt(id_vars=['Location'], var_name='date', value_name='value')
df = df.rename(columns={'Location': 'country'})

iso3s = {}
for country in df['country'].unique():
    try:
        result = pycountry.countries.search_fuzzy(country)
        iso3 = result[0].alpha_3
        iso3s[country] = iso3
    except LookupError:
        print(f"Could not find {country}")
        iso3s[country] = None

df['iso3'] = df['country'].apply(lambda x: iso3s[x])

df = df.dropna(subset=['value'])
OECD_iso3 = [  "AUS",  "AUT",  "BEL",  "CAN",  "CHL",  "COL",  "CRI",  "CZE",  "DNK",  "EST",  "FIN",  "FRA",  "DEU",  "GRC",  "HUN",  "ISL",  "IRL",  "ISR",  "ITA",  "JPN",  "KOR",  "LVA",  "LTU",  "LUX",  "MEX",  "NLD",  "NZL",  "NOR",  "POL",  "PRT",  "SVK",  "SVN",  "ESP",  "SWE",  "CHE",  "TUR",  "GBR",  "USA"]

df['value'] = df['value']/100

df = df.query('iso3 in @OECD_iso3')
# get the low and high 10pctiles
# corp_df['low'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
df['low'] = df.groupby(by=["date"])['value'].transform(lambda x: x.quantile(0.1))
df['high'] = df.groupby(by=["date"])['value'].transform(lambda x: x.quantile(0.9))
df['median'] = df.groupby(by=["date"])['value'].transform('median')
df['rank'] = df.groupby(by=["date"])['value'].rank(ascending=False)

df

Could not find OECD - Average
Could not find Türkiye


Unnamed: 0,country,date,value,iso3,low,high,median,rank
1,Australia,1997,0.0807,AUS,0.02592,0.14696,0.0740,8.0
3,Belgium,1997,0.0897,BEL,0.02592,0.14696,0.0740,6.0
6,Canada,1997,0.0774,CAN,0.02592,0.14696,0.0740,9.0
10,Czechia,1997,0.0500,CZE,0.02592,0.14696,0.0740,14.0
11,Denmark,1997,0.0145,DNK,0.02592,0.14696,0.0740,19.0
...,...,...,...,...,...,...,...,...
1138,Spain,2022,0.0748,ESP,0.03302,0.13894,0.0635,12.0
1139,Sweden,2022,0.0371,SWE,0.03302,0.13894,0.0635,28.0
1140,Switzerland,2022,0.2036,CHE,0.03302,0.13894,0.0635,3.0
1142,United Kingdom,2022,0.0744,GBR,0.03302,0.13894,0.0635,13.0


In [297]:

#      "category" : ["#36B7B4","#E6224B","#F4C245","#0063AF","#00A767","#179FDB","#EB5C2E"],
highlight_countries = {
    "USA": "#36B7B4",
    "GBR": "#E6224B",
    "FRA": "#F4C245",
    "DEU": "#0063AF",
    "other": "rgba(0,0,0,0.1)"
}

df['color'] = df['iso3'].apply(lambda x: highlight_countries.get(x, highlight_countries['other']))
# add a label to the latest value for each country
df['label'] = df.apply(lambda x: x['country'] if x['date'] == '2022' and x['iso3'] in highlight_countries else '', axis=1)
df['highlighted'] = df['iso3'].apply(lambda x: x in highlight_countries)

df = df.sort_values('date')
df['date'] = pd.to_numeric(df['date'])

df = df.query('iso3 in @highlight_countries.keys() and date > 1999') 

df['date'] = pd.to_datetime(df['date'], format='%Y')

base = alt.Chart(df).encode(
    x=alt.X('date:T', axis=alt.Axis(), scale=alt.Scale()),
    y=alt.Y('value:Q', axis=alt.Axis(title=None, format="%")),
    color=alt.Color('color:N', scale=None),
    detail='country:N',
)

lines = base.mark_line().encode(
    size=alt.condition(alt.datum.iso3 == 'GBR', alt.value(2), alt.value(1))
)

end_labels = base.transform_filter('datum.highlighted').mark_text(
    align='left',
    baseline='middle',
    dx=5,
    dy=0,
    fontSize=alt.expr("datum.iso3 == 'GBR' ? 13 : 11"),
).encode(
    text=alt.Text('label'),
)

# shade the end area
shade = base.mark_area(opacity=1, color="rgba(24, 42, 56, 1)").encode(
    x=alt.X('date:T'),
    y=alt.Y('low:Q'),
    y2=alt.Y2('high:Q'),
    color=alt.value("rgb(232, 232, 232)")
)

chart = shade + lines + end_labels

chart = chart.properties(
    width=400,
    height=300,
    title=alt.TitleParams(
        text="NEET Rates in OECD Countries",
        subtitle=["15-19 year-olds not in education, employment or training", "OECD coutries, 10th-90th percentiles shaded", ""],
        anchor="start",
        fontSize=16,
        color="#808080",
        subtitleColor="#676A86",
        subtitleFontSize=12
    )
)

chart.save("neet_chart.png", scale_factor=3.0)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [283]:
df

Unnamed: 0,country,date,value,iso3,low,high,color,label,highlighted
175,United States,2000-01-01,0.0790,USA,0.02811,0.12750,#36B7B4,,True
174,United Kingdom,2000-01-01,0.0804,GBR,0.02811,0.12750,#E6224B,,True
147,Germany,2000-01-01,0.0571,DEU,0.02811,0.12750,#0063AF,,True
146,France,2000-01-01,0.0699,FRA,0.02811,0.12750,#F4C245,,True
218,United Kingdom,2001-01-01,0.0816,GBR,0.03797,0.12144,#E6224B,,True
...,...,...,...,...,...,...,...,...,...
1071,Germany,2021-01-01,0.0519,DEU,0.03913,0.12469,#0063AF,,True
1142,United Kingdom,2022-01-01,0.0744,GBR,0.03302,0.13894,#E6224B,United Kingdom,True
1114,France,2022-01-01,0.0635,FRA,0.03302,0.13894,#F4C245,France,True
1115,Germany,2022-01-01,0.0464,DEU,0.03302,0.13894,#0063AF,Germany,True


In [234]:
df[df.iso3.isin(['GBR', 'USA', 'FRA', 'DEU'])].drop_duplicates(subset=['country'], keep='first')

Unnamed: 0,country,date,value,iso3,color,label,highlighted,low,high
43,United States,1997,7.07,USA,#36B7B4,,True,2.592,14.696
14,France,1997,6.8,FRA,#F4C245,,True,2.592,14.696
15,Germany,1997,4.99,DEU,#0063AF,,True,2.592,14.696
174,United Kingdom,2000,8.04,GBR,#E6224B,,True,2.811,12.75


In [91]:
pycountry.countries.get(alpha_3="MDA").common_name

'Moldova'

In [74]:
df

Unnamed: 0,iso3,start,end
31,ESP,1999-01-01,2001-01-01
26,PRT,1999-01-01,2004-01-01
16,HUN,1999-01-01,2004-01-01
9,CZE,1999-01-01,2005-01-01
17,ITA,1999-01-01,2005-01-01
30,SVK,1999-01-01,2006-01-01
5,BIH,1999-01-01,2006-01-01
23,MKD,1999-01-01,2006-01-01
18,LVA,1999-01-01,2007-01-01
27,ROU,1999-01-01,2007-01-01


In [52]:
base.mark_rule()

SchemaValidationError: `Data` has no parameter named 'arcs'

Existing parameter names are:
args   

See the help for `Data` to read the full description of these parameters

alt.Chart(...)

In [None]:

rules = base.mark_rule()

chart = rules

chart

In [50]:
base.mark_rule()

SchemaValidationError: `Data` has no parameter named 'arcs'

Existing parameter names are:
args   

See the help for `Data` to read the full description of these parameters

alt.Chart(...)

In [44]:
df

Unnamed: 0,Country,value,iso3,numeric,id,Conscription
1,Albania,False,ALB,8,AL,No (abolished in 2010)[113]
5,Armenia,True,ARM,51,AM,Yes
7,Austria,True,AUT,40,AT,Yes (alternative service available)[116]
8,Azerbaijan,True,AZE,31,AZ,Yes
12,Belgium,False,BEL,56,BE,No (suspended in 1992; service not required of...
16,Bosnia and Herzegovina,False,BIH,70,BA,No (abolished on 1 January 2006)[120]
18,Bulgaria,False,BGR,100,BG,No (abolished by law on 1 January 2008)[123]
23,Croatia,False,HRV,191,HR,No (abolished by law in 2008)[128]
25,Cyprus,True,CYP,196,CY,Yes (alternative service available)
26,Czech Republic,False,CZE,203,CZ,No (abolished in 2005)[129]
