In [1]:
import altair as alt
from vega_datasets import data
import pandas as pd
import geopandas as gpd

In [2]:
# Show every column when a frame is displayed (no truncation of wide frames).
pd.set_option('display.max_columns', None)

# Load the sheet: the header sits on spreadsheet row index 5, only column A
# and columns D..AA are kept, three non-data rows are skipped and the
# '..' placeholder is read as a missing value.
df = pd.read_excel(
    "dataset.xlsx",
    header=5,
    usecols="A, D:AA",
    skiprows=[6, 7, 50],
    na_values='..',
)

# Drop the first two characters of every header (two leading whitespace
# characters in the sheet). The same slice shortens the label of the first
# column to 'named: 0', which is then given its real name.
df.columns = df.columns.str[2:]
df = df.rename(columns={'named: 0': 'Country'})

# Country-name corrections, keyed by the row's POSITION in the loaded frame.
# NOTE(review): these positions depend on the exact row order of the sheet.
country_name_by_position = {
    # non-OECD countries: correct the naming scheme
    39: 'Brazil',
    40: 'Russia',
    41: 'South Africa',
    # names adjusted to conform to the UN standard
    37: 'United States of America',
    35: 'Turkey',
    30: 'Slovakia',
    20: 'South Korea',
    7: 'Czechia',
}
for row_position, country_name in country_name_by_position.items():
    df.iloc[row_position, 0] = country_name

In [3]:
# Number of missing values per column (a Series indexed by column name).
nulls = df.isna().sum()

# Reshape the counts into a two-column frame that Altair can plot.
visualise_nulls = pd.DataFrame({'Columns': nulls.index, 'Count': nulls.values})

# One horizontal bar per column; bar length = number of missing values.
# (No `text` channel here: a bar mark does not use it, the labels are drawn
# by the text layer below.)
bar = alt.Chart(visualise_nulls).mark_bar().encode(
    x=alt.X('Count:Q'),
    y=alt.Y('Columns:N'),
)
# Label every bar with its value, placed just right of the bar's end.
text = bar.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text='Count:Q'
)
bar + text

In [4]:
# Mean-impute every indicator column (everything except 'Country').
# The filled columns are assigned back to `df` explicitly. Calling
# `df[column].fillna(..., inplace=True)` operates on a column selection
# (chained assignment): it is deprecated in pandas 2.x and no longer updates
# `df` once Copy-on-Write is active, which would silently leave the NaNs in.
indicator_columns = [column for column in df.columns if column != 'Country']
df[indicator_columns] = df[indicator_columns].fillna(df[indicator_columns].mean())

# testing imputations have been implemented correctly
#earnings_mean = df['Personal earnings'].mean()
#print(earnings_mean)
#df[df['Personal earnings'] == earnings_mean]
print(df.columns)

Index(['Country', 'Dwellings without basic facilities', 'Housing expenditure',
       'Rooms per person', 'Household net adjusted disposable income',
       'Household net wealth', 'Labour market insecurity', 'Employment rate',
       'Long-term unemployment rate', 'Personal earnings',
       'Quality of support network', 'Educational attainment',
       'Student skills', 'Years in education', 'Air pollution',
       'Water quality', 'Stakeholder engagement for developing regulations',
       'Voter turnout', 'Life expectancy', 'Self-reported health',
       'Life satisfaction', 'Feeling safe walking alone at night',
       'Homicide rate', 'Employees working very long hours',
       'Time devoted to leisure and personal care'],
      dtype='object')


In [13]:
# Source of the cartography background
url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
countries_shape = gpd.read_file(url) # zipped shapefile
# keep only the attributes used by the charts, plus the geometry itself
countries_shape = countries_shape[['NAME', 'CONTINENT', 'ISO_A3', 'geometry']]

# plain gray country outlines, drawn underneath the data-driven map layer
countries = alt.Chart(countries_shape).mark_geoshape(
    fill='gray', stroke='black', strokeWidth=0.5)

# `sphere` must be defined before it is used below; with this line commented
# out the cell raised NameError on a fresh kernel.
sphere = alt.sphere()

# background: white globe outline with black graticule lines on top
basemap = alt.layer(
    alt.Chart(sphere).mark_geoshape(fill='white'),
    alt.Chart(alt.graticule()).mark_geoshape(stroke='black')
)

# Point selection keyed on the shape's NAME field; it is shared by the charts
# that register it via add_params.
click_countries  = alt.selection_point(fields=['NAME'])

# Join 'Life satisfaction' from `df` onto each shape (shape NAME == df Country).
life_satisfaction_lookup = alt.LookupData(
    data=df, key='Country', fields=['Life satisfaction']
)
# Full opacity for marks inside the selection, 0.2 for everything else.
map_opacity = alt.condition(click_countries, alt.value(1), alt.value(0.2))

# Map layer: countries coloured by their life-satisfaction value.
chloropleth = (
    alt.Chart(countries_shape)
    .mark_geoshape(stroke='black', strokeWidth=0.5)
    .transform_lookup(lookup='NAME', from_=life_satisfaction_lookup)
    .encode(
        color=alt.Color('Life satisfaction:Q'),
        opacity=map_opacity,
        tooltip=['NAME', 'Life satisfaction:Q'],
    )
    .add_params(click_countries)
    .interactive()
)

# Join the bar chart's fields from `df` onto the shapes (shape NAME == df Country).
life_expectancy_lookup = alt.LookupData(
    data=df, key='Country', fields=['Life expectancy', 'Country']
)

# Bar chart of life expectancy per country, ordered by descending bar height
# and dimmed to 0.2 opacity for countries outside the shared selection.
bars = (
    alt.Chart(countries_shape)
    .mark_bar()
    .transform_lookup(lookup='NAME', from_=life_expectancy_lookup)
    .encode(
        x=alt.X('Country:N', sort='-y'),
        y=alt.Y('Life expectancy:Q'),
        opacity=alt.condition(click_countries, alt.value(1), alt.value(0.2)),
    )
    .add_params(click_countries)
)

# Stand-alone country legend that registers the shared `click_countries`
# selection. `countries_shape` only carries NAME/CONTINENT/ISO_A3/geometry, so
# the 'Country' field has to be joined in from `df`; without the lookup the
# encodings below referenced a field that does not exist in the chart data.
legend = (
    alt.Chart(countries_shape)
    .mark_point()
    .transform_lookup(
        lookup='NAME',
        from_=alt.LookupData(data=df, key='Country', fields=['Country'])
    )
    # shapes with no matching row in `df` get no 'Country' value; leave them out
    .transform_filter('isValid(datum.Country)')
    .encode(
        y=alt.Y('Country:N', axis=alt.Axis(orient='right')),
        shape='Country:N'
    )
    .add_params(click_countries)
)

# Join the scatter plot's fields from `df` onto the shapes (shape NAME == df Country).
scatter_lookup = alt.LookupData(
    data=df,
    key='Country',
    fields=['Employment rate', 'Feeling safe walking alone at night', 'Life satisfaction'],
)

# Employment rate vs. feeling safe at night, one point per country. Neither
# axis is forced to include zero; points outside the shared selection are
# dimmed to 0.2 opacity.
scatter = (
    alt.Chart(countries_shape)
    .mark_point()
    .transform_lookup(lookup='NAME', from_=scatter_lookup)
    .encode(
        x=alt.X('Employment rate:Q', scale=alt.Scale(zero=False)),
        y=alt.Y('Feeling safe walking alone at night:Q', scale=alt.Scale(zero=False)),
        opacity=alt.condition(click_countries, alt.value(1), alt.value(0.2)),
        tooltip=['NAME', 'Life satisfaction:Q'],
    )
    .add_params(click_countries)
)

# World map: background, gray outlines and the data layer stacked together,
# drawn in the Equal Earth projection at a fixed size.
world_map = (
    (basemap + countries + chloropleth)
    .project('equalEarth')
    .properties(width=700, height=300)
)

# Multiple-coordinated-view layout: map on top, then scatter plot, then bars.
mcv = alt.vconcat(
    alt.hconcat(world_map),
    alt.hconcat(scatter),
    alt.hconcat(bars),
)
# Write the whole dashboard to a stand-alone HTML file.
mcv.save('ScatterMCV.html')