In [1]:
import altair as alt
import numpy as np
import pandas as pd
import random

# Blank map

In [2]:
# Location of the TopoJSON file with the Belgian municipality boundaries.
geodata_url = 'https://gist.githubusercontent.com/jandot/ba7eff2e15a38c6f809ba5e8bd8b6977/raw/eb49ce8dd2604e558e10e15d9a3806f114744e80/belgium_municipalities_topojson.json'
# Feature name handed to alt.topo_feature to pick the shapes out of that file.
geodata_feature = 'BE_municipalities'

Parameters are the TopoJSON URL and the feature to use.

In [3]:
def create_topo_data(url, feature):
    """Build an Altair TopoJSON data source for one feature of a file.

    Parameters
    ----------
    url : str
        Location of the TopoJSON file.
    feature : str
        Name of the feature to extract from that file.

    Returns
    -------
    Whatever ``alt.topo_feature(url, feature)`` returns; it is meant to be
    passed to ``alt.Chart`` as its data.
    """
    # Use the ``url`` argument: the earlier version read the notebook-level
    # ``geodata_url`` instead and silently ignored what the caller passed in.
    return alt.topo_feature(url, feature)

In [4]:
# Data source for the municipality shapes; every map below is drawn from it.
topo_municipalities = create_topo_data(geodata_url, geodata_feature)

Parameters would be TopoJSON URL, stroke color and width.

In [8]:
# Blank map: white fill with black municipality borders.
(
    alt.Chart(topo_municipalities)
    .mark_geoshape(stroke='black', strokeWidth=0.9)
    .encode(color=alt.value('white'))
)

# Reading data

Parameters would be data file URL, data file type, and if necessary file encoding, column separator.

In [9]:
def read_data(uri, encoding=None, delimiter=None):
    """Load a tabular data file into a pandas DataFrame.

    The reader is picked from the file extension (case-insensitive):
    ``.csv`` is read with ``pd.read_csv`` and ``.xlsx`` with ``pd.read_excel``.

    Parameters
    ----------
    uri : str or path-like
        Path or URL of the data file.
    encoding : str, optional
        Text encoding; only used for CSV files.
    delimiter : str, optional
        Column separator; only used for CSV files.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    # Only the extension check needs a string; pandas accepts path objects
    # itself, so ``uri`` is passed through to the readers untouched.
    name = str(uri).lower()
    if name.endswith('.csv'):
        return pd.read_csv(uri, encoding=encoding, delimiter=delimiter)
    if name.endswith('.xlsx'):
        return pd.read_excel(uri)
    raise ValueError(f'file type of "{uri}" is unknown')

In [10]:
def validate_data(data):
    """Check that ``data`` has a usable NIS code column and normalise its name.

    The NIS code column is found by a case-insensitive match on ``'niscode'``.
    On success the matching column of ``data`` is renamed to ``'niscode'``
    in place (the frame passed in is modified) and nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to validate.

    Raises
    ------
    ValueError
        If there are fewer than two columns, if no column or more than one
        column is named like the NIS code, or if the NIS code column does not
        have an integer dtype.
    """
    if len(data.columns) < 2:
        raise ValueError('data should have at least a NIS code and a data column')
    # Column labels need not be strings (e.g. integer labels), so only string
    # labels are considered when looking for the NIS code column.
    nis_code_pos = [idx for idx, col_name in enumerate(data.columns)
                    if isinstance(col_name, str) and col_name.lower() == 'niscode']
    if len(nis_code_pos) > 1:
        raise ValueError('multiple columns have NIS code name')
    if not nis_code_pos:
        raise ValueError('NIS code column is missing')
    columns = list(data.columns)
    columns[nis_code_pos[0]] = 'niscode'
    data.columns = columns
    # Accept every integer dtype (int32, unsigned, nullable Int64, ...), not
    # just int64: the default integer width depends on platform and reader.
    if not pd.api.types.is_integer_dtype(data['niscode'].dtype):
        raise ValueError('data type for NIS code is incorrect, should be integer')

In [11]:
# The file is read as ';'-separated text in ISO-8859-1 encoding.
data = read_data('data/Gemeenten-latest.csv', encoding='iso-8859-1', delimiter=';')

In [12]:
# Raises ValueError when the NIS code column is missing, duplicated or not
# integer; on success that column of `data` is named 'niscode'.
validate_data(data)

In [13]:
# Preview the first rows of the validated frame.
data.head()

Unnamed: 0,ID,Gemeente,Bevolking,niscode,DATE,COUNT
0,1,AARTSELAAR,14423,11001,2020-03-31,0
1,2,ANTWERPEN,527763,11002,2020-03-31,52
2,3,BOECHOUT,13373,11004,2020-03-31,0
3,4,BOOM,18526,11005,2020-03-31,1
4,5,BORSBEEK,10943,11007,2020-03-31,0


Select only data from March 31st, 2020.

In [14]:
# Keep only the rows dated 2020-03-31. The .copy() makes an independent frame,
# so the column added further down is not written to a slice of the original.
data = data.query('DATE == "2020-03-31"').copy()

In [15]:
# Preview the first rows after the date filter.
data.head()

Unnamed: 0,ID,Gemeente,Bevolking,niscode,DATE,COUNT
0,1,AARTSELAAR,14423,11001,2020-03-31,0
1,2,ANTWERPEN,527763,11002,2020-03-31,52
2,3,BOECHOUT,13373,11004,2020-03-31,0
3,4,BOOM,18526,11005,2020-03-31,1
4,5,BORSBEEK,10943,11007,2020-03-31,0


Add a column with random qualitative data.

In [16]:
# A dedicated, seeded generator makes the random colours identical on every
# "Restart & Run All"; the seed value itself is arbitrary.
color_rng = random.Random(20200331)
data['color'] = color_rng.choices(['red', 'green', 'blue', 'yellow'], k=len(data))

In [17]:
# Preview the first rows with the new 'color' column.
data.head()

Unnamed: 0,ID,Gemeente,Bevolking,niscode,DATE,COUNT,color
0,1,AARTSELAAR,14423,11001,2020-03-31,0,red
1,2,ANTWERPEN,527763,11002,2020-03-31,52,green
2,3,BOECHOUT,13373,11004,2020-03-31,0,red
3,4,BOOM,18526,11005,2020-03-31,1,blue
4,5,BORSBEEK,10943,11007,2020-03-31,0,yellow


In [18]:
# Save the filtered frame, including the 'color' column.
# NOTE(review): to_csv writes the row index as an extra unnamed first column by
# default; pass index=False if consumers of this file do not need it.
data.to_csv('data/data.csv')

# Putting data on the map

## Quantitative data

Parameters would be legend title, color scheme, data domain (min. value, max. value).

In [19]:
def guess_data_type(data, columns):
    """Guess the encoding type code for each of the given columns.

    Integer and floating-point columns of any width (signed, unsigned or
    pandas nullable) are mapped to ``'Q'``; every other dtype, including
    booleans, strings and datetimes, is mapped to ``'N'``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns.
    columns : iterable of str
        Names of the columns to classify.

    Returns
    -------
    dict
        Maps each column name to ``'Q'`` or ``'N'``.
    """
    is_integer = pd.api.types.is_integer_dtype
    is_float = pd.api.types.is_float_dtype
    types = {}
    for column in columns:
        pandas_type = data[column].dtype
        # The dtype predicates cover every integer/float width, where the old
        # hard-coded list only knew int32/int64/float32/float64.
        if is_integer(pandas_type) or is_float(pandas_type):
            types[column] = 'Q'
        else:
            types[column] = 'N'
    return types

In [141]:
def create_plot(topo_data, data, column_name, data_type, tooltips = None,
                stroke='lightgrey', strokeWidth=0.5, legend_title=None,
                schema='reds'):
    """Draw a map whose shapes are coloured by one column of ``data``.

    Two layers are stacked: a faint outline layer drawn from ``topo_data``
    alone, and on top of it a layer that joins ``data`` to the shapes
    (shape field ``properties.CODE_INS`` against data column ``niscode``) and
    colours each shape by ``column_name``.

    Parameters
    ----------
    topo_data
        Shape data passed to ``alt.Chart``.
    data
        Table passed to ``alt.LookupData``; it is keyed on ``'niscode'``.
    column_name : str
        Column that drives the colour; it is always part of the tooltip.
    data_type : str
        Type code appended to the colour field (``'Q'``, ``'N'``, ...).
    tooltips : iterable of str, optional
        Extra columns to look up and show in the tooltip.
    stroke, strokeWidth
        Border colour and width of the outline layer.
    legend_title : str, optional
        Legend title; defaults to ``column_name``.
    schema : str
        Name of the colour scheme.

    Returns
    -------
    The layered chart ``outline + coloured shapes``.
    """
    title = column_name if legend_title is None else legend_title
    extra_fields = [] if tooltips is None else list(tooltips)
    fields = [column_name] + extra_fields

    fill = alt.Color(
        f'{column_name}:{data_type}',
        legend=alt.Legend(title=title),
        scale=alt.Scale(scheme=schema),
    )
    joined = alt.LookupData(data, 'niscode', fields)

    # Coloured layer: shapes joined to the data table on the NIS code.
    data_layer = (
        alt.Chart(topo_data)
        .mark_geoshape()
        .encode(color=fill, tooltip=[f'{field}:N' for field in fields])
        .transform_lookup(lookup='properties.CODE_INS', from_=joined)
    )
    # Outline layer: every shape, almost transparent, with visible borders.
    outline_layer = (
        alt.Chart(topo_data)
        .mark_geoshape(stroke=stroke, strokeWidth=strokeWidth)
        .encode(opacity=alt.value(0.1))
    )
    return outline_layer + data_layer


In [142]:
def create_quantitative_plot(topo_data, data, column_name, tooltips=None,
                             stroke='lightgrey', strokeWidth=0.5, legend_title=None,
                             schema='reds'):
    """Draw a map coloured by a quantitative column.

    Thin wrapper around ``create_plot`` that fixes the data type to ``'Q'``
    and forwards every other argument unchanged.
    """
    return create_plot(
        topo_data, data, column_name, 'Q',
        tooltips=tooltips,
        stroke=stroke,
        strokeWidth=strokeWidth,
        legend_title=legend_title,
        schema=schema,
    )

In [143]:
# Colour the municipalities by the 'Bevolking' column; the tooltip shows that
# value together with 'Gemeente' and 'COUNT'.
create_quantitative_plot(topo_data=topo_municipalities, data=data, column_name='Bevolking',
                         tooltips=['Gemeente', 'COUNT'])

In [144]:
# Load through the notebook's own reader, which dispatches .xlsx files to
# pd.read_excel.
test_data = read_data('data/Test-map.xlsx')

In [145]:
# Rebind rather than rename in place: the cell stays idempotent and the call
# can be chained; create_plot joins on a column named 'niscode'.
test_data = test_data.rename(columns={'NISCODE': 'niscode'})

In [146]:
# Check column dtypes and how many values are missing per column.
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 589 entries, 0 to 588
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   niscode  589 non-null    int64  
 1   Data     425 non-null    float64
dtypes: float64(1), int64(1)
memory usage: 9.3 KB


In [147]:
# Same map type for the test workbook, with heavier black borders.
create_quantitative_plot(
    topo_data=topo_municipalities,
    data=test_data,
    column_name='Data',
    stroke='black',
    strokeWidth=1.0,
)

## Nominal data

Parameters would be legend title, color scheme.

In [None]:
def create_nominal_plot(topo_data, data, column_name,
                             stroke='lightgrey', strokeWidth=0.5, legend_title=None,
                             schema='reds', tooltips=None):
    """Draw a map coloured by a nominal (categorical) column.

    Thin wrapper around ``create_plot`` that fixes the data type to ``'N'``.

    Parameters
    ----------
    topo_data, data, column_name
        Forwarded to ``create_plot`` unchanged.
    stroke, strokeWidth
        Border colour and width of the outline layer.
    legend_title : str, optional
        Legend title; ``create_plot`` falls back to ``column_name``.
    schema : str
        Name of the colour scheme.
    tooltips : iterable of str, optional
        Extra columns to show in the tooltip. Added as the last parameter so
        existing positional calls keep their meaning.

    Returns
    -------
    The chart returned by ``create_plot``.
    """
    # Forward by keyword: the earlier positional call was shifted by one
    # (stroke ended up in create_plot's ``tooltips`` slot, strokeWidth in
    # ``stroke``, and so on), so none of the styling options arrived correctly.
    return create_plot(
        topo_data, data, column_name, 'N',
        tooltips=tooltips,
        stroke=stroke,
        strokeWidth=strokeWidth,
        legend_title=legend_title,
        schema=schema,
    )

In [None]:
# Colour the municipalities by the random 'color' column.
# NOTE(review): `data` was already filtered to 2020-03-31 in an earlier cell,
# so the query here looks redundant; confirm before removing it.
create_nominal_plot(topo_data=topo_municipalities, data=data.query('DATE == "2020-03-31"'),
                    column_name='color', legend_title='Kleur', schema='category10')