# Using categorical counts

In [None]:
import colorcet as cc
import datashader as ds
import datashader.transfer_functions as tf
import pandas as pd

## 1. Read data from parquet file into pandas DataFrame

In [None]:
# Load the parquet data at "parq" into a pandas DataFrame.
# To read a single file instead, point the path at e.g. "parq/wales.parq".
df = pd.read_parquet(path="parq")
# Leave the frame as the last expression so the notebook renders it.
df

## 2. Highest qualification column

In [None]:
# Count how many rows fall into each "highest_qualification" group.
rows_by_qualification = df.groupby(by="highest_qualification")
rows_by_qualification.size()

## 3. Height and width of image using correct aspect ratio

In [None]:
# Fix the image height in pixels and derive the width from the extent of the
# data, so that width / height equals (x extent) / (y extent).
height = 750
x_extent = df.x.max() - df.x.min()
y_extent = df.y.max() - df.y.min()
if not (x_extent > 0 and y_extent > 0):
    # A zero extent (all points share one coordinate) or a NaN extent (e.g. an
    # empty frame) would make the ratio inf/NaN/0, and the int() conversion or
    # the canvas creation would then fail with an unhelpful error.
    raise ValueError(
        f"Cannot derive an aspect ratio from x extent {x_extent!r} "
        f"and y extent {y_extent!r}"
    )
aspect = x_extent / y_extent
# Truncate as before, but never let a very narrow extent collapse to 0 pixels.
width = max(1, int(height * aspect))

## 4. Calculate counts per pixel for each category

In [None]:
# Canvas of width x height pixels; aggregate the (x, y) points onto it with one
# count per "highest_qualification" category.
cvs = ds.Canvas(plot_width=width, plot_height=height)
counts = cvs.points(df, "x", "y", agg=ds.by("highest_qualification"))
# Show the aggregate's type, shape, dtype and its smallest and largest values.
(
    type(counts),
    counts.shape,
    counts.dtype,
    counts.min().item(),
    counts.max().item(),
)

## 5. Apply colours to obtain an image

### Colours are Paul Tol's discrete rainbow colour scheme (https://personal.sron.nl/~pault/) with 5 colours: dark blue, light blue, green, yellow, red

In [None]:
# Colour assigned to each qualification category. "other" is black, the same
# colour as the background applied below.
color_key = dict(
    none="#1965B0",
    level1="#7BAFDE",
    level2="#4EB265",
    level3="#F7F056",
    level4="#DC050C",
    other="black",
)
# Turn the per-category counts into an image using histogram equalisation.
im = tf.shade(counts, how="eq_hist", color_key=color_key)
# Final expression: the image on a black background, displayed by the notebook.
tf.set_background(im, color="black")

## Datashader creates static images.
## It would be nice to be able to zoom and pan, and to have a legend.