In [86]:
# Clear all user-defined names from the interactive namespace before running the
# notebook. Without the -f flag IPython asks for confirmation first.
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [87]:
import holoviews as hv
import holoviews.operation.datashader as hd
import numpy as np
import pandas as pd
import datashader as ds
import imageio

from matplotlib.cm import coolwarm, bwr, Blues
from numba import jit
# Load the bokeh and matplotlib plotting backends for HoloViews
hv.extension("bokeh", "matplotlib") 
# Make `coolwarm` the default colormap of hd.shade. Note that shaded_scatter
# below always passes its own cmap explicitly to hd.datashade.
hd.shade.cmap = coolwarm

# Helper functions

In [88]:
# Shaded point plot
def shaded_scatter(df, x, y=None, label="", dynspread=0.5):
    """
    Datashaded x-y scatter plot.

    df: data handed to hv.Points (e.g. a pandas DataFrame)
    x: names of the two key-dimension columns, e.g. ['x', 'y']
    y: column to aggregate with ds.mean (drawn with the `bwr` colormap).
       Default=None, in which case points are counted with ds.count
       (drawn with the `Blues` colormap)
    label: plot label
    dynspread: threshold passed to hd.dynspread. Dynamic spreading is turned
       on only if the value > 0; 0, None or a negative value turns it off

    Returns the datashaded plot, wrapped in hd.dynspread when spreading is on.
    """
    points = hv.Points(df, kdims=x, vdims=y, label=label)
    if y is not None:
        shaded = hd.datashade(points, aggregator=ds.mean(y), cmap=bwr)
    else:
        shaded = hd.datashade(points, aggregator=ds.count(), cmap=Blues)
    # Only a strictly positive value is a usable spreading threshold. The
    # previous `!= 0` check forwarded negative values (and None) to
    # hd.dynspread instead of switching spreading off as documented.
    if dynspread is not None and dynspread > 0:
        return hd.dynspread(shaded, threshold=dynspread)
    return shaded

# Construct data set

In [89]:
# Grid and signal settings
dims = [4000, 2500]          # number of grid points along x and along y
x_period, y_period = 20, 40  # sine period along each axis, in grid steps
noise_level = 0.1            # scale factor applied to the random noise term

In [90]:
# Full Cartesian grid: every (x, y) pair with x < dims[0] and y < dims[1]
index = pd.MultiIndex.from_product(
    iterables=[range(dims[0]), range(dims[1])],
    names=['x', 'y'],
)

In [91]:
# Flatten the grid into a table: the index levels become ordinary 'x' and 'y' columns
df = pd.DataFrame(data=None, index=index).reset_index(drop=False)

In [92]:
# Product of a sine along x and a sine along y, plus scaled standard-normal noise.
# The noise comes from a locally seeded generator so that re-running this cell
# (or the whole notebook) reproduces the same data, without touching NumPy's
# global random state.
noise_rng = np.random.RandomState(0)
df['z'] = (
    np.sin(2 * np.pi * df.x / x_period) * np.sin(2 * np.pi * df.y / y_period)
    + noise_level * noise_rng.randn(df.shape[0])
)

In [93]:
# Data size as (rows, columns); there is one row per (x, y) grid point
df.shape

(10000000, 3)

In [94]:
# Preview the first rows of the flat table (columns: x, y, z)
df.head()

Unnamed: 0,x,y,z
0,0,0,0.016893
1,0,1,-0.003428
2,0,2,0.08808
3,0,3,-0.114137
4,0,4,0.155619


In [95]:
# Render every point with datashader, coloured by the aggregated 'z' value
shaded_scatter(df, x=['x', 'y'], y='z')

# Save as an image

In [96]:
# Reshape the flat table into a 2-D grid: one row per x, one column per y,
# cells hold z (pivot_table aggregates with the mean by default)
piv_df = df.pivot_table(index='x', columns='y', values='z')

In [97]:
# Preview the first rows of the pivoted grid (rows: x, columns: y)
piv_df.head()

y,0,1,2,3,4,5,6,7,8,9,...,2490,2491,2492,2493,2494,2495,2496,2497,2498,2499
x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.016893,-0.003428,0.08808,-0.114137,0.155619,-0.096554,0.080171,0.224839,0.205065,-0.040458,...,-0.003908,0.044684,-0.014694,0.145402,-0.063992,0.001981,0.016234,0.01837,-0.078111,-0.041675
1,0.191576,-0.025178,0.083195,0.09442,-0.002104,0.031953,0.314319,0.165415,0.240154,0.406914,...,0.215057,0.251042,0.316503,0.304656,0.187751,0.406891,0.314429,0.155196,-0.004378,0.080641
2,0.025927,0.287899,0.156858,0.298933,0.392249,0.61819,0.556385,0.538237,0.435182,0.687011,...,0.525934,0.584023,0.507877,0.4547,0.607999,0.40088,0.47512,0.15528,0.187772,0.192038
3,-0.060594,-0.013681,0.395982,0.435675,0.351089,0.369089,0.433559,0.637592,0.551111,0.672943,...,0.690224,0.797765,0.799087,0.550293,0.834658,0.653636,0.368115,0.280554,0.037398,0.139078
4,-0.029256,0.074312,0.23022,0.636402,0.555965,0.595621,0.820224,0.994411,0.840959,0.961758,...,0.814136,1.035885,0.658615,0.808379,0.906734,0.7313,0.561638,0.500791,0.340748,0.239149


In [98]:
# Normalization to the range of 0~255. Optional, but skipping it will raise a warning when saving
# piv_df = (piv_df - np.min(piv_df.values)) / (np.max(piv_df.values) - np.min(piv_df.values)) * 255
# piv_df = piv_df.astype(np.uint8)

In [100]:
# Save to an image. JPEG seems to be much more efficient than PNG
# NOTE(review): piv_df.values is passed as-is here, because the normalization
# to uint8 in the cell above is commented out. Confirm that imageio's own
# conversion of the float data is acceptable.
imageio.imsave('2d_map_as_image.jpeg', piv_df.values)



# Draft