# Plotting questions

First I will generate some dummy data and show my existing plots.
Then I will outline my questions about how to improve these plots while keeping them interactive.

In [None]:
%pylab inline

import pandas as pd
import numpy as np
import seaborn as sns; sns.set()
import matplotlib.dates as mdates

aut_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
aut_formatter = mdates.ConciseDateFormatter(aut_locator)

import random
random_seed = 47
np.random.seed(random_seed)

random.seed(random_seed)

## data generation

In [None]:
def generate_df_for_device(n_observations, n_metrics, device_id, geo_id, topology_id, cohort_id):
    """Build an hourly frame of random metrics for a single device.

    Parameters
    ----------
    n_observations : int
        Number of hourly rows; the first timestamp is 2020-01-01 00:00.
    n_metrics : int
        Number of random metric columns, named ``metrik_0`` ... ``metrik_{n_metrics - 1}``.
    device_id, geo_id, topology_id, cohort_id
        Values repeated on every row in the ``device_id``, ``geospatial_id``,
        ``topology_id`` and ``cohort_id`` columns.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by timestamp. Metric values are drawn with
        ``np.random.randn`` and therefore follow the global NumPy seed.
    """
    # A Timedelta step is used instead of the 'H' frequency alias, which newer
    # pandas versions deprecate.
    hours = pd.date_range('2020', freq=pd.Timedelta(hours=1), periods=n_observations)
    df = pd.DataFrame(np.random.randn(n_observations, n_metrics), index=hours)
    df.columns = [f'metrik_{c}' for c in df.columns]
    df['geospatial_id'] = geo_id
    df['topology_id'] = topology_id
    df['cohort_id'] = cohort_id
    df['device_id'] = device_id
    return df
    
def generate_multi_device(n_observations, n_metrics, n_devices, cohort_levels, topo_levels):
    """Concatenate randomly labelled per-device frames into one frame.

    Devices are numbered ``1..n_devices``. Every device gets a random
    geospatial id in ``1..n_devices``, a cohort id in ``1..cohort_levels`` and
    a topology id in ``1..topo_levels`` (all bounds inclusive). The ids are
    drawn with the stdlib ``random`` module, so they follow ``random.seed``.

    Parameters
    ----------
    n_observations, n_metrics : int
        Passed through to ``generate_df_for_device`` for every device.
    n_devices : int
        Number of devices to generate.
    cohort_levels, topo_levels : int
        Number of distinct cohort / topology ids that may be assigned.

    Returns
    -------
    pandas.DataFrame
        The per-device frames stacked with ``pd.concat``.

    Raises
    ------
    ValueError
        If ``n_devices``, ``cohort_levels`` or ``topo_levels`` is smaller than 1.
    """
    if min(n_devices, cohort_levels, topo_levels) < 1:
        raise ValueError('n_devices, cohort_levels and topo_levels must all be >= 1')

    results = []
    for device_id in range(1, n_devices + 1):
        # randint includes its upper bound; randrange(1, k) never yields k and
        # raises for k == 1, which silently dropped the highest level.
        geo_id = random.randint(1, n_devices)
        cohort = random.randint(1, cohort_levels)
        topo = random.randint(1, topo_levels)
        results.append(
            generate_df_for_device(n_observations, n_metrics, device_id, geo_id, topo, cohort)
        )
    return pd.concat(results)

# Simulation size: one week of hourly readings per device.
n_observations = 7 * 24
n_metrics = 3
n_devices = 20
cohort_levels = 3
topo_levels = 5

# Stack all devices, order the rows by timestamp and expose the timestamp
# index as a regular `hour` column.
df = (
    generate_multi_device(n_observations, n_metrics, n_devices, cohort_levels, topo_levels)
    .sort_index()
    .reset_index()
    .rename(columns={'index': 'hour'})
)
# Calendar date of every reading.
df['dt'] = df['hour'].dt.date
df.head()

In [None]:
# Labelled time windows to highlight in the plots. A missing `end` is filled
# in below as `start + 1 day - 1 second`.
marker_labels = pd.DataFrame({
    'cohort_id': [1, 1, 1],
    'marker_type': ['a', 'b', 'a'],
    # All timestamps use one format: newer pandas versions infer a single
    # format from the first value and reject strings that do not match it.
    'start': ['2020-01-02 00:00', '2020-01-04 05:00', '2020-01-06 00:00'],
    'end': [np.nan, '2020-01-05 16:00', np.nan],
})
marker_labels['start'] = pd.to_datetime(marker_labels['start'])
marker_labels['end'] = pd.to_datetime(marker_labels['end'])

# fillna aligns on the index, so only the rows without an explicit end change.
default_end = marker_labels['start'] + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
marker_labels['end'] = marker_labels['end'].fillna(default_end)
marker_labels

## Working existing plots

### Basic Label Plot

In [None]:
# One static figure per cohort: the mean line of metrik_0 over time, with the
# cohort's labelled time windows shaded in the background.
for cohort_id in sorted(df.cohort_id.unique()):
    print(cohort_id)
    cohort_df = df[df.cohort_id == cohort_id]

    fig, ax = plt.subplots(figsize=(25, 9))
    sns.lineplot(x='hour', y='metrik_0', data=cohort_df, ax=ax)

    # matplotlib documents that a Locator instance should not be used with
    # more than one Axis, so every figure gets its own locator/formatter pair.
    locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))

    ax.set_title(f'cohort_id: {cohort_id}', fontsize=45)
    ax.set_xlabel('')
    ax.set_ylabel('metrik_0', fontsize=35)

    # Only shade the markers recorded for this cohort; previously every marker
    # was drawn on every cohort's plot regardless of its cohort_id.
    cohort_markers = marker_labels[marker_labels.cohort_id == cohort_id]
    for marker in cohort_markers.itertuples(index=False):
        if marker.marker_type == 'b':
            ax.axvspan(marker.start, marker.end, color='gray', alpha=0.2)
        else:
            ax.axvspan(marker.start, marker.end, color='orange', alpha=0.5)

    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations.
    plt.close(fig)



### Basic interactive plot

Variant 1: multiple metrics

In [None]:
# Names of all columns whose name contains 'metrik', in column order.
metrik_columns = [column for column in df.columns if 'metrik' in column]
metrik_columns

In [None]:
# Size in pixels used for the interactive plots.
width = 1200
height = 500

import holoviews as hv
import hvplot.pandas  # imported for its side effect: registers the `.hvplot` accessor

# One interactive plot per cohort with one line per metric column.
for cohort_id in sorted(df.cohort_id.unique()):
    print(cohort_id)
    cohort_df = df[df.cohort_id == cohort_id]
    current_plot = (
        cohort_df.set_index(['hour'])[metrik_columns]
        .hvplot(width=width, height=height)
        .opts(active_tools=['box_zoom'])
    )

    # Only overlay the markers recorded for this cohort; previously every
    # marker was drawn on every cohort's plot regardless of its cohort_id.
    cohort_markers = marker_labels[marker_labels.cohort_id == cohort_id]
    for marker in cohort_markers.itertuples(index=False):
        if marker.marker_type == 'b':
            span_opts = dict(color='grey', alpha=0.2)
        else:
            span_opts = dict(color='orange', alpha=0.4)
        current_plot = current_plot * hv.VSpan(marker.start, marker.end).opts(**span_opts)

    display(current_plot)

Variant 2: a single metric, one line per device

In [None]:
# One interactive plot per cohort showing metrik_0 with one line per device.
for cohort_id in sorted(df.cohort_id.unique()):
    print(cohort_id)
    cohort_df = df[df.cohort_id == cohort_id]
    current_plot = (
        cohort_df.set_index(['hour'])[['metrik_0', 'device_id']]
        .hvplot(by='device_id', width=width, height=height)
        .opts(active_tools=['box_zoom'])
    )

    # Only overlay the markers recorded for this cohort; previously every
    # marker was drawn on every cohort's plot regardless of its cohort_id.
    cohort_markers = marker_labels[marker_labels.cohort_id == cohort_id]
    for marker in cohort_markers.itertuples(index=False):
        if marker.marker_type == 'b':
            span_opts = dict(color='grey', alpha=0.2)
        else:
            span_opts = dict(color='orange', alpha=0.4)
        current_plot = current_plot * hv.VSpan(marker.start, marker.end).opts(**span_opts)

    display(current_plot)

**Question 1:**

How can I move the legend of the interactive plot below the plot and lay it out in more than one column? I have not been able to get this to work so far; see https://github.com/holoviz/holoviews/issues/3780

### Basic cycleplot

In [None]:
# One static figure per cohort: metrik_0 against the hour of the day, with one
# line per (device, day) so the daily cycles overlay each other.
for cohort_id in sorted(df.cohort_id.unique()):
    print(cohort_id)
    cohort_df = df[df.cohort_id == cohort_id]

    fig, ax = plt.subplots(figsize=(25, 9))
    sns.lineplot(
        # Derive x from the same subset that is passed as `data`; taking it
        # from the full frame only worked through implicit index alignment.
        x=cohort_df['hour'].dt.hour,
        y='metrik_0',
        hue='device_id',
        units='dt',
        style='dt',
        estimator=None,
        data=cohort_df,
        ax=ax,
    )

    # Drop the first legend entry and move the legend below the axes.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles[1:], labels=labels[1:], loc='center', bbox_to_anchor=(0.5, -0.25), ncol=6, fontsize=20)

    ax.set_title(f'cohort_id: {cohort_id}', fontsize=35)
    ax.set_xlabel('hour of the day', fontsize=35)
    ax.set_ylabel('metrik_0', fontsize=35)
    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations.
    plt.close(fig)

**Question 2:**

How can I use a single color for all lines (perhaps distinguishing the devices by line style) while highlighting the individual lines / time segments that have a matching ticket in, e.g., red?
    
**Question 3:**

How can I create an interactive plot like the one above (for the periodicities) that also covers Question 2?

### Scatterplots of a large quantity of points - with legend

In [None]:
import datashader as ds
import datashader.transfer_functions as tf

import holoviews as hv
from holoviews.operation.datashader import datashade
hv.extension('bokeh')
from datashader.colors import Sets1to3

# Sort the labels before pairing them with colours: `unique()` returns them in
# order of appearance, while the list-valued `color_key` passed to datashade()
# is presumably applied to the categories in sorted order (TODO confirm).
# Without sorting, the legend colours could be swapped relative to the points.
lab_s = sorted(df.cohort_id.unique())
color_key = list(zip(lab_s, Sets1to3))

# Datashaded images carry no legend of their own, so overlay one dummy Points
# element per cohort whose only purpose is to contribute a legend entry.
color_points = hv.NdOverlay({n: hv.Points([0,0], label=str(n)).opts(color=c) for n, c in color_key})

shaded = datashade(
    hv.Points(df, kdims=['metrik_0', 'metrik_1'], vdims=['cohort_id']),
    aggregator=ds.count_cat('cohort_id'),
    color_key=Sets1to3,
).opts(width=width, height=height)
shaded * color_points