In [1]:
pip install .

Processing /home/jovyan
Building wheels for collected packages: edacolors
  Building wheel for edacolors (setup.py) ... [?25ldone
[?25h  Created wheel for edacolors: filename=edacolors-0.0.1-py3-none-any.whl size=3529 sha256=bb3c9a9c09aa0bb0e08364ddc0d716b24d3c26a6dc2e4a2e4497079253b25e99
  Stored in directory: /tmp/pip-ephem-wheel-cache-37bs383_/wheels/fc/c4/49/78b5bd16ca276f2916d0829d47c131046b6e4575f7dd51e987
Successfully built edacolors
Installing collected packages: edacolors
  Attempting uninstall: edacolors
    Found existing installation: edacolors 0.0.1
    Uninstalling edacolors-0.0.1:
      Successfully uninstalled edacolors-0.0.1
Successfully installed edacolors-0.0.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
## Load packages 

# General packages
import numpy as np
import sys
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 

# Plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import random
import string

# eda colors
from edacolors import eda_themes
from edacolors.eda_themes import *

# Load eda visualization theme
pio.templates.default = "edacolors"

In [3]:
## Choose parameters for data and colormap

# Parameters
NSAMPLES = 1000
NCOLORS = 8   # MUST be <= 8 for ito, tol
QUALITATIVEMAP = tol  # ito or tol
QUALITATIVEMAP_VALUES = tol_values

In [7]:
from colorsys import rgb_to_hls, hls_to_rgb

def adjust_color_lightness(r, g, b, adjustment):
    h, l, s = rgb_to_hls(r / 255.0, g / 255.0, b / 255.0)
    l = max(min(l + adjustment, 1.0), 0.0)
    r, g, b = hls_to_rgb(h, l, s)
    return int(r * 255), int(g * 255), int(b * 255)

In [10]:
tol_values[1][1]

204

In [11]:
adjust_color_lightness(tol_values[0][0],tol_values[0][1], tol_values[0][2], -.05)

(115, 28, 72)

In [None]:
## Generate data
# Note - assuming no negative data.

# Create different types of variables
cat1_vars = random.sample(string.ascii_lowercase,NCOLORS)
cat2_vars = random.sample(string.ascii_uppercase,NCOLORS)

cat1 = [random.choice(cat1_vars) for i in np.arange(NSAMPLES)]
cat2 = [random.choice(cat2_vars) for i in np.arange(NSAMPLES)]
binary1 = [random.choice([0, 1]) for i in np.arange(NSAMPLES)]
binary2 = [random.choice([0, 1]) for i in np.arange(NSAMPLES)]
ordinal1 = [random.choice(np.arange(NCOLORS)) for i in np.arange(NSAMPLES)]
ordinal2 = [random.choice(np.arange(NCOLORS)) for i in np.arange(NSAMPLES)]
cont1 = np.abs([random.gauss(5,2) for i in np.arange(NSAMPLES)])
cont2 = np.abs([cont1[i] + random.gauss(1,1) for i in np.arange(NSAMPLES)])
cont3 = np.abs([random.gauss(5,2) for i in np.arange(NSAMPLES)])

df = pd.DataFrame({"cat1": cat1, "cat2": cat2, "binary1": binary1, "binary2": binary2,
                          "ordinal1": ordinal1, "ordinal2": ordinal2, 
                          "cont1": cont1, "cont2": cont2, "cont3": cont3})

# Add a little shift to continuous variables so they don't sit all on top of each other
for (i,var) in enumerate(cat1_vars):
    df.cont2[df.cat1 == var] += 5*i


df.head()

In [None]:
# Great. We have data. Now let's start making plots!


In [None]:
# Overplotted scatter
# For now, colors separated

fig = px.scatter(df, x='cont1', y='cont2', color='cat1', color_discrete_sequence=QUALITATIVEMAP, trendline='ols')

fig.update_traces(marker_size=10, marker_opacity=1)
fig.show()

In [None]:
# Overplotted scatter
# Simple adjust opacity - fails for large NSAMPLES (try 100000)

fig = px.scatter(df, x='cont1', y='cont2', color='cat1', color_discrete_sequence=QUALITATIVEMAP, trendline='ols')

fig.update_traces(marker_size=10, marker_opacity=0.2)
fig.show()

In [None]:
# Overplotted scatter
# Make lines slightly, slightly darker

fig = px.scatter(df, x='cont1', y='cont2', color='cat1', color_discrete_sequence=QUALITATIVEMAP, trendline='ols')

fig.update_traces(marker_size=10, marker_opacity=0.2)
fig.show()

In [None]:
# Seaborn
ax = sns.regplot(x="cont1", y="cont2", data=df,
                 scatter_kws={"color": "pink"}, line_kws={"color": "red"})

plt.show()

In [None]:
df=pd.DataFrame(dict(A=np.random.uniform(low=-1, high=2, size=25).tolist(),
                    B=np.random.uniform(low=-4, high=3, size=25).tolist(),
                    C=np.random.uniform(low=-1, high=3, size=25).tolist(),
                    ))

# plotly  figure
fig = go.Figure()

# add line and shaded area for each series and standards deviation
for i, col in enumerate(df):
    new_col = QUALITATIVEMAP[i]
    print('rgba' + new_col[3:-1]+',0.2)')
    x = list(df.index.values+1)
    y1 = df[col]
    y1_upper = [(y + np.std(df[col])) for y in df[col]]
    y1_lower = [(y - np.std(df[col])) for y in df[col]]
    y1_lower = y1_lower[::-1]
    
    # scatter
    fig.add_traces(go.Scatter(x=x,
                                y=y1,
                                showlegend=False,
                                mode='markers',
                                name=col))

    # standard deviation area
    fig.add_traces(go.Scatter(x=x+x[::-1],
                                y=y1_upper+y1_lower,
                                fill='tozerox',
                                fillcolor='rgba' + new_col[3:-1]+',0.2)',
                                line=dict(color='rgba(255,255,255,0)'),
                                showlegend=False,
                                opacity=0.1,
                                name=col))

    # line trace
    fig.add_traces(go.Scatter(x=x,
                              y=y1,
                              line= {"color": 'rgba' + new_col[3:-1]+',0.2)'},
                              mode='lines',
                              name=col)
                                )
# set x-axis
fig.update_layout(xaxis=dict(range=[1,len(df)]))

fig.show()

In [None]:
QUALITATIVEMAP[1][0:-1]