# Altair Example 6 - Bob Ross Paintings by Similarity
This notebook demonstrates how to display multiple charts, how to use multidimensional scaling (MDS) to project many features onto a 2D plane, and how to use image marks.  The data used to create this visualization comes from [FiveThirtyEight](https://fivethirtyeight.com/) and was used in the article [A Statistical Analysis of the Work of Bob Ross](https://fivethirtyeight.com/features/a-statistical-analysis-of-the-work-of-bob-ross/) (Hickey, 2014).  The original dataset can be found at FiveThirtyEight's GitHub: [bob-ross](https://github.com/fivethirtyeight/data/tree/master/bob-ross).  Images of the paintings and their color analysis from each show can be found in jwilber's GitHub: [Bob_Ross_Paintings](https://github.com/jwilber/Bob_Ross_Paintings).

In [1]:
# import urllib.request
# import os.path
# from os import path
import pandas as pd
import altair as alt
import numpy as np
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA
import ipywidgets as widgets
# from IPython.display import display
# from PIL import Image

## Conditional Probability Plot
Displaying multiple plots, each showing the conditional probability that a painting contains one feature given that it contains another, and how that probability has changed over the years.

In [2]:
# The code in this cell was written and provided by the instruction team of 
# University of Michigan - School of Information - SIADS-622 - Information Visualization II
# Taught by Professor Eytan Adar (2021)

# Names of the paints Bob used.
# rosspaints and rosspainthex are parallel lists (18 entries each): rosspainthex[i] is the
# colour for rosspaints[i]. Neither list is referenced by the cells visible in this notebook.
rosspaints = ['alizarin crimson','bright red','burnt umber','cadmium yellow','dark sienna', 
              'indian yellow','indian red','liquid black','liquid clear','black gesso',
              'midnight black','phthalo blue','phthalo green','prussian blue','sap green',
              'titanium white','van dyke brown','yellow ochre']

# hex values for the paints above (same order as rosspaints)
rosspainthex = ['#94261f','#c06341','#614f4b','#f8ed57','#5c2f08','#e6ba25','#cd5c5c',
                '#000000','#ffffff','#000000','#36373c','#2a64ad','#215c2c','#325fa3',
                '#364e00','#f9f7eb','#2d1a0c','#b28426']

# Boolean features about what an image includes.
# These strings are used verbatim as column names of the bobross data frame
# (bobross[imgfeatures] is what gets fed to MDS and summed for the dropdown), so they must
# not be "corrected": spellings such as 'Cumulis clouds' and 'Decidious tree' presumably
# mirror the CSV headers -- verify against assets/bobross.csv before changing any of them.
imgfeatures = ['Apple frame', 'Aurora borealis', 'Barn', 'Beach', 'Boat', 
               'Bridge', 'Building', 'Bushes', 'Cabin', 'Cactus', 
               'Circle frame', 'Cirrus clouds', 'Cliff', 'Clouds', 
               'Coniferous tree', 'Cumulis clouds', 'Decidious tree', 
               'Diane andre', 'Dock', 'Double oval frame', 'Farm', 
               'Fence', 'Fire', 'Florida frame', 'Flowers', 'Fog', 
               'Framed', 'Grass', 'Guest', 'Half circle frame', 
               'Half oval frame', 'Hills', 'Lake', 'Lakes', 'Lighthouse', 
               'Mill', 'Moon', 'At least one mountain', 'At least two mountains', 
               'Nighttime', 'Ocean', 'Oval frame', 'Palm trees', 'Path', 
               'Person', 'Portrait', 'Rectangle 3d frame', 'Rectangular frame', 
               'River or stream', 'Rocks', 'Seashell frame', 'Snow', 
               'Snow-covered mountain', 'Split frame', 'Steve ross', 
               'Man-made structure', 'Sun', 'Tomb frame', 'At least one tree', 
               'At least two trees', 'Triple frame', 'Waterfall', 'Waves', 
               'Windmill', 'Window frame', 'Winter setting', 'Wood framed']

# Load the data frame (path is relative to the notebook's working directory).
# Later cells read its 'year' column, the feature columns listed in imgfeatures and the
# 'img_url' column, and add 'x' / 'y' columns holding the MDS coordinates.
bobross = pd.read_csv("assets/bobross.csv")

# enable correct rendering (unnecessary in later versions of Altair)
alt.renderers.enable('default')

# uses intermediate json files to speed things up
# (the chart data is handed to the renderer via a json file rather than inlined in the spec)
alt.data_transformers.enable('json')

def build_condprob_df(keys):
    """Build a long-format table of pairwise conditional probabilities per year.

    One record is produced for every year found in the global ``bobross``
    frame and every ordered pair ``(key1, key2)`` taken from ``keys``.

    Parameters
    ----------
    keys : sequence of str
        Feature column names to compare pairwise. It is iterated repeatedly,
        so it must be re-iterable (e.g. a list).

    Returns
    -------
    pandas.DataFrame
        Columns ``key1``, ``key2``, ``year`` and ``prob``; rows are ordered by
        year, then ``key1``, then ``key2``.
    """
    def _pair_probability(feature, given, year):
        # A feature paired with itself is assigned 1 directly, which also
        # skips a needless lookup.
        if feature == given:
            return 1
        return condprobability(bobross, feature, given, year)

    records = [
        {
            'key1': feature,
            'key2': given,
            'year': year,
            'prob': _pair_probability(feature, given, year),
        }
        for year in bobross.year.unique()
        for feature in keys
        for given in keys
    ]
    return pd.DataFrame(records)

# Enable the FiveThirtyEight theme.
# This is the last expression of the cell, so its return value is echoed as the cell output.
alt.themes.enable('fivethirtyeight')

ThemeRegistry.enable('fivethirtyeight')

In [3]:
# This is code written by Nicholas Miller

def condprobability(frame,column1,column2,year):
    """Estimate P(column1 == 1 | column2 == 1) among the rows of a single year.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain a ``year`` column plus the two feature columns, whose
        values are compared against 1.
    column1 : str
        Feature whose probability is wanted.
    column2 : str
        Feature that is given (the condition).
    year : scalar
        Only rows whose ``year`` equals this value are considered.

    Returns
    -------
    float or int
        Fraction of the rows with ``column2 == 1`` in ``year`` that also have
        ``column1 == 1``. Returns 0 when no row satisfies the condition
        (unknown year, or the given feature never appears in that year).

    Raises
    ------
    KeyError
        If ``column1`` or ``column2`` is not a column of ``frame``. These
        lookups used to be swallowed by a bare ``except`` and reported as a
        probability of 0, which hid typos in feature names.
    """
    # Rows of the requested year in which the given feature is present.
    given = frame[(frame.year == year) & (frame[column2] == 1)]
    if given.empty:
        # Nothing to condition on: keep the established "0" convention
        # instead of dividing by zero.
        return 0
    # The mean of a boolean mask is the share of True values.
    return (given[column1] == 1).mean()

def makeBobRossCondProb(totest=['At least one tree','At least two trees','Clouds','Grass','At least one mountain','Lake']):
    """Build a grid of small line charts of pairwise conditional probabilities.

    The data comes from ``build_condprob_df(totest)``. Each grid cell plots
    ``prob`` against ``year``; columns are faceted by the given feature
    (``key2``) and rows by the feature whose probability is shown (``key1``),
    both sorted in the order of ``totest``.

    Parameters
    ----------
    totest : list of str
        Feature columns to analyse pairwise. The default list is only read,
        never modified.

    Returns
    -------
    An Altair chart object with the view and axis configuration applied.
    """
    cond_df = build_condprob_df(totest)

    def _facet_header():
        # Both facets use the same header font sizes.
        return alt.Header(titleFontSize=14, labelFontSize=12)

    # Untitled axes; the x axis ticks are pinned to the years 1983..1994.
    year_axis = alt.Axis(title='', values=list(range(1983,1995)))
    x_year = alt.X('year:O', axis=year_axis)
    y_prob = alt.Y('prob:Q', axis=alt.Axis(title=''))

    given_facet = alt.Column(
        'key2:N',
        sort=totest,
        title='Given...',
        header=_facet_header(),
    )
    target_facet = alt.Row(
        'key1:N',
        sort=totest,
        title='Probability of...',
        header=_facet_header(),
    )

    chart_title = {
        "text": "Conditional Probability of Image Features Appearing Together",
        "subtitle": ["Given feature X, how has the probability of feature Y changed over the years?"],
        "fontSize": 24,
        "subtitleFontSize": 16,
        "anchor": "start",
        "offset": 15,
    }

    lines = alt.Chart(cond_df).mark_line().encode(
        x=x_year,
        y=y_prob,
        column=given_facet,
        row=target_facet,
    )

    # Drop the frame around each small chart, size the cells, hide axis lines.
    borderless = lines.configure_view(strokeOpacity=0)
    sized = borderless.properties(width=100, height=100, title=chart_title)
    return sized.configure_axis(domain=False)
# Build the chart with the default feature list; as the last expression of the cell it is
# displayed as the cell's output.
makeBobRossCondProb()

![example6_1](images/example6_1.png)

## Multi-Dimensionality Reduction Plot
Using sklearn's MDS (multidimensional scaling) to reduce the features to a 2D plot, and adding interactive elements to show how paintings containing a given feature are grouped near each other.

In [4]:
# The code in this cell was written and provided by the instruction team of 
# University of Michigan - School of Information - SIADS-622 - Information Visualization II
# Taught by Professor Eytan Adar (2021)

# seeded random state handed to MDS below
seed = np.random.RandomState(seed=3)

# MDS configuration: 2 output components. Tweak these settings to see how they change the layout.
mds = manifold.MDS(
    n_components=2,
    max_iter=3000,
    eps=1e-9,
    random_state=seed,
    n_jobs=1,
)

# fit on the feature columns; afterwards 'pos' holds one (x, y) pair per painting
mds.fit(bobross[imgfeatures])
pos = mds.embedding_

# store the coordinates on the bobross data frame as new 'x' and 'y' columns
bobross['x'] = pos[:, 0]
bobross['y'] = pos[:, 1]

In [5]:
# This is code written by Nicholas Miller

def genMDSPlot(key):
    """Build the MDS scatter of painting thumbnails with one feature highlighted.

    Two layers are drawn from the global ``bobross`` frame at each painting's
    ``x`` / ``y`` position: a rectangle coloured by the value of column
    ``key`` (0 -> black, 1 -> red) and, on top of it, the painting image
    loaded from the ``img_url`` column. The rectangle is ``rect_buffer``
    pixels larger than the image, so it shows as a coloured border.

    Parameters
    ----------
    key : str
        Name of the ``bobross`` feature column; paintings where it is 1 get
        the red border.

    Returns
    -------
    A layered Altair chart (800x800) with title, view and axis configuration
    applied.
    """
    # presumably the width/height ratio of the source images -- TODO confirm
    img_size_ratio = 300/224
    img_size = 30
    rect_buffer=5

    # rectangles: the coloured border behind each thumbnail
    chart = alt.Chart(bobross).mark_rect(
        width=img_size+rect_buffer,
        height=img_size/img_size_ratio+rect_buffer,
    ).encode(
        x='x',
        y='y',
        color=alt.Color(
            key,
            type='ordinal',
            scale=alt.Scale(
                domain=[0,1],
                range=['black','red']
            ),
            legend=None,
        ),
    )

    # images: thumbnails layered on top, with axis titles and labels suppressed
    chart += alt.Chart(bobross).mark_image(
        width=img_size,
        height=img_size,
    ).encode(
        x=alt.X(
            'x',
            title=None,
            axis=alt.Axis(
                title='',
                values=[0],
                labels=False,
            )
        ),
        y=alt.Y(
            'y',
            title=None,
            axis=alt.Axis(
                title='',
                values=[0],
                labels=False,
            )
        ),
        url='img_url'
    )

    return chart.configure_view(
        strokeOpacity=0
    ).properties(
        width=800,
        height=800,
        # title spelling fixed: was "Simularity"
        title={"text": "Bob Ross Paintings by Feature Similarity",
               "subtitle": [f"Paintings containing {key} are highlighted in red"],
               "fontSize":24,
               "subtitleFontSize":16,
               "anchor":"start",
               "offset":15
              }
    ).configure_axis(
        domain=False,
    )

In [6]:
# The code in this cell was written and provided by the instruction team of 
# University of Michigan - School of Information - SIADS-622 - Information Visualization II
# Taught by Professor Eytan Adar (2021)

# note that it might take a few seconds for the images to download
# depending on your internet connection

# output area the chart is (re)drawn into
output = widgets.Output()

def clicked(b):
    """Redraw the MDS chart for the feature currently selected in the dropdown.

    ``b`` is whatever the caller passes (the change notification when invoked
    through ``observe``); it is not used because the current selection is read
    from ``filterdrop`` directly.
    """
    output.clear_output()
    with output:
        highlight = filterdrop.value
        # covers both an empty string and None (a dropdown without options)
        if not highlight:
            print("please enter a query")
        else:
            genMDSPlot(highlight).display()


# number of paintings that contain each feature
featurecount = bobross[imgfeatures].sum()

# only offer features that occur in more than two paintings
filterdrop = widgets.Dropdown(
    options=list(featurecount[featurecount > 2].keys()),
    description='Highlight:',
    disabled=False,
)

# React to changes of the selected value only. Without the names filter the handler is
# invoked for every trait that changes on a selection, redrawing the chart (and
# re-downloading the images) several times per click.
filterdrop.observe(clicked, names='value')

display(filterdrop,output)

# Initial render: draw the chart for whatever the dropdown shows first, so the chart and the
# dropdown always agree.
clicked(None)

Dropdown(description='Highlight:', options=('Barn', 'Beach', 'Bridge', 'Bushes', 'Cabin', 'Cactus', 'Cirrus cl…

Output()

![example6_2](images/example6_2.png)