# Visualization Techniques (Scatterplots - 1)

## Venustiano Soancatl Aguilar
### Center for Information Technology
### University of Groningen, the Netherlands

### Scatterplots
- Show the relationship between two variables
- Can be used to create matrix scatterplots
- Works fine for a limited number of variables

### Defining a template

In [1]:
# https://github.com/plotly/plotly.py/issues/1664#issuecomment-511773518
import plotly.graph_objects as go
import plotly.io as pio

# Default renderer: plotly joins renderer names with '+', so figures are
# emitted for both the 'notebook' and the 'jupyterlab' renderers.
pio.renderers.default = 'notebook+jupyterlab'

# Register a custom 'slides' template that only fixes the figure size, then
# make it the default layered on top of the built-in 'plotly' template.
pio.templates['slides'] = go.layout.Template(layout={'width': 800, 'height': 600})
pio.templates.default = 'plotly+slides'

### Data

Our goal is to visualize differences between older and younger participants.

In [5]:
import pandas as pd
# Read the space-delimited results table and display it
results_file = 'data/results.txt'
tableRes = pd.read_csv(results_file, sep=' ')
tableRes

Unnamed: 0,iSubj,trial,Age,Decade,medLrms51,medTI51,medK,meanK,medSpeed,medTIms51,medLcovD51,medLsd51,medLcov51,medLrmslD51,medLsdD51
1,1,1,76,7,0.189493,0.589529,207.800939,333.861515,0.488721,0.806345,0.744766,0.020602,0.108687,1.259172,0.129068
2,1,2,76,7,0.204152,0.697338,223.294322,319.601546,0.518278,0.775451,0.506944,0.016446,0.078140,1.263366,0.105902
3,1,3,76,7,0.217347,0.786131,136.400753,272.460861,0.584193,0.511902,0.706127,0.047372,0.236467,0.782796,0.133917
4,1,4,76,7,0.169569,1.020695,162.034626,289.927862,0.590151,0.416398,0.701803,0.039017,0.265361,0.487570,0.108637
5,1,5,76,7,0.170786,0.769747,141.767674,300.625818,0.551706,0.550029,0.753161,0.036798,0.235907,0.646194,0.119610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,40,6,58,5,0.304933,0.715350,263.249490,393.654382,0.419596,0.989819,0.248116,0.009786,0.031676,2.273710,0.079016
397,40,7,58,5,0.298630,0.758093,285.346624,436.866203,0.364656,1.154292,0.162838,0.005467,0.017858,2.618494,0.050360
398,40,8,58,5,0.295032,0.715015,292.177991,422.242295,0.398149,1.077907,0.266188,0.009360,0.031146,2.475323,0.079460
399,40,9,58,5,0.310277,0.710607,255.888346,392.841818,0.413573,1.023273,0.242811,0.009074,0.028856,2.338997,0.075648


### Speed vs curvature scatterplot

In [6]:
# Normalize curvature (medK) and speed (medSpeed) to z-scores: subtract the
# column mean and divide by the column standard deviation (pandas' default
# sample standard deviation, ddof=1).
# The arithmetic is done directly on each Series. Passing a callable that uses
# .mean()/.std() to Series.transform is fragile, because pandas first tries
# the callable element by element and only falls back to the whole Series
# after that attempt raises.
for raw_col, norm_col in [('medK', 'norm_curvature'), ('medSpeed', 'norm_speed')]:
    tableRes[norm_col] = (tableRes[raw_col] - tableRes[raw_col].mean()) / tableRes[raw_col].std()
tableRes

Unnamed: 0,iSubj,trial,Age,Decade,medLrms51,medTI51,medK,meanK,medSpeed,medTIms51,medLcovD51,medLsd51,medLcov51,medLrmslD51,medLsdD51,norm_curvature,norm_speed
1,1,1,76,7,0.189493,0.589529,207.800939,333.861515,0.488721,0.806345,0.744766,0.020602,0.108687,1.259172,0.129068,-0.053610,-0.206204
2,1,2,76,7,0.204152,0.697338,223.294322,319.601546,0.518278,0.775451,0.506944,0.016446,0.078140,1.263366,0.105902,0.228271,0.155747
3,1,3,76,7,0.217347,0.786131,136.400753,272.460861,0.584193,0.511902,0.706127,0.047372,0.236467,0.782796,0.133917,-1.352643,0.962942
4,1,4,76,7,0.169569,1.020695,162.034626,289.927862,0.590151,0.416398,0.701803,0.039017,0.265361,0.487570,0.108637,-0.886268,1.035904
5,1,5,76,7,0.170786,0.769747,141.767674,300.625818,0.551706,0.550029,0.753161,0.036798,0.235907,0.646194,0.119610,-1.254999,0.565108
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,40,6,58,5,0.304933,0.715350,263.249490,393.654382,0.419596,0.989819,0.248116,0.009786,0.031676,2.273710,0.079016,0.955203,-1.052715
397,40,7,58,5,0.298630,0.758093,285.346624,436.866203,0.364656,1.154292,0.162838,0.005467,0.017858,2.618494,0.050360,1.357231,-1.725497
398,40,8,58,5,0.295032,0.715015,292.177991,422.242295,0.398149,1.077907,0.266188,0.009360,0.031146,2.475323,0.079460,1.481519,-1.315347
399,40,9,58,5,0.310277,0.710607,255.888346,392.841818,0.413573,1.023273,0.242811,0.009074,0.028856,2.338997,0.075648,0.821277,-1.126462


In [7]:
import plotly.express as px
# Speed vs curvature, one marker per row of tableRes: colour encodes Decade
# and marker size encodes Age. The title and axis labels let the figure stand
# on its own instead of exposing the raw column names.
fig = px.scatter(
    tableRes,
    x="norm_curvature",
    y="norm_speed",
    color="Decade",
    size='Age',
    hover_data=['Age'],
    title="Speed vs curvature",
    labels={
        "norm_curvature": "Normalized curvature",
        "norm_speed": "Normalized speed",
    },
)
fig.show()

### Optional assignment
- Use discrete colors for `Decade`. Tip: https://plotly.com/python/discrete-color/
- Select appropriate colors from https://colorbrewer2.org/
- Tip: use `color_discrete_sequence` (See DataVis-01Py)

### Matrix scatterplot
- Visualize the pairwise relationships between the 11 variables
- Based on the plotly documentation https://plotly.com/python/v3/legacy/scatterplot-matrix/
- We need wide format
- We need normalized data columns

### Normalize columns

In [8]:
# Select the measurement columns to normalize by label instead of by position,
# so the selection does not silently shift if the number of leading
# identifier columns changes. Label slices with .loc include both endpoints.
cols_to_norm = tableRes.loc[:, 'medLrms51':'medLsdD51'].columns
cols_to_norm

Index(['medLrms51', 'medTI51', 'medK', 'meanK', 'medSpeed', 'medTIms51',
       'medLcovD51', 'medLsd51', 'medLcov51', 'medLrmslD51', 'medLsdD51'],
      dtype='object')

In [9]:
def _zscore(column):
    """Return `column` with its mean subtracted, divided by its standard deviation."""
    return (column - column.mean()) / column.std()


# Overwrite every selected column with its z-score, one column at a time
tableRes[cols_to_norm] = tableRes[cols_to_norm].apply(_zscore)
tableRes.head()

Unnamed: 0,iSubj,trial,Age,Decade,medLrms51,medTI51,medK,meanK,medSpeed,medTIms51,medLcovD51,medLsd51,medLcov51,medLrmslD51,medLsdD51,norm_curvature,norm_speed
1,1,1,76,7,-0.406276,-1.179195,-0.05361,0.024586,-0.206204,0.09515,0.332978,-0.194534,-0.178508,0.045147,0.614619,-0.05361,-0.206204
2,1,2,76,7,-0.168225,-0.344193,0.228271,-0.226344,0.155747,-0.062731,-0.389216,-0.465559,-0.544866,0.054335,-0.174366,0.228271,0.155747
3,1,3,76,7,0.046049,0.343531,-1.352643,-1.05587,0.962942,-1.40957,0.215643,1.551026,1.353981,-0.998491,0.779784,-1.352643,0.962942
4,1,4,76,7,-0.729817,2.16029,-0.886268,-0.748506,1.035904,-1.897633,0.202514,1.006211,1.700509,-1.645271,-0.081213,-0.886268,1.035904
5,1,5,76,7,-0.710069,0.216632,-1.254999,-0.560256,0.565108,-1.214724,0.358471,0.861507,1.347267,-1.297759,0.292489,-1.254999,0.565108


In [10]:
#import chart_studio.plotly as py
from plotly.offline import iplot
import plotly.figure_factory as ff
# Build the scatterplot matrix for the selected (normalized) columns
fig = ff.create_scatterplotmatrix(
    tableRes[cols_to_norm],
    height=1000,
    width=1000,
)
# Use small markers in every panel; the trailing ';' suppresses the cell's
# output so the figure is not displayed by this cell.
fig.update_traces(marker={'size': 2});

In [11]:
# Display the matrix through the renderer configured in pio.renderers.default,
# the same way the speed-vs-curvature scatterplot is shown. The legacy
# plotly.offline.iplot `filename` argument only names an exported image, so
# nothing is lost by not passing it.
fig.show()

### End of scatterplots part 1