In [None]:
# Google Colab setup: install the pinned plotly version and mount Google Drive.
# Other requirements should be fine on Colab, but can be found in requirements.txt.
!pip install plotly==4.14.3
from google.colab import drive
drive.mount('/content/drive')

# Set sys path: appending the repository folder makes its modules importable
# (the next cell imports `core.validation`). `dir_location` is also used as the
# prefix of every data path in the later cells.
import sys
dir_location = '/content/drive/MyDrive/py_data_visualization' # TODO: Specify here the path of the repository
sys.path.append(dir_location)

In [None]:
# We need those packages for all tasks
import pandas as pd
import numpy as np
import pickle
from core.validation import check

#  Data visualization with Python

A participant of this year's Hüttenseminar has forgotten to evaluate his phantom measurements and to prepare for the next presentation about cryptocurrency.
Can you help to solve his tasks such that the participant can join the games night?

## Task 1: Classic plotting with Matplotlib

Matplotlib is probably the standard visualization tool in Python and can be used by beginners or experienced professionals.

- High amount of flexibility
- Pyplot interface is similar to MATLAB programming
- Graphs are of publication quality

### Proton-Density Fat Fraction (PDFF) and slice visualization

We would like to visualize the measured fat fraction for a single slice. We can therefore use the `imshow` function from matplotlib.

<b>Question: Which vial is placed directly next to the 40(-50)% FF vial?</b>

<b>a) 20 %</b>
<b>b) 60 %</b>

In [None]:
# Data loading: read the pickled fat-fraction data into `img` and report its shape.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
pdff_path = f'{dir_location}/data/fat_fraction.pickle'
with open(pdff_path, 'rb') as pdff_file:
    img = pickle.load(pdff_file)
print(img.shape)

In [None]:
# Import matplotlib and set default values
# Matplotlib also has an interactive mode, but it is not available in Google Colab
import matplotlib.pyplot as plt
%matplotlib inline
colormap = 'inferno' # We are using the BMRR's corporate colormap for PDFF
lim = [-5, 100] # presumably the (lower, upper) display limits in % PDFF -- confirm when plotting

### TODO: Display the data (img) as an image
# The statements after this section expect it to define two names:
#   ax - the matplotlib Axes the image is drawn on (it receives the title below)
#   im - the object returned by the image-drawing call (it is passed to the colorbar below)
# `colormap` and `lim` are defined above and are not used anywhere else in this cell.
#
#
#
###

ax.set_title('PDFF (%)')
plt.colorbar(im, ax=ax) # colour scale for `im`, attached to the same Axes
plt.show()

## Task 2: Beautiful visualization with Seaborn

Seaborn is built on top of matplotlib and simplifies the interface for plotting.

- Data visualization with less code
- Supports many advanced plots (swarm plot, categorical plotting, distribution plotting, relational plotting)

### Phantom homogeneity

Next, we are interested in the homogeneity of the measured phantom. We can evaluate the homogeneity from the distribution of the measured values in each vial, using seaborn's `boxplot` (quartiles) or `violinplot` (kernel density estimate).

<b>Question: Which vial is the most homogeneous?</b>

<b>a) 0 %</b>
<b>b) 20 %</b>
<b>c) 60 %</b>

In [None]:
# We use a 10x10 pixel ROI per vial.
# Each entry is ([index along axis 0, index along axis 1], expected PDFF in %).
centers = [
    ([175, 181], 60),
    ([119, 154], 40),
    ([120, 127], 20),
    ([174, 99], 0),
]

# Constructing the pandas DataFrame
size = 5  # half of the ROI edge length: the slice below spans 2 * size pixels per axis

# Both collections start with an empty default-dtype array; it takes part in the
# concatenation so the resulting dtypes are promoted exactly as if the arrays had
# been grown from an empty array one ROI at a time.
roi_values = [np.array([])]
roi_labels = [np.array([])]
for (row, col), expected_pdff in centers:
    roi = img[row-size:row+size, col-size:col+size].flatten()
    roi_values.append(roi)
    # One label per ROI pixel, carrying the expected fat fraction of that vial
    roi_labels.append(np.ones_like(roi) * expected_pdff)

values = np.concatenate(roi_values)
labels = np.concatenate(roi_labels)

# Long-format table: one row per ROI pixel
df = pd.DataFrame({"Measured PDFF (%)": values, "Expected PDFF (%)": labels})
print(df)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Open a 5x5 inch figure before plotting; it becomes matplotlib's current figure.
fig = plt.figure(figsize=(5,5))

### TODO: Visualize the homogeneity for each vial/ROI
# Hint: the DataFrame `df` built in the previous cell has one row per ROI pixel with
# the columns "Measured PDFF (%)" and "Expected PDFF (%)".
#
#
#
###

plt.show()

## Task 3: Modern and interactive plotting with Plotly

Plotly is a graphing library for interactive and publication-quality graphs.
- Interactive plotting for Jupyter Notebook and Web Browsers
- Supports a wide range of plots
- Allows storytelling by animations

### Simple image viewer

Something went wrong with the DICOM export! A colleague said that for one specific echo time and slice, the image is wrong (no signal at all). Now, we need to find this slice.

We can build a simple animated image viewer using the `imshow` function from plotly. 

<b>Question: What are the echo number and the slice number of the wrong image?</b>

The first answer is the echo number and the second answer is the slice number (animation_frame).

In [None]:
# Data loading: read the pickled multi-echo signal into `sig` and report its shape.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
signal_path = f'{dir_location}/data/signal.pickle'
with open(signal_path, 'rb') as signal_file:
    sig = pickle.load(signal_file)
print(sig.shape) # (n_rows, n_columns, n_slices, n_echoes)

In [None]:
import plotly.express as px

### TODO: Implement a simple image viewer
# The statements after this section expect it to create a plotly figure named `fig`
# whose annotation texts look like "<name>=<integer>" (presumably the zero-based
# per-echo facet labels -- confirm).
#
#
#
###

# Relabel every annotation: take the integer after "=" and show it one-based as "Echo: <n>".
fig.for_each_annotation(lambda a: a.update(text=f'Echo: {int(a.text.split("=")[1])+1}')) # Change the text for each column
fig.show()

## Task 4: Interactive and flexible plotting with Bokeh

Bokeh is a data visualization library for modern web interfaces.

- Interactive visualizations without writing any JavaScript
- Supports many unique visualizations (e.g. geospatial plots, network graphs)
- Easy to link plots

### Bitcoin and Ethereum 

Finally, all tasks related to the phantom are solved. Let's prepare for the next presentation. We would like to interactively visualize the prices of Bitcoin and Ethereum using the line plot from Bokeh.

<b>Question: What was the price of Ethereum on the day when Bitcoin had its highest value?</b>

<b>a) 2.300 USD</b>
<b>b) 4.080 USD</b>
<b>c) 2.000 USD</b>
<b>d) 1.600 USD</b>

In [None]:
# Read the Bitcoin and Ethereum USD tables from the repository's data folder.
btc_csv = f'{dir_location}/data/BTC-USD.csv'
eth_csv = f'{dir_location}/data/ETH-USD.csv'
df_BTC = pd.read_csv(btc_csv, sep=',')
df_ETH = pd.read_csv(eth_csv, sep=',')
# Last expression of the cell: show the Bitcoin table
df_BTC

In [None]:
# Simple plot using matplotlib and pandas: every column except 'Volume' as a line
df_BTC.drop(columns=['Volume']).plot.line()

In [None]:
from bokeh.plotting import figure, show
from bokeh.models.tools import HoverTool
from bokeh.io import output_notebook
output_notebook() # generate output in notebook cells

### TODO: create a new plot with a title and axis labels and 
# add line renderer to the plot
# The statements after this section expect it to create a bokeh figure named `p`.
# NOTE(review): the hover tool below formats @x as a datetime, so the x values of
# the line(s) presumably have to be datetimes rather than plain strings -- confirm.
#
#
#
###

# add an interactive inspector tool
hover = HoverTool(
    tooltips = [
        ("Date", "@x{%F}"), # x value of the hovered point, rendered with the %F date format
        ("Value", "@y") # y value of the hovered point
    ], 
    formatters={"@x":"datetime"}, # interpret @x as a datetime so the {%F} format applies
    mode='vline' # hit-test along a vertical line through the cursor position
)
p.add_tools(hover)

In [None]:
# Display the Bokeh figure `p` that the previous cell builds and adds the hover tool to.
show(p)

## Task 5: Declarative visualization with Altair

Altair is a tool designed for visualization with a minimum amount of code.
- Data visualization with less code
- Easy to link plots

### Craft beers

Games night! But we want to choose a special beer. We are therefore using two linked plots to visualize different parameters for our beer selection. Only US craft beers are available. We need to add an interactive `scatter` plot from Altair.

<b>Question: We are interested in a beer from the US state with the highest count of breweries. What is the beer with the highest amount of bitterness (highest ibu)? If there are several beers, you should choose the beer with the highest amount of alcohol (abv).</b>

The answer is the first letter of the beer's name (lower case).

In [None]:
# Read both tables; the first CSV column becomes the index in each case.
beers_csv = f'{dir_location}/data/beers.csv'
breweries_csv = f'{dir_location}/data/breweries.csv'
df_beers = pd.read_csv(beers_csv, sep=',', index_col=0)
df_breweries = pd.read_csv(breweries_csv, sep=',', index_col=0)

# drop missing values: keep only beers that have abv, ibu and style
df_beers = df_beers.dropna(subset=['abv', 'ibu', 'style'])

# add state information to each beer
# NOTE(review): the lookup is positional (iloc), i.e. it assumes that brewery_id equals
# the row position of the brewery in breweries.csv -- confirm against the data.
brewery_positions = df_beers['brewery_id'].values
brewery_states = df_breweries.iloc[brewery_positions]['state'].values
df_beers['state'] = brewery_states
df_beers

In [None]:
import altair as alt
# Selection bound to the x encoding; it is attached to the bar chart below and also
# drives that chart's colour condition.
# NOTE(review): `selection_single` / `add_selection` are deprecated as of Altair 5
# (replaced by `selection_point` / `add_params`) -- confirm against the installed version.
selection = alt.selection_single(encodings=['x'])

### TODO: Add an interactive scatter plot. A 'tooltip' showing the beer's name would be useful
# The commented-out last line of this cell expects the chart to be named `scatter`.
#
#
#
###

# Bar chart: number of rows of df_beers per state
bar = alt.Chart(df_beers).transform_aggregate(
    count='count()',
    groupby=['state']
).mark_bar().encode(
    x='state:O',
    y='count:Q',
    color=alt.condition(selection, alt.value('steelblue'), alt.value('lightgray')) # The color is chosen based on the selection
).add_selection(
    selection
)

# Only the bar chart is displayed for now; once `scatter` exists, the commented
# expression below shows both charts side by side instead.
bar
#scatter | bar

## Final Task: Are you a python data visualization expert?

Now, you need to combine the answers from each task. 
For example:

Task 1: c <br>
Task 2: e <br>
Task 3: 5, 60 <br>
Task 4: e <br>
Task 5: z <br>
 --> ce560ez

In [None]:
# 'ce560ez' is only the example string from the text above -- replace it with your own combined answer.
check(your_answer='ce560ez')