In [1]:
import zipfile

import numpy as np
import pandas as pd

# Unpack only the archives that are read straight away; bakery.csv.zip is
# extracted further down, in the cell that loads it.
for archive_name in ("melb_clean.csv.zip", "nba.csv.zip"):
    with zipfile.ZipFile(archive_name, mode="r") as archive:
        archive.extractall(path=".")

# The nba dataset used by the basketball exercises.
nba_org = pd.read_csv("nba.csv")

# The melb_clean dataset, plus its "Northern" and "Southern" region subsets.
melb_clean = pd.read_csv("melb_clean.csv")
is_northern = melb_clean["region"] == "Northern"
is_southern = melb_clean["region"] == "Southern"
north = melb_clean[is_northern]
south = melb_clean[is_southern]

## Switch bokeh to the notebook mode.
## This runs once, before any figure is shown; the plotting cells below
## additionally call output_file() with a per-plot HTML filename.
from bokeh.io import output_notebook
output_notebook()

## Import the libraries we need
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, NumeralTickFormatter, FactorRange
from bokeh.io import output_file, show

## Shooting guards versus small forwards

The sports media agency will be producing a blog comparing the importance of shooting guards and small forwards in points production. They have asked you to produce a scatter plot displaying points and assists per game for each of the two positions, using different glyph colors, sizes, and transparency.

The nba dataset has been filtered for "SG" and "SF", and preloaded for you as two Bokeh source objects called shooting_guards and small_forwards. A HoverTool has also been created to display "player", "team", and "field_goal_perc".

### Instructions
    - Add circle glyphs for points per game versus average assists using shooting_guards, setting size to 16 pixels, fill_color to red, and assigning 0.2 to the relevant keyword argument for glyph transparency.
    - Add circle glyphs to represent points versus average assists for small forwards, filled in green, 6 pixels in size, and with 0.6 glyph transparency.

In [2]:
# One Bokeh data source per position being compared.
is_shooting_guard = nba_org["position"].eq("SG")
is_small_forward = nba_org["position"].eq("SF")
shooting_guards = ColumnDataSource(nba_org.loc[is_shooting_guard])
small_forwards = ColumnDataSource(nba_org.loc[is_small_forward])

In [3]:
# Hover fields: player name, team and field goal percentage.
TOOLTIPS = [
    ("Name", "@player"),
    ("Team", "@team"),
    ("Field Goal %", "@field_goal_perc{0.2f}"),
]

fig = figure(
    title="Shooting Guard vs Small Forward",
    x_axis_label="Assists",
    y_axis_label="Points",
    tooltips=TOOLTIPS,
)

# (data source, legend label, size in px, fill colour, fill transparency);
# shooting guards are drawn first, then small forwards.
position_styles = [
    (shooting_guards, "Shooting Guard", 16, "red", 0.2),
    (small_forwards, "Small Forward", 6, "green", 0.6),
]
for position_source, position_label, glyph_px, glyph_colour, glyph_alpha in position_styles:
    fig.circle(
        x="assists",
        y="points",
        source=position_source,
        legend_label=position_label,
        size=glyph_px,
        fill_color=glyph_colour,
        fill_alpha=glyph_alpha,
    )

output_file(filename="sg_vs_sf.html")
show(fig)

## Big shooters

Traditionally, the tallest basketball players are in the center position, and they primarily shoot close to the basket. However, there has been a trend towards all positions shooting more three-point field goals in recent years.

The agency has a scatter plot visualizing "three_point_perc" versus "field_goal_perc" for centers and power forwards in the NBA. The dataset has been filtered for each position, and Bokeh source objects named centers and power_forwards have been preloaded for you. TOOLTIPS has also been created to display player names and average points per game.

The agency has asked you to change the plot's glyph settings to aid interpretation.

### Instructions
    - Update the glyph size attribute of center_glyphs and power_forward_glyphs to 20 pixels and 10 pixels, respectively.
    - Update the fill color attribute of center_glyphs and power_forward_glyphs to red and yellow, respectively.

In [4]:
# One Bokeh data source per "big" position: centers and power forwards.
is_center = nba_org["position"].eq("C")
is_power_forward = nba_org["position"].eq("PF")
centers = ColumnDataSource(nba_org.loc[is_center])
power_forwards = ColumnDataSource(nba_org.loc[is_power_forward])

In [5]:
fig = figure(
    x_axis_label="Field Goal Percentage",
    y_axis_label="Three Point Field Goal Percentage",
    tooltips=TOOLTIPS,
)

# Both positions plot the same pair of columns.
shooting_axes = dict(x="field_goal_perc", y="three_point_perc")
center_glyphs = fig.circle(
    source=centers,
    legend_label="Center",
    fill_alpha=0.2,
    **shooting_axes,
)
power_forward_glyphs = fig.circle(
    source=power_forwards,
    legend_label="Power Forward",
    fill_color="green",
    fill_alpha=0.6,
    **shooting_axes,
)

# Restyle the renderers after creation: (renderer, new size in px, new fill colour).
for renderer, new_size, new_fill in (
    (center_glyphs, 20, "red"),
    (power_forward_glyphs, 10, "yellow"),
):
    renderer.glyph.size = new_size
    renderer.glyph.fill_color = new_fill

output_file(filename="big_shooters.html")
show(fig)

## Evolution of the point guard

The agency is going to run an article on the evolution of the Point Guard position in basketball.

They have asked you to produce a line plot displaying points and assists for two players who have redefined the standards of this position - Steph Curry and Chris Paul. Two Bokeh source objects, steph and chris, have been preloaded for you along with a figure.

You will add line glyphs representing points and assists for the two players, using different glyph settings.

### Instructions
    - Add line glyphs to represent Steph Curry's average points, filling in green, and setting the width to 2 and transparency to 0.5.
    - Display Steph Curry's average assists as line glyphs filled in purple, setting the width to 4 and transparency to 0.3.
    - For Chris Paul's average points, add line glyphs filled in red, setting the width to 1 and transparency to 0.8.
    - Add line glyphs to represent Chris Paul's average assists, filled in orange, with a width of 3 and transparency of 0.2.

In [6]:
## NOTE: the data for this exercise doesn't seem to come from nba.csv, and DataCamp doesn't share the dataset, so `steph` and `chris` are left undefined and the next cell cannot be run as-is.

#steph = ColumnDataSource(nba_org[nba_org["player"] == "Stephen Curry"])
#chris = ColumnDataSource(nba_org[nba_org["player"] == "Chris Paul"])

In [None]:
fig = figure(x_axis_label="Season", y_axis_label="Performance")

# One line per (player source, stat column), with its width, colour,
# transparency and legend text. Steph Curry's lines come first, then Chris Paul's.
line_specs = [
    (steph, "points", 2, "green", 0.5, "Steph Curry Points"),
    (steph, "assists", 4, "purple", 0.3, "Steph Curry Assists"),
    (chris, "points", 1, "red", 0.8, "Chris Paul Points"),
    (chris, "assists", 3, "orange", 0.2, "Chris Paul Assists"),
]
for player_source, stat_column, width, colour, opacity, legend_text in line_specs:
    fig.line(
        x="season",
        y=stat_column,
        source=player_source,
        line_width=width,
        line_color=colour,
        alpha=opacity,
        legend_label=legend_text,
    )

output_file(filename="point_guards.html")
show(fig)

## Highlighting by glyph size

The sports media agency you worked with previously has contacted you as they would like some more visualizations! They've requested a plot that uses different size glyphs to communicate about player statistics.

The nba dataset has been preloaded for you, and subset into two DataFrames, east and west, for the East and West conferences. You'll create a plot visualizing points against assists, with the glyph size depending on how many blocks per game a player averages.

### Instructions
    - Use east and west to create east_sizes and west_sizes—dividing east["blocks"] and west["blocks"] by 5, respectively.
    - Add circle glyphs to fig representing points versus assists for east; use a blue fill color, a fill_alpha of 0.3, a legend label of "East", and set the radius to east_sizes.
    - Repeat the above for west, but fill in red, set a legend label of "West", and use west_sizes to change the glyph size.

In [8]:
# Split the nba dataset by conference.
in_east = nba_org["conference"].eq("East")
in_west = nba_org["conference"].eq("West")
east = nba_org.loc[in_east]
west = nba_org.loc[in_west]

In [9]:
fig = figure(
    title="NBA Points, Blocks, and Assists by Conference",
    x_axis_label="Assists",
    y_axis_label="Points",
)

# Glyph radius is one fifth of each player's "blocks" value.
east_sizes = east["blocks"].div(5)
west_sizes = west["blocks"].div(5)

# Draw the East circles first, then the West; only colour, radii and label differ.
for conference, radii, colour, conference_label in (
    (east, east_sizes, "blue", "East"),
    (west, west_sizes, "red", "West"),
):
    fig.circle(
        x=conference["assists"],
        y=conference["points"],
        radius=radii,
        fill_color=colour,
        fill_alpha=0.3,
        legend_label=conference_label,
    )

output_file(filename="size_contrast.html")
show(fig)

## Steals vs. assists

The agency has heard about linear color mapping and would like you to incorporate it into a plot visualizing steals versus assists. You will use linear color mapping to change the glyph color as assists increase.

A source object called source has been created from the nba dataset and preloaded for you.

### Instructions
    - Import RdBu8 and linear_cmap from their respective modules.
    - Create mapper, adjusting color as the values of "assists" change using RdBu8.
    - Create a figure, with the title of "Steals vs. Assists".
    - Complete the call of circle glyphs to add the mapper.

In [10]:
# `nba` is an alias for the full dataset; `source` wraps that same frame for Bokeh.
nba = nba_org
source = ColumnDataSource(nba)

In [11]:
# Import required modules
from bokeh.palettes import RdBu8
from bokeh.transform import linear_cmap

# Create mapper: colour follows "assists", spread linearly over RdBu8 between
# the smallest and largest value in the dataset.
# Series.min()/max() skip missing values; the builtin min()/max() compare
# element by element, so a NaN in the column can make the bounds NaN or
# order-dependent.
mapper = linear_cmap(
    field_name="assists",
    palette=RdBu8,
    low=nba["assists"].min(),
    high=nba["assists"].max(),
)

# Create the figure
fig = figure(x_axis_label="Steals", y_axis_label="Assists", title="Steals vs. Assists")

# Add circle glyphs, coloured through the mapper
fig.circle(x="steals", y="assists", source=source, color=mapper)
output_file(filename="steals_vs_assists.html")
show(fig)

## Adding a color bar

The agency has requested you include a ColorBar so people viewing the plot will understand the thresholds at which the glyph color changes.

The figure from the previous exercise, a mapper, and glyphs, have all been provided for you.

### Instructions
    - Import ColorBar.
    - Create color_bar using the relevant key from mapper, and set width to 8 pixels.
    - Add color_bar to the figure's layout on the right-hand side.

In [12]:
# Import ColorBar
from bokeh.models import ColorBar

# Rebuild the mapper, figure and glyphs from the previous exercise.
# Series.min()/max() skip missing values; the builtin min()/max() compare
# element by element, so a NaN in the column can make the bounds NaN or
# order-dependent.
mapper = linear_cmap(
    field_name="assists",
    palette=RdBu8,
    low=nba["assists"].min(),
    high=nba["assists"].max(),
)
fig = figure(x_axis_label="Steals", y_axis_label="Assists", title="Steals vs. Assists")
fig.circle(x="steals", y="assists", source=source, color=mapper)

# Create the color_bar from the mapper's "transform" entry, 8 pixels wide
color_bar = ColorBar(color_mapper=mapper['transform'], width=8)

# Update layout with color_bar on the right
fig.add_layout(color_bar, "right")
output_file(filename="steals_vs_assists_color_mapped.html")
show(fig)

## Free throw percentage by position

The agency has asked for one final plot from you. You'll use factor_cmap to build a scatter plot visualizing free throw percentage versus average points, displaying each player position as a different color.

A source object called source has been created from the nba dataset and preloaded for you. The variable TOOLTIPS, containing the name of the player, has also been created, so it can be viewed when hovering the mouse over the plot.

### Instructions
    - Import Category10_5 and factor_cmap.
    - Create positions, a list containing "PG", "SG", "SF", "PF", and "C".
    - Pass "position" to the legend_field argument and complete the fill_color argument within fig.circle().

In [13]:
# Import modules
from bokeh.palettes import Category10_5
from bokeh.transform import factor_cmap

# Player positions passed to factor_cmap as its factors, with the Category10_5 palette.
positions = ["PG", "SG", "SF", "PF", "C"]
position_colors = factor_cmap("position", palette=Category10_5, factors=positions)

fig = figure(
    title="Free Throw Percentage vs. Average Points",
    x_axis_label="Free Throw Percentage",
    y_axis_label="Points",
    tooltips=TOOLTIPS,
)

# One circle per player; fill colour and legend entry both come from "position".
fig.circle(
    x="free_throw_perc",
    y="points",
    source=source,
    legend_field="position",
    fill_color=position_colors,
)

output_file(filename="average_points_vs_free_throw_percentage.html")
show(fig)

## Sales by time and type of day

The bakery you are working with is considering a review of their opening hours. As such, they have asked you to produce a visualization displaying sales information by the time of day for weekdays and weekends.

The day_time column of bakery contains four values: "Morning", "Afternoon", "Evening", and "Night".

The dataset also contains "Weekend" and "Weekday" values for the day_type column.

You will produce a grouped bar plot visualizing sales by both time and type of day. FactorRange has been imported for you.

The bakery dataset has been grouped by day_time and day_type, stored as grouped_bakery, and preloaded for you. A tuple containing every variation of these two columns has been stored as factors and also preloaded for you.

### Instructions
    - Create fig, setting the x_range by calling FactorRange() and passing *factors, assigning "Sales" to the y-axis label, and giving a title of "Sales by type of day".
    - Add bar glyphs for factors, with the top represented by grouped_bakery["sales"], and bar width to 90%.
    - Update the title font size to "25px".
    - Change the format of the title to center alignment.

In [14]:
# Category pairs for the grouped bar plot, written as (day_type, day_time),
# in the order the bars should appear on the x-axis.
factors = [
    ('Weekday', 'Morning'), ('Weekday', 'Afternoon'), ('Weekday', 'Evening'), ('Weekday', 'Night'),
    ('Weekend', 'Morning'), ('Weekend', 'Afternoon'), ('Weekend', 'Evening'), ('Weekend', 'Night'),
]

with zipfile.ZipFile('bakery.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('.')
bakery_org = pd.read_csv("bakery.csv")

# Total sales per (day_type, day_time).
# groupby() returns its groups sorted alphabetically, which is not the order of
# `factors`. The plotting cell pairs `factors` with grouped_bakery["sales"] by
# position, so the totals are reindexed here to make row i belong to factors[i].
# A combination with no rows in the data gets a total of 0, so the two
# sequences always have the same length.
factor_index = pd.MultiIndex.from_tuples(factors, names=["day_type", "day_time"])
grouped_bakery = (
    bakery_org.groupby(["day_type", "day_time"])["sales"]
    .sum()
    .reindex(factor_index, fill_value=0)
    .reset_index()[["day_time", "day_type", "sales"]]
)

In [15]:
# Categorical x-axis built from the (day_type, day_time) pairs in `factors`.
fig = figure(
    title="Sales by type of day",
    x_range=FactorRange(*factors),
    y_axis_label="Sales",
)

# One bar per factor with width 0.9; y ticks use the "$0,0" numeral format.
fig.vbar(x=factors, top=grouped_bakery["sales"], width=0.9)
sales_axis = fig.yaxis[0]
sales_axis.formatter = NumeralTickFormatter(format="$0,0")

# Centre the title and render it at 25px.
plot_title = fig.title
plot_title.align = "center"
plot_title.text_font_size = "25px"

output_file("sales_by_type_of_day.html")
show(fig)

## Products sold by the time of day

The bakery would like a view of how many products are sold at different times of the day.

A figure, fig, has been set up and preloaded, including a HoverTool to display the time of day, item name, and the number of items sold.

You will need to add a title to the legend so stakeholders understand its meaning, move the legend to avoid obstructing the view of observations, and change the legend to hide observations upon clicking.

### Instructions
    - Add a title to the legend called "Time of Day".
    - Move the legend to the top left corner.
    - Make the legend interactive, hiding observations by their legend label upon click.

In [None]:
# NOTE(review): `morning`, `afternoon` and `evening` are not defined in any
# visible cell, and TOOLTIPS still holds the NBA fields (@player, @team, ...).
# This cell presumably needs bakery-specific sources and tooltips before it
# can run — confirm.
fig = figure(x_axis_label="Count of Products Sold", y_axis_label="Sales", title="Bakery Product Sales", tooltips=TOOLTIPS)
fig.circle(x="count", y="sales", source=morning, line_color="red", size=12, fill_alpha=0.4, legend_label="Morning")
fig.circle(x="count", y="sales", source=afternoon, fill_color="purple", size=10, fill_alpha=0.6, legend_label="Afternoon")
fig.circle(x="count", y="sales", source=evening, fill_color="yellow", size=8, fill_alpha=0.6, legend_label="Evening")

# Add legend title
fig.legend.title = "Time of Day"

# Move the legend
fig.legend.location = "top_left"

# Make the legend interactive: clicking a legend entry hides its glyphs
fig.legend.click_policy = "hide"
fig.yaxis[0].formatter = NumeralTickFormatter(format="$0.00")

# Save with an explicit .html extension, like every other plot in this
# notebook, so the saved file is recognised as a web page.
output_file("Sales_by_time_of_day.html")
show(fig)

## Box annotations for sales performance

The bakery has asked for one last plot from you. The visualization will display sales by date with two box annotations, so they can easily see which dates are under their revenue target of $250.

A figure, fig, with line glyphs, has been created using the code below:

```python
sales = bakery.groupby("date", as_index=False)["sales"].sum()
source = ColumnDataSource(data=sales)
fig = figure(x_axis_label="Date", y_axis_label="Revenue ($)")
fig.line(x="date", y="sales", source=source)
fig.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")
```

fig is preloaded for you. Your task is to create box annotations to show where sales in the bakery dataset are above or below $250.

### Instructions 1/2
    - Create low_box, setting the top limit equal to $250, transparency to 0.1, and fill_color to "red".
    - Create high_box, setting the bottom limit equal to $250, transparency to 0.2, and fill_color to "green".

In [None]:
# BoxAnnotation is not among the bokeh.models names imported at the top of
# the notebook, so bring it into scope here.
from bokeh.models import BoxAnnotation

# Create low_box: red band, 0.1 opacity, with its top edge at 250
low_box = BoxAnnotation(top=250, fill_alpha=0.1, fill_color='red')

# Create high_box: green band, 0.2 opacity, with its bottom edge at 250
high_box = BoxAnnotation(bottom=250, fill_alpha=0.2, fill_color='green')

### Instructions 2/2
    - Add low_box to the figure.
    - Update the figure to include high_box.

In [None]:
# BoxAnnotation is not among the bokeh.models names imported at the top of
# the notebook, so bring it into scope here.
from bokeh.models import BoxAnnotation

# NOTE(review): `fig` is expected to be the sales-by-date line plot shown in
# the markdown above; no visible cell builds it — confirm it exists before
# running this cell.

# Create low_box: red band, 0.1 opacity, with its top edge at 250
low_box = BoxAnnotation(top=250, fill_alpha=0.1, fill_color='red')

# Create high_box: green band, 0.2 opacity, with its bottom edge at 250
high_box = BoxAnnotation(bottom=250, fill_alpha=0.2, fill_color='green')

# Add low_box
fig.add_layout(low_box)

# Add high_box
fig.add_layout(high_box)

output_file(filename="sales_annotated.html")
show(fig)

## Setting up a polygon annotation

A member of a hedge fund, who is an avid sports fan, saw your work for the sports media agency and has reached out as they need some plots produced for stock market analysis.

They are looking into the online media market and have provided you with a dataset called netflix, containing stock prices for Netflix. It has been stored as a source object called source and preloaded for you.

A figure, fig, has been created containing line glyphs. They would like you to highlight a period of significant growth. You plan to use a polygon annotation to draw attention to changes in Netflix's stock price in mid-2017 and add it to the line plot. To start, you need to create the start and end dates and timestamps.

### Instructions
    - Import PolyAnnotation.
    - Set start_date as 30th June 2017 and end_date as 27th July 2017.
    - Create start_float and end_float by calling .timestamp() and multiplying the result by 1000.

In [None]:
# Import PolyAnnotation
from bokeh.models import PolyAnnotation

# `dt` is not imported by any earlier cell, so import it where it is used.
import datetime as dt

# Create start and end dates.
# They are kept timezone-naive because the annotation cell compares them
# against netflix["date"] (presumably a naive datetime column — confirm).
start_date = dt.datetime(2017, 6, 30)
end_date = dt.datetime(2017, 7, 27)

# Create start and end floats: milliseconds since the Unix epoch.
# .timestamp() on a naive datetime assumes the machine's local timezone, so
# the result would shift with wherever the notebook runs. Pinning the value
# to UTC makes it the same everywhere.
start_float = start_date.replace(tzinfo=dt.timezone.utc).timestamp() * 1000
end_float = end_date.replace(tzinfo=dt.timezone.utc).timestamp() * 1000

## Annotating Netflix stock price growth

As a reminder, you previously created dates and timestamps, displayed below:

```python
start_date = dt.datetime(2017, 6, 30)
end_date = dt.datetime(2017, 7, 27)
start_float = start_date.timestamp() * 1000
end_float = end_date.timestamp() * 1000
```

The final steps to display the Netflix line plot with a polygon annotation are to subset the data for the stock price, call PolyAnnotation(), and add the annotation to the figure's layout.

### Instructions
    - Create start_data by subsetting netflix for the row where "date" equals start_date.
    - Repeat for end_data to get the close value from end_date.
    - Create polygon, fill in "green", with 0.4 transparency, and finish the xs and ys arguments.
    - Add polygon to the figure's layout.

In [None]:
# Closing price on each boundary date (first matching row).
start_data = netflix.loc[netflix["date"] == start_date, "close"].to_numpy()[0]
end_data = netflix.loc[netflix["date"] == end_date, "close"].to_numpy()[0]

# Polygon corners in order: start-low, start-high, end-high, end-low.
# x is the boundary timestamp; y is the close price -/+ 10 at the start and
# +/- 15 at the end.
corner_xs = [start_float, start_float, end_float, end_float]
corner_ys = [start_data - 10, start_data + 10, end_data + 15, end_data - 15]
polygon = PolyAnnotation(
    xs=corner_xs,
    ys=corner_ys,
    fill_color="green",
    fill_alpha=0.4,
)

# Attach the annotation to the figure and display it.
fig.add_layout(polygon)
output_file(filename="netflix_annotated.html")
show(fig)