# Basic charts
## Load libraries

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.data as data
import pandas as pd
import numpy as np
import collections

## Scatter Plots

### General scatterplot framework

In [None]:
# Minimal example: px.scatter accepts plain sequences for x and y.
# The points are the squares of 0..4.
fig = px.scatter(
    x=list(range(5)),
    y=[n * n for n in range(5)],
)
fig.show()

### Scatterplot using a dataframe

In [None]:
# Load the iris sample data and refer to its columns by name.
df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
)
fig.show()

### Add another hover variable

In [None]:
# Same axes as before, with three more columns wired in:
# species -> color, petal_length -> marker size, petal_width -> hover_data.
fig = px.scatter(
    px.data.iris(),
    x="sepal_width", y="sepal_length",
    color="species", size="petal_length",
    hover_data=["petal_width"],
)
fig.show()

### Colors based on a continuous variable

In [None]:
# Color the points by the petal_length column.
fig = px.scatter(
    px.data.iris(),
    x="sepal_width", y="sepal_length",
    color="petal_length",
)
fig.show()

### Add a plotting symbol

In [None]:
# The species column drives both the color and the marker symbol.
fig = px.scatter(
    px.data.iris(),
    x="sepal_width", y="sepal_length",
    color="species", symbol="species",
)
fig.show()

### Scatter plots and Categorical Axes

In [None]:
# Long-format medal table; `df` is reused by the grouped-scatter cells below.
df = px.data.medals_long()
# Counts go on x and the nation category on y; medal sets color and symbol.
fig = px.scatter(
    df,
    x='count', y='nation',
    color='medal', symbol='medal',
)
fig.show()

### Grouped Scatter Points

In [None]:
# Medal counts per nation, one trace per medal type. `df` is the
# medals_long frame loaded in the previous cell.
# (The stray `from matplotlib.pyplot import scatter` was dropped: nothing used
# it, and it made the cell fail wherever matplotlib is not installed.)
fig = px.scatter(
    data_frame=df,
    y='count',
    x='nation',
    color='medal'
)
fig.update_traces(marker_size=10)
# 'group' lays out points that share an x category next to one another
# instead of on top of each other.
fig.update_layout(scattermode='group')
fig.show()

### Control the width of the group

In [None]:
# Same grouped scatter as above, with the spacing between groups adjusted.
# `df` is the medals_long frame loaded earlier.
# (The stray `from matplotlib.pyplot import scatter` was dropped: nothing used
# it, and it made the cell fail wherever matplotlib is not installed.)
fig = px.scatter(
    data_frame=df,
    y='count',
    x='nation',
    color='medal'
)
fig.update_traces(marker_size=10)
# scattergap sets the gap between the groups of adjacent categories.
fig.update_layout(scattermode='group', scattergap=0.75)
fig.show()

### Error bars

In [None]:
# Derive an error column 'e' (1% of sepal_width) and use it as the error
# bar size on both axes.
df = px.data.iris()
df['e'] = df['sepal_width'].div(100)
fig = px.scatter(
    df,
    x='sepal_width', y='sepal_length',
    color='species',
    error_x='e', error_y='e',
)
fig.show()

### Marginal Distribution Plots

In [None]:
# Scatter with a marginal plot on each axis: a histogram for x, a rug for y.
df = px.data.iris()
fig = px.scatter(
    df,
    x='sepal_width', y='sepal_length',
    marginal_x='histogram', marginal_y='rug',
)
fig.show()

### Facetting

In [None]:
# Tips data, faceted by sex (columns) and time (rows).
# `df` is reused by the trendline cell that follows.
df = px.data.tips()
fig = px.scatter(
    df,
    x='total_bill', y='tip',
    color='smoker',
    facet_row='time', facet_col='sex',
)
fig.show()

### Linear Regression and Other Trendlines

In [None]:
# Overlay an 'ols' trendline on the tips scatter (`df` is the tips frame
# loaded in the previous cell).
fig = px.scatter(
    df,
    x='total_bill', y='tip',
    trendline='ols',
)
fig.show()

### Line plots with Plotly Express

In [None]:
# One full period of cos(t), sampled at 100 evenly spaced points.
t = np.linspace(0, 2 * np.pi, num=100)
fig = px.line(
    x=t, y=np.cos(t),
    title='Cosine Wave',
    # Relabel the axes, which are otherwise named after the x/y arguments.
    labels={'x': 't', 'y': 'cos(t)'},
)
fig.show()

In [None]:
# Keep only the Oceania rows; `df` is shared with the next two cells.
df = px.data.gapminder().query('continent == "Oceania"')
# One line per country: life expectancy over time.
fig = px.line(
    df,
    x='year', y='lifeExp',
    color='country',
)
fig.show()

In [None]:
# Same lines as before, with markers enabled (markers=True).
fig = px.line(
    df,
    x='year', y='lifeExp',
    color='country',
    markers=True,
)
fig.show()

In [None]:
# Here the country column drives the marker symbol as well as the color.
fig = px.line(
    df,
    x='year', y='lifeExp',
    color='country', symbol='country',
)
fig.show()

### Line plots on Date axes

In [None]:
# Plot the GOOG column of the stocks sample data against its date column.
df = px.data.stocks()
fig = px.line(df, x='date', y='GOOG')
fig.show()

### Data Order in Scatter and Line Charts

In [None]:
# x is deliberately out of order (1, 3, 2, 4) to compare the line drawn
# from unsorted rows with the one drawn after sorting.
df = pd.DataFrame({
    "x": [1, 3, 2, 4],
    "y": [1, 2, 3, 4],
})
fig = px.line(data_frame=df, x="x", y="y", title="Unsorted Input")
fig.show()

# Sort the rows by x and plot again.
df = df.sort_values("x")
fig = px.line(data_frame=df, x="x", y="y", title="Sorted Input")
fig.show()

### Connected Scatterplots

In [None]:
# Restrict gapminder to the two countries being compared.
df = px.data.gapminder().query("country in ['Canada', 'Botswana']")

# lifeExp against gdpPercap, one line per country, each point labelled
# with its year.
fig = px.line(
    df,
    x='lifeExp', y='gdpPercap',
    color='country',
    text='year',
)
# Position the year labels at the bottom right of each point.
fig.update_traces(textposition='bottom right')
fig.show()

### Swarm (or Beeswarm) Plots

In [None]:
def negative_1_if_count_is_odd(count):
    """Return -1 when ``count`` is odd and 1 otherwise.

    swarm() multiplies a point's vertical distance by this sign, so
    odd-numbered entries in a bin get a negative y coordinate and
    even-numbered entries a positive one. The first entry sits at distance
    0, so the sign only becomes visible from entry 3 onwards (3, 5, 7, ...).
    """
    return -1 if count % 2 == 1 else 1


def swarm(
    X_series,
    fig_title,
    point_size=16,
    fig_width=800,
    gap_multiplier=1.2,
    bin_fraction=0.95,  # slightly undersizes the bins to avoid collisions
):
    """Build a one-dimensional swarm (beeswarm) scatter figure.

    Values are placed along the x axis and stacked vertically within narrow
    vertical strips ("bins") so that markers do not overlap.

    Args:
        X_series: pandas Series of numeric values to plot. It is copied and
            sorted, so the caller's object is not modified.
        fig_title: title passed to ``px.scatter``.
        point_size: marker size, also used as the bin width, in px.
        fig_width: figure width in px.
        gap_multiplier: scales the vertical distance between stacked markers
            (distance = ``point_size * gap_multiplier``).
        bin_fraction: fraction of ``fig_width`` used when assigning bins.

    Returns:
        The plotly figure produced by ``px.scatter``, with marker size,
        hover text, figure size and y axis adjusted.

    Raises:
        ValueError: if ``X_series`` is empty.
    """
    # Sorting aligns columns in attractive c-shaped arcs rather than having
    # columns that vary unpredictably in the x-dimension. It also means bins
    # are visited sequentially when collision-prevention offsets are added.
    X_series = X_series.copy().sort_values()
    if len(X_series) == 0:
        raise ValueError("swarm() needs at least one value in X_series")

    # The marker size is measured in px, so each x value is treated as a
    # fraction of the way from the minimum to the maximum value.
    min_x = min(X_series)
    max_x = max(X_series)
    # If every value is identical the range is zero. Any non-zero stand-in
    # works: (x - min_x) is 0 for every point, so they all land in bin 0
    # instead of triggering a division by zero.
    x_range = (max_x - min_x) or 1

    list_of_rows = []
    # Rows grouped by bin, in sorted order, so the collision pass only has to
    # look at the neighbouring bin rather than rescanning every row.
    rows_by_bin = collections.defaultdict(list)
    # Number of points (later: number of used slots) in each bin, used to
    # assign y coordinates that avoid overlap.
    bin_counter = collections.Counter()

    for x_val in X_series:
        # Each bin is a vertical strip slightly narrower than one marker.
        bin_index = (
            fig_width * bin_fraction * (x_val - min_x) / x_range
        ) // point_size

        bin_counter[bin_index] += 1

        # The "y-slot" is the running count of points in this bin. It is
        # enough to compute the y coordinate unless the point collides with
        # one in the bin to its left.
        row = {"x": x_val, "y_slot": bin_counter[bin_index], "bin": bin_index}
        list_of_rows.append(row)
        rows_by_bin[bin_index].append(row)

    # Offset any point that collides with a point in the bin to its left, and
    # apply that offset to all subsequent points in the same bin. This
    # arranges points in a c-curve where points toward the edges are (weakly)
    # further right.
    current_bin = 0
    offset = 0
    for row in list_of_rows:
        if current_bin != row["bin"]:
            # Moved to a new bin, so the accumulated offset starts over.
            current_bin = row["bin"]
            offset = 0
        # "Look left": only rows in the immediately preceding bin can collide.
        for other_row in rows_by_bin.get(current_bin - 1, ()):
            # "Bubble" the entry up until it reaches a slot that is either
            # different from the neighbour's or at least one marker away.
            while (
                other_row["y_slot"] == row["y_slot"] + offset
                and (
                    fig_width * (row["x"] - other_row["x"]) / x_range
                    // point_size
                ) < 1
            ):
                offset += 1
                # Track the number of *used* slots so the even/odd
                # correction below sees the right parity.
                bin_counter[current_bin] += 1

        row["y_slot"] += offset
        # Spread the slots above and below the axis as 0, 1, -1, 2, -2, ...
        # and scale by point_size * gap_multiplier to get px-like units.
        row["y"] = (
            (row["y_slot"] // 2)
            * negative_1_if_count_is_odd(row["y_slot"])
            * point_size
            * gap_multiplier
        )

    # If a bin holds an even number of slots, shift its points down half a
    # step so equally many sit above and below the axis.
    for row in list_of_rows:
        if bin_counter[row["bin"]] % 2 == 0:
            row["y"] -= point_size * gap_multiplier / 2

    df = pd.DataFrame(list_of_rows)
    # One way to make this more flexible (e.g. multiple categories) would be
    # to return the "swarmified" y coordinates here and plot outside the
    # function, adding a per-category offset to give each category its own
    # row.

    fig = px.scatter(
        df,
        x="x",
        y="y",
        title=fig_title,
    )
    fig.update_traces(
        marker_size=point_size,
        # Show only x on hover: the y coordinate is a layout artefact and
        # would be irrelevant/misleading.
        hovertemplate="<b>value</b>: %{x}",
    )
    # Width and height are set explicitly because collision avoidance was
    # computed in the same units as the marker size.
    fig.update_layout(
        width=fig_width,
        height=point_size * max(bin_counter.values()) + 200,
    )
    fig.update_yaxes(
        showticklabels=False,  # Turn off y-axis labels
        ticks='',               # Remove the ticks
        title=""
    )
    return fig


# px.data.iris() returns a pandas DataFrame; swarm() takes one of its columns.
df = px.data.iris()
fig = swarm(
    X_series=df["sepal_length"],
    fig_title="Sepal length distribution from 150 iris samples",
)
# The iris data set entries are rounded so there are no collisions.
# A more interesting test case for collision avoidance is:
# fig = swarm(
#     pd.Series([1, 1.5, 1.78, 1.79, 1.85, 2,
#                2, 2, 2, 3, 3, 2.05, 2.1, 2.2, 2.5, 12]),
#     "Collision avoidance test",
# )
fig.show()

## Scatter and line plots with go.Scatter
### Simple Scatter Plot

In [None]:
# Number of samples along the sine curve. N now drives np.linspace directly;
# it was previously set to 1000 and never used, while 100 points were drawn.
# The plotted curve is unchanged.
N = 100
t = np.linspace(0, 10, N)
y = np.sin(t)

# A single trace drawn with both markers and connecting lines.
fig = go.Figure(
    data=go.Scatter(x=t, y=y, mode='markers+lines')
)

fig.show()

In [None]:
# Fix the seed so the random series are reproducible.
np.random.seed(1)

N = 100
random_x = np.linspace(0, 1, num=N)
# Three noisy series, shifted by +5, 0 and -5 so they do not overlap.
# They are drawn in this order so the seeded values stay the same.
random_y0 = np.random.randn(N) + 5
random_y1 = np.random.randn(N)
random_y2 = np.random.randn(N) - 5

# One trace per drawing mode; each trace is named after its mode.
fig = go.Figure(data=[
    go.Scatter(x=random_x, y=series, mode=draw_mode, name=draw_mode)
    for series, draw_mode in (
        (random_y0, 'markers'),
        (random_y1, 'lines+markers'),
        (random_y2, 'lines'),
    )
])

fig.show()

### Bubble Scatter Plots

In [None]:
x = list(range(1, 6))
y = list(range(10, 15))

# Per-point marker sizes (40..120 in steps of 20) and color values (0..4).
bubble_trace = go.Scatter(
    x=x,
    y=y,
    mode='markers',
    marker={
        'size': list(range(40, 121, 20)),
        'color': list(range(5)),
    },
)
fig = go.Figure(data=bubble_trace)

fig.show()

### Style Scatter Plots

In [None]:
t = np.linspace(0, 10, num=100)

# sin(t) and cos(t) as marker-only traces, each with its own RGBA color.
fig = go.Figure(data=[
    go.Scatter(
        x=t,
        y=np.sin(t),
        mode='markers',
        name='sin(t)',
        marker=dict(color='rgba(152, 0, 0, .8)'),
    ),
    go.Scatter(
        x=t,
        y=np.cos(t),
        mode='markers',
        name='cos(t)',
        marker=dict(color='rgba(255, 182, 193, .8)'),
    ),
])

# Styling shared by both traces: marker size and outline width.
fig.update_traces(
    mode='markers',
    marker=dict(size=10, line=dict(width=2)),
)

# Set the title and turn off the zero line on both axes.
fig.update_layout(
    title_text="Marker Styling",
    xaxis=dict(zeroline=False),
    yaxis=dict(zeroline=False),
)

fig.show()

### Data Labels on Hover

In [None]:
# Bind the CSV to its own name instead of `data`. The notebook imports
# plotly.data under the alias `data`; rebinding that alias to a DataFrame
# makes any `data.<dataset>()` call made afterwards fail with AttributeError.
df_states = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/2014_usa_states.csv")

# Population per state, colored by population; `text` supplies the state
# name shown on hover.
fig = go.Figure(data=go.Scatter(x=df_states['Postal'],
                                y=df_states['Population'],
                                mode='markers',
                                marker_color=df_states['Population'],
                                text=df_states['State']))  # hover text goes here

fig.update_layout(title=dict(text='Population of USA States'))
fig.show()

### Scatter with Color Dimension

In [None]:
# Marker styling: each point is colored by an independent standard-normal
# draw on the Viridis scale, and showscale=True displays the color bar.
# (The unused `from _operator import mod` was removed; it pulled a name from
# a private module that nothing in the cell referenced.)
ray_style = dict(
    size=16,
    color=np.random.randn(500),
    colorscale='Viridis',
    showscale=True
)
fig = go.Figure(data=go.Scatter(
    y=np.random.randn(500),
    mode='markers',
    marker=ray_style
))
fig.show()


### Trace Zorder

In [None]:
# Reach the sample data through px.data rather than a bare `data` name: a
# short alias like that is easy to rebind to a DataFrame elsewhere in the
# notebook, after which `data.gapminder()` raises AttributeError.
df_europe = px.data.gapminder().query("continent == 'Europe'")


def _life_expectancy_trace(country, zorder):
    """Return a lines+markers trace of lifeExp over year for one country.

    ``zorder`` sets the trace's drawing layer; traces with a higher zorder
    are drawn in front of those with a lower one.
    """
    country_rows = df_europe[df_europe['country'] == country]
    return go.Scatter(x=country_rows['year'],
                      y=country_rows['lifeExp'],
                      mode='lines+markers',
                      zorder=zorder,
                      name=country,
                      marker=dict(size=15))


# Stacking order from front to back: France (3), Spain (2), Germany (1).
trace1 = _life_expectancy_trace('France', zorder=3)
trace2 = _life_expectancy_trace('Germany', zorder=1)
trace3 = _life_expectancy_trace('Spain', zorder=2)

layout = go.Layout(title=dict(text='Life Expectancy in Europe Over Time'))

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)

fig.show()

### Large Data Sets

In [None]:
N = 100_000

# 100k normally distributed points drawn with go.Scatter.
# x, y and color are drawn in this order so the random stream is consumed
# exactly as before.
large_trace = go.Scatter(
    x=np.random.randn(N),
    y=np.random.randn(N),
    mode='markers',
    marker={
        'color': np.random.randn(N),
        'colorscale': 'Viridis',
        'line': {'width': 1},
    },
)
fig = go.Figure(data=large_trace)

fig.show()

In [None]:
N = 100_000
# Polar samples: radius in [0, 1) and angle in [0, 2*pi), converted to
# Cartesian coordinates below.
r = np.random.uniform(0, 1, N)
theta = np.random.uniform(0, 2 * np.pi, N)
# Same marker styling as the previous cell, drawn with go.Scattergl.
gl_trace = go.Scattergl(
    x=r * np.cos(theta),
    y=r * np.sin(theta),
    mode='markers',
    marker={
        'color': np.random.randn(N),
        'colorscale': 'Viridis',
        'line': {'width': 1},
    },
)
fig = go.Figure(data=gl_trace)

fig.show()