# Scatter plots

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Core plotting / data stack.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# seaborn: apply its default theme, scale fonts for notebook display, and keep
# the current colour palette in `cp` so later cells can index colours per panel.
import seaborn as sns
sns.set()
sns.set_context('notebook', font_scale=1.5)
cp = sns.color_palette()

# ggplot (used by the "ggplot" section below).
import ggplot

# altair (used by the "altair" section below).
import altair

# Explicitly register pandas' converters with matplotlib.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

## matplotlib

In [None]:
# Read the iris measurements into the frame every later cell plots from,
# then preview the first five rows.
df = pd.read_csv(
    'data/iris.csv'
)
df.head(n=5)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7.5, 5))

# One scatter call per species, so each species gets its own colour from the
# property cycle and its own legend entry (in order of first appearance).
for s in df.species.unique():
    subset = df[df.species == s]
    ax.scatter(subset.petalLength, subset.petalWidth, label=s)

ax.set(xlabel='Petal Length',
       ylabel='Petal Width',
       title='Petal Width v. Length -- by Species')

ax.legend(loc='upper left')
# plt.show() rather than fig.show(): with the inline backend fig.show() cannot
# open a window and only emits a "non-GUI backend" UserWarning.
plt.show()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7.5, 5))

def scatter(group, label=None, ax=None):
    """Draw one group's petal length/width points as unconnected markers.

    Parameters
    ----------
    group : DataFrame with 'petalLength' and 'petalWidth' columns.
    label : legend label; defaults to ``group.name`` when that attribute is
        present (as it is when called through ``groupby(...).apply``).
    ax : Axes to draw on; defaults to the current pyplot axes.
    """
    if ax is None:
        ax = plt.gca()
    if label is None:
        label = getattr(group, 'name', None)
    ax.plot(group['petalLength'],
            group['petalWidth'],
            'o', label=label)

# Iterate over the groups explicitly instead of using groupby.apply for its
# side effects: the helper returns nothing to combine, and the loop draws each
# species exactly once on the Axes created above.
for name, group in df.groupby('species'):
    scatter(group, label=name, ax=ax)

ax.set(xlabel='Petal Length',
       ylabel='Petal Width',
       title='Petal Width v. Length -- by Species')

ax.legend(loc='upper left')
# plt.show() rather than fig.show(): with the inline backend fig.show() cannot
# open a window and only emits a "non-GUI backend" UserWarning.
plt.show()

In [None]:
# One panel per species, laid out side by side.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

for i, s in enumerate(df.species.unique()):
    tmp = df[df.species == s]

    # color= (not c=): a bare RGB tuple passed as `c` is ambiguous with an
    # array of values to colour-map and makes matplotlib emit a warning.
    ax[i].scatter(tmp.petalLength, tmp.petalWidth, color=cp[i])

    ax[i].set(xlabel='Petal Length',
              ylabel='Petal Width',
              title=s)

fig.tight_layout()

In [None]:
# One panel per species, all drawn on identical axis ranges so the panels are
# directly comparable.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# The limits depend only on the full data set, so compute them once:
# 5% headroom above the overall maximum of each measurement.
y_top = 1.05 * np.max(df.petalWidth)
x_right = 1.05 * np.max(df.petalLength)

for i, s in enumerate(df.species.unique()):
    tmp = df[df.species == s]

    # color= (not c=): a bare RGB tuple passed as `c` is ambiguous with an
    # array of values to colour-map and makes matplotlib emit a warning.
    ax[i].scatter(tmp.petalLength,
                  tmp.petalWidth,
                  color=cp[i])

    ax[i].set(xlabel='Petal Length',
              ylabel='Petal Width',
              title=s)

    ax[i].set_ylim(bottom=0, top=y_top)
    ax[i].set_xlim(left=0, right=x_right)

fig.tight_layout()

In [None]:
# Attach an arbitrary two-level factor ('A'/'B') to demonstrate row x column
# faceting in the cells below.
n_rows = df.shape[0]

# Integer arithmetic: 'A' gets the larger half when n_rows is odd, so the two
# counts always add up to n_rows (float halving + int() truncation dropped a
# label for odd row counts and made the column assignment fail).
tmp_n = n_rows - n_rows // 2

# Fixed seed so Restart & Run All reproduces the same assignment (and plots).
rng = np.random.RandomState(0)
df['random_factor'] = rng.permutation(['A'] * tmp_n + ['B'] * (n_rows - tmp_n))
df.head()

In [None]:
# Rows are random_factor levels, columns are species.
fig, ax = plt.subplots(2, 3, figsize=(15, 10))

species = df.species.unique()
factors = df.random_factor.sort_values().unique()

# Shared limits depend only on the full data set, so compute them once:
# 5% headroom above the overall maximum of each measurement.
y_top = 1.05 * np.max(df.petalWidth)
x_right = 1.05 * np.max(df.petalLength)

# this is preposterous -- don't do this
for i, s in enumerate(species):
    for j, r in enumerate(factors):
        tmp = df[(df.species == s) & (df.random_factor == r)]
        panel = ax[j][i]

        # i * len(factors) + j is unique per (species, factor) pair; the
        # previous i + j index gave different panels the same colour.
        # color= (not c=) because a bare RGB tuple passed as `c` is ambiguous
        # with values to colour-map and triggers a matplotlib warning.
        panel.scatter(tmp.petalLength,
                      tmp.petalWidth,
                      color=cp[i * len(factors) + j])

        panel.set(xlabel='Petal Length',
                  ylabel='Petal Width',
                  title=s + '--' + r)

        panel.set_ylim(bottom=0, top=y_top)
        panel.set_xlim(left=0, right=x_right)

fig.tight_layout()

## seaborn

In [None]:
# Single-facet grid: hue splits the points by species on one set of axes.
g = sns.FacetGrid(
    data=df,
    hue='species',
    height=7.5,
)
g.map(plt.scatter, 'petalLength', 'petalWidth')
g.add_legend()
g.ax.set_title('Petal Width v. Length -- by Species')

In [None]:
# One column per species; hue is also species, so each panel gets its own colour.
g = sns.FacetGrid(
    data=df,
    col='species',
    hue='species',
    height=5,
)
g.map(plt.scatter, 'petalLength', 'petalWidth')

In [None]:
# Species in columns, random_factor in rows. The helper column `tmp`
# (species + factor) is used as hue so every panel gets a distinct colour.
g = sns.FacetGrid(
    data=(
        df.assign(tmp=df.species + df.random_factor)
          .sort_values(['species', 'random_factor'])
    ),
    col='species',
    row='random_factor',
    hue='tmp',
    height=6,
)
g.map(plt.scatter, 'petalLength', 'petalWidth')

## ggplot

In [None]:
# Petal width against length on a single panel, coloured by species.
g = (
    ggplot.ggplot(
        df,
        ggplot.aes(x='petalLength', y='petalWidth', color='species'),
    )
    + ggplot.geom_point(size=40.0)
    + ggplot.ggtitle('Petal Width v. Length -- by Species')
)
g.show()

In [None]:
# Same scatter, faceted by species.
g = (
    ggplot.ggplot(
        df,
        ggplot.aes(x='petalLength', y='petalWidth', color='species'),
    )
    + ggplot.facet_grid(y='species')
    + ggplot.geom_point(size=40.0)
)
g.show()

In [None]:
# Same scatter, faceted by both random_factor and species.
g = (
    ggplot.ggplot(
        df,
        ggplot.aes(x='petalLength', y='petalWidth', color='species'),
    )
    + ggplot.facet_grid(x='random_factor', y='species')
    + ggplot.geom_point(size=40.0)
)
g.show()

## altair

In [None]:
# Filled point marks on a single chart, coloured by species.
c = (
    altair.Chart(df)
    .mark_point(filled=True)
    .encode(x='petalLength', y='petalWidth', color='species')
)
c

In [None]:
# One column per species; the column header doubles as the overall title.
c = (
    altair.Chart(df)
    .mark_point()
    .encode(
        x='petalLength',
        y='petalWidth',
        color='species',
        column=altair.Column(
            'species',
            title='Petal Width v. Length by Species',
        ),
    )
)
# The sized chart is the cell's last expression, so that is what is displayed;
# `c` itself is not reassigned.
c.properties(width=300, height=300)

In [None]:
# Species in columns, random_factor in rows.
c = (
    altair.Chart(df)
    .mark_point()
    .encode(
        x='petalLength',
        y='petalWidth',
        color='species',
        column=altair.Column(
            'species',
            title='Petal Width v. Length by Species',
        ),
        row='random_factor',
    )
)
# The sized chart is the cell's last expression, so that is what is displayed;
# `c` itself is not reassigned.
c.properties(width=200, height=200)

## plotly

In [None]:
# Disabled plotly.offline notebook setup (import below, init call further down);
# the cells that follow call fig.show() on plotly express figures instead.
# from plotly.offline import init_notebook_mode, iplot
# NOTE(review): `go` is not referenced by any cell visible in this notebook.
import plotly.graph_objs as go
# init_notebook_mode(connected=True)
# plotly express: used for all plotly figures below.
import plotly.express as px

In [None]:
# Petal width against length, coloured by species.
fig = px.scatter(
    data_frame=df,
    x='petalLength',
    y='petalWidth',
    color="species",
)
fig.show()

In [None]:
# Same scatter, with marker size driven by sepalLength and sepalWidth added
# to the hover data.
fig = px.scatter(
    data_frame=df,
    x='petalLength',
    y='petalWidth',
    color="species",
    size='sepalLength',
    hover_data=['sepalWidth'],
)
fig.show()

In [None]:
# 3-D scatter with sepalLength on the z axis; the figure is the cell's last
# expression, so it is displayed without an explicit show().
px.scatter_3d(
    data_frame=df,
    x='petalLength',
    y='petalWidth',
    z='sepalLength',
    color='species',
)