# Grammar of Graphics Plotting
This tutorial is based on the [Data Carpentry Python Ecology Lesson](https://datacarpentry.org/python-ecology-lesson).
It uses the `plotnine` library to create plots of structured data; `plotnine` is based on ggplot2, the R implementation of [The Grammar of Graphics](http://link.springer.com/book/10.1007%2F0-387-28695-0) by Leland Wilkinson.

In [None]:
%matplotlib inline
import plotnine as p9

In [None]:
import pandas as pd

# Read the survey records and drop rows with missing values in one step,
# then preview the first rows of the cleaned table.
surveys_complete = pd.read_csv('data/surveys.csv').dropna()
surveys_complete.head()

## Introduction

In [None]:
# Bind the plot to a dataframe; no aesthetics or geoms are attached yet
p9.ggplot(data=surveys_complete)

In [None]:
# Define the aesthetics (aes) of the plot: here weight goes on x and hindfoot_length on y.
# The most important aes mappings are: x, y, alpha, color, colour, fill, linetype, shape, size and stroke.
p9.ggplot(
    data=surveys_complete,
    mapping=p9.aes(x='weight', y='hindfoot_length'),
)

In [None]:
# Choose how the data is drawn by adding a geometry (here: points).
# Step 1: create the base plot from the data and the aesthetic mapping
surveys_plot = p9.ggplot(
    data=surveys_complete,
    mapping=p9.aes(x='weight', y='hindfoot_length'),
)
# Step 2: add the point layer; the combined plot is the cell's output
surveys_plot + p9.geom_point()

## Iterative building

In [None]:
# Scatter plot written as one parenthesized expression, so every `+` term
# can sit on its own line
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length'),
    )
    + p9.geom_point()
)

In [None]:
# Add transparency: alpha is passed to the point layer as a constant
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length'),
    )
    + p9.geom_point(alpha=0.1)
)

In [None]:
# Add color: a single fixed color given to the point layer (not mapped from a column)
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length'),
    )
    + p9.geom_point(alpha=0.1, color='blue')
)

In [None]:
# Color each species differently: color is now part of the aes mapping,
# driven by the species_id column
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
)

In [None]:
# Add further elements to the plot, such as axis labels
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(
            x='weight',
            y='hindfoot_length',
            color='species_id',
        ),
    )
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight, g")
    + p9.ylab("Hind Foot Length, mm")
)

In [None]:
# Put the x-axis (weight) on a log10 scale
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(
            x='weight',
            y='hindfoot_length',
            color='species_id',
        ),
    )
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight, g")
    + p9.ylab("Hind Foot Length, mm")
    + p9.scale_x_log10()
)

In [None]:
# Modify the plot theme, e.g. a white background via theme_bw(),
# and set the text size for the plot to 16
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(
            x='weight',
            y='hindfoot_length',
            color='species_id',
        ),
    )
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight, g")
    + p9.ylab("Hind Foot Length, mm")
    + p9.scale_x_log10()
    + p9.theme_bw()
    + p9.theme(text=p9.element_text(size=16))
)

In [None]:
# Bar chart: only x (plot_id) is mapped, no y aesthetic is supplied
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='plot_id'),
    )
    + p9.geom_bar()
)

In [None]:
# Map a variable (sex) to the fill color of the bars and
# choose the fill colors manually
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='plot_id', fill='sex'),
    )
    + p9.geom_bar()
    + p9.scale_fill_manual(["blue", "orange"])
)

In [None]:
# Violin plot: an alternative to a box plot that shows the shape of the distribution.
# Jittered points are colored by plot_id (treated as a factor); the violin layer
# is added with alpha=0 and a constant color of "0.7". The y-axis uses a log10 scale.
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='species_id', y='weight', color='factor(plot_id)'),
    )
    + p9.geom_jitter(alpha=0.3)
    + p9.geom_violin(alpha=0, color="0.7")
    + p9.scale_y_log10()
)

## Plotting Distributions

In [None]:
# Box plot of weight for each species_id
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='species_id', y='weight'),
    )
    + p9.geom_boxplot()
)

In [None]:
# Show the individual points as well: a jitter layer is added first,
# then the box plot with alpha set to 0
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='species_id', y='weight'),
    )
    + p9.geom_jitter(alpha=0.2)
    + p9.geom_boxplot(alpha=0.0)
)

## Plotting Time Series

In [None]:
# Count the records per (year, species_id) pair and turn the grouped result
# back into a flat table with the tallies in a 'counts' column.
yearly_counts = (
    surveys_complete
    .groupby(['year', 'species_id'])['species_id']
    .count()
    .reset_index(name='counts')
)
yearly_counts.head()

In [None]:
# Line plot of counts against year, with color mapped to species_id
(
    p9.ggplot(
        data=yearly_counts,
        mapping=p9.aes(x='year', y='counts', color='species_id'),
    )
    + p9.geom_line()
)

## Faceting

In [None]:
# Baseline scatter plot colored by species_id, with no faceting applied
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
)

In [None]:
# Wrap facets on a chosen variable, here 'sex'
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("sex")
)

In [None]:
# Any categorical column can be used for faceting, here 'plot_id'
(
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("plot_id")
)

In [None]:
# Keep the plot in a variable so it can be reused, then display it
my_plot = (
    p9.ggplot(
        data=surveys_complete,
        mapping=p9.aes(
            x='weight',
            y='hindfoot_length',
            color='species_id',
        ),
    )
    + p9.geom_point(alpha=0.2)
    + p9.scale_x_log10()
)
my_plot

In [None]:
# Save the plot as a PNG (width=10, height=10, 300 dpi).
# The target path is inside img/, so create that folder first if it is missing;
# otherwise this cell fails on a fresh checkout that has no img/ directory.
from pathlib import Path

Path('img').mkdir(parents=True, exist_ok=True)
my_plot.save('img/scatterplot.png', width=10, height=10, dpi=300)