https://aeturrell.github.io/python4DS/vis-layers.html

In [1]:
import pandas as pd
from lets_plot import *

# Configure lets-plot for HTML (notebook) output; run once before the first plot cell.
LetsPlot.setup_html()

In [2]:
# Read the ggplot2 `mpg` dataset (first CSV column becomes the index) and set
# every column's dtype in one chained expression, so `mpg` is bound only once.
mpg = pd.read_csv(
    "https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/mpg.csv", index_col=0
).astype(
    {
        # Label-like columns are stored as categoricals.
        **dict.fromkeys(
            ["manufacturer", "model", "trans", "drv", "fl", "class"], "category"
        ),
        # Floating-point columns.
        **dict.fromkeys(["displ", "cty", "hwy"], "double"),
        # Integer columns.
        **dict.fromkeys(["year", "cyl"], "int64"),
    }
)
mpg.head()

rownames,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
1,audi,a4,1.8,1999,4,auto(l5),f,18.0,29.0,p,compact
2,audi,a4,1.8,1999,4,manual(m5),f,21.0,29.0,p,compact
3,audi,a4,2.0,2008,4,manual(m6),f,20.0,31.0,p,compact
4,audi,a4,2.0,2008,4,auto(av),f,21.0,30.0,p,compact
5,audi,a4,2.8,1999,6,auto(l5),f,16.0,26.0,p,compact


In [3]:
# Let’s start by visualising the relationship between displ and hwy for various classes of cars. We can do this with a scatterplot where the numerical variables are mapped to the x and y aesthetics and the categorical variable is mapped to an aesthetic like color or shape.

(
    # class is mapped to the color aesthetic of the points.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", color="class"))
    + geom_point()
)

In [4]:
(
    # Same scatterplot, with class mapped to the shape aesthetic instead of color.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", shape="class"))
    + geom_point()
)

In [7]:
# Similarly, we can map class to size or alpha aesthetics as well, which control the size and the transparency of the points, respectively.

(
    # class is mapped to the size aesthetic of the points.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", size="class"))
    + geom_point()
)

In [8]:
(
    # class is mapped to the alpha (transparency) aesthetic of the points.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", alpha="class"))
    + geom_point()
)

In [None]:
# Here, geom_smooth() separates the cars into three lines based on their drv value, which describes a car’s drive train. One line describes all of the points that have a 4 value, one line describes all of the points that have an f value, and one line describes all of the points that have an r value. Here, 4 stands for four-wheel drive, f for front-wheel drive, and r for rear-wheel drive.

(
    # linetype is mapped to drv, so the loess smoother is drawn once per drv value.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", linetype="drv"))
    + geom_smooth(method="loess")
)

In [10]:
# If this is too confusing, we can make it clearer by overlaying the lines on top of the raw data and then coloring everything according to drv.

(
    # Global mapping: x, y and color (by drv) apply to both layers below.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy", color="drv"))
    # Raw observations.
    + geom_point()
    # Loess curves; linetype is an extra mapping local to this layer.
    + geom_smooth(mapping=aes(linetype="drv"), method="loess")
)

In [11]:
# Note that if you place mappings in a geom function, lets-plot will treat them as local mappings for the layer. It will use these mappings to extend or overwrite the global mappings for that layer only. This makes it possible to display different aesthetics in different layers.

(
    # Only x and y are global; every layer inherits them.
    ggplot(data=mpg, mapping=aes(x="displ", y="hwy"))
    # color is local to the point layer, so it does not split the smoother.
    + geom_point(mapping=aes(color="class"))
    # Pass method="loess" explicitly, matching the other smoothing cells:
    # without it lets-plot falls back to a straight-line ("lm") fit.
    + geom_smooth(method="loess")
)