In [14]:
import numpy as np
import pandas as pd
import random
import math
from lets_plot import *
# Configure Lets-Plot for HTML output so the plots below render inline;
# run this once, before the first plot cell.
LetsPlot.setup_html()

In [3]:
# Two synthetic rating samples of 200 values each, labelled 'A' and 'B'.
# The seed is fixed so the draws (A first, then B) are reproducible.
np.random.seed(12)
ratings_a = np.random.normal(0, 1, 200)
ratings_b = np.random.normal(1, 1.5, 200)
data = {
    'cond': np.repeat(['A', 'B'], 200),
    'rating': np.concatenate((ratings_a, ratings_b)),
}

# Density curve of `rating` for each condition, filled by `cond`,
# drawn on a 500x250 canvas with the y-axis line blanked out.
(
    ggplot(data, aes(x='rating', fill='cond'))
    + ggsize(500, 250)
    + geom_density(color='dark_green', alpha=.7)
    + scale_fill_brewer(type='seq')
    + theme(axis_line_y='blank')
)

In [6]:
# Scatter-plot data following the example found at:
# www.cookbook-r.com/Graphs/Scatterplots_(ggplot2)
# Each coordinate is its index 0..19 plus Gaussian noise (mean 0, sd 3);
# all x values are drawn before any y value.
random.seed(123)
noisy_x = [i + random.normalvariate(0, 3) for i in range(20)]
noisy_y = [i + random.normalvariate(0, 3) for i in range(20)]
data = {
    'cond': np.repeat(['A', 'B'], 10),
    'xvar': noisy_x,
    'yvar': noisy_y,
}

In [7]:
# Base plot (xvar against yvar, 300x250) reused by the following cells.
xy_mapping = aes(x='xvar', y='yvar')
p = ggplot(data, xy_mapping) + ggsize(300, 250)

# Plain scatter plot using point shape 1.
p + geom_point(shape=1)

In [8]:
# Same scatter plot with a geom_smooth layer (default settings) on top.
(
    p
    + geom_point(shape=1)
    + geom_smooth()
)

In [9]:
# Map colour to `cond` so each condition gets its own points and smoother.
p1 = (
    ggplot(data, aes(x='xvar', y='yvar', color='cond'))
    + ggsize(500, 250)
)

# Smoothers are requested with se=False.
p1 + geom_point(shape=1) + geom_smooth(se=False)

In [15]:
# Noisy sine wave sampled at n evenly spaced points over [-2*pi, 2*pi).
n = 150

# np.linspace guarantees exactly n samples. np.arange with a fractional step
# can return one element more or fewer because of floating-point rounding,
# which would no longer line up with the n noise values added below.
x_range = np.linspace(-2 * math.pi, 2 * math.pi, n, endpoint=False)

# Uniform noise in [-0.5, 0.5], drawn from the global stdlib `random` state.
noise = np.array([random.uniform(-.5, .5) for _ in range(n)])
y_range = np.sin(x_range) + noise

df = pd.DataFrame({'x': x_range, 'y': y_range})

In [16]:
# Scatter layer shared by all four LOESS panels.
p = ggplot(df, aes(x='x', y='y')) + geom_point(shape=21, fill='yellow', color='#8c564b')

# Settings common to every smoother; only span, colour and title vary per panel.
loess_style = dict(method='loess', size=1.5)
p1 = p + geom_smooth(color='#d62728', **loess_style) + ggtitle('default (span = 0.5)')
p2 = p + geom_smooth(span=.2, color='#9467bd', **loess_style) + ggtitle('span = 0.2')
p3 = p + geom_smooth(span=.7, color='#1f77b4', **loess_style) + ggtitle('span = 0.7')
p4 = p + geom_smooth(span=1, color='#2ca02c', **loess_style) + ggtitle('span = 1')

# Lay the panels out on a 2x2 grid of 400x300 cells, filled row by row.
cell_w, cell_h = 400, 300
bunch = GGBunch()
for idx, panel in enumerate((p1, p2, p3, p4)):
    row, col = divmod(idx, 2)
    bunch.add_plot(panel, col * cell_w, row * cell_h, cell_w, cell_h)
bunch.show()

In [18]:
# Ratings for two conditions, 200 normal draws each (means 0 and 0.8, sd 1),
# stored as a DataFrame. Group A is drawn before group B.
np.random.seed(123)
group_size = 200
ratings_a = np.random.normal(0, 1, group_size)
ratings_b = np.random.normal(.8, 1, group_size)
data = pd.DataFrame({
    'cond': np.repeat(['A', 'B'], group_size),
    'rating': np.concatenate((ratings_a, ratings_b)),
})

In [19]:
# Base plot for the "rating" column (500x250), reused by the following cells.
p = (
    ggplot(data, aes(x='rating'))
    + ggsize(500, 250)
)

# Basic histogram of "rating" with a bin width of 0.5.
p + geom_histogram(binwidth=.5)

In [20]:
# Histogram overlaid with a kernel density curve:
#  - the histogram maps y to '..density..' instead of the count
#  - the density layer is mostly transparent (alpha=.2) so the bars stay visible
density_hist = geom_histogram(aes(y='..density..'), binwidth=.5, colour="black", fill="white")
density_overlay = geom_density(alpha=.2, fill=0xFF6666)
p + density_hist + density_overlay

In [21]:
# Histogram with a dashed red vertical line at the overall mean rating.
mean_rating = data['rating'].mean()
mean_line = geom_vline(xintercept=mean_rating, color="red", linetype="dashed", size=1)
p + geom_histogram(binwidth=.5, colour="black", fill="white") + mean_line

In [22]:
# Base plot with the fill colour mapped to the condition.
p1 = (
    ggplot(data, aes(x='rating', fill='cond'))
    + ggsize(500, 250)
)

# Histogram with the default position (stacked).
p1 + geom_histogram(binwidth=.5)

In [23]:
# Overlaid histograms: position="identity" with alpha=.7 so both
# conditions remain visible where they overlap.
overlaid_bars = geom_histogram(binwidth=.5, alpha=.7, position="identity")
p1 + overlaid_bars

In [25]:
# Base plot with the line colour mapped to the condition.
p2 = (
    ggplot(data, aes(x='rating', color='cond'))
    + ggsize(500, 250)
)

# Density plot: one curve per condition.
p2 + geom_density()

In [26]:
# Density plot with semi-transparent fill: the fill is also mapped to `cond`.
filled_density = geom_density(aes(fill='cond'), alpha=.7)
p2 + filled_density

In [27]:
# Mean of each group; as_index=False keeps `cond` as a regular column.
by_cond = data.groupby(['cond'], as_index=False)
cdat = by_cond.mean()
cdat

Unnamed: 0,cond,rating
0,A,0.003787
1,B,0.685638


In [28]:
# Overlaid histograms with means: semi-transparent bars per condition plus a
# dashed vertical line at each group's mean rating, read from `cdat`.
group_bars = geom_histogram(aes(fill='cond'), alpha=.5, position="identity", size=0)
group_mean_lines = geom_vline(
    data=cdat,
    mapping=aes(xintercept='rating', color='cond'),
    linetype="dashed",
    size=1,
)
p2 + group_bars + group_mean_lines + scale_color_brewer(palette="Dark2")

### Box Plots

In [29]:
# Base plot for the box plots: rating by condition on a 300x200 canvas.
p3 = (
    ggplot(data, aes(x='cond', y='rating'))
    + ggsize(300, 200)
)

# A basic box plot.
p3 + geom_boxplot()

In [30]:
# A basic box plot with the conditions colored (fill mapped to `cond`).
filled_boxes = geom_boxplot(aes(fill='cond'))
p3 + filled_boxes

In [31]:
# Style outliers: drawn in red, with shape 8 at size 5.
outlier_style = dict(outlier_color='red', outlier_shape=8, outlier_size=5)
p3 + geom_boxplot(**outlier_style)