In [1]:
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np
plt.subplot?

In [2]:
plt.figure()
# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)

linear_data = np.array([1,2,3,4,5,6,7,8])

plt.plot(linear_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f9f48180e10>]

In [3]:
exponential_data = linear_data**2 

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
plt.subplot(1, 2, 2)
plt.plot(exponential_data, '-o')

[<matplotlib.lines.Line2D at 0x7f9f4823e6d8>]

In [4]:
# plot exponential data on 1st subplot axes
plt.subplot(1, 2, 1)
plt.plot(exponential_data, '-x')

  


[<matplotlib.lines.Line2D at 0x7f9f4823ec88>]

In [5]:
plt.figure()
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
plt.plot(exponential_data, '-x')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f9f482825f8>]

In [6]:
plt.figure()
# the right hand side is equivalent shorthand syntax
plt.subplot(1,2,1) == plt.subplot(121)

<IPython.core.display.Javascript object>

  This is separate from the ipykernel package so we can avoid doing imports until


True

In [7]:
# create a 3x3 grid of subplots
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)
# plot the linear_data on the 5th subplot axes 
ax5.plot(linear_data, '-')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f9f482fb160>]

In [8]:
# set inside tick labels to visible
for ax in plt.gcf().get_axes():
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

In [9]:
# necessary on some systems to update the plot
plt.gcf().canvas.draw()

# Histograms

In [10]:
# create 2x2 grid of axis subplots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1,ax2,ax3,ax4]

# draw n = 10, 100, 1000, and 10000 samples from the normal distribution and plot corresponding histograms
for n in range(0,len(axs)):
    sample_size = 10**(n+1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    axs[n].hist(sample)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [11]:
# repeat with number of bins set to 100
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1,ax2,ax3,ax4]

for n in range(0,len(axs)):
    sample_size = 10**(n+1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    axs[n].hist(sample, bins=100)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [12]:
plt.figure()
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
plt.scatter(X,Y)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f9f480a7f28>

In [13]:
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec

plt.figure()
gspec = gridspec.GridSpec(3, 3)

top_histogram = plt.subplot(gspec[0, 1:])
side_histogram = plt.subplot(gspec[1:, 0])
lower_right = plt.subplot(gspec[1:, 1:])

<IPython.core.display.Javascript object>

In [14]:
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
lower_right.scatter(X, Y)
top_histogram.hist(X, bins=100)
s = side_histogram.hist(Y, bins=100, orientation='horizontal')

In [15]:
# clear the histograms and plot normed histograms
top_histogram.clear()
top_histogram.hist(X, bins=100, normed=True)
side_histogram.clear()
side_histogram.hist(Y, bins=100, orientation='horizontal', normed=True)
# flip the side histogram's x axis
side_histogram.invert_xaxis()

The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.
  This is separate from the ipykernel package so we can avoid doing imports until
The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.
  """


In [16]:
# change axes limits
for ax in [top_histogram, lower_right]:
    ax.set_xlim(0, 1)
for ax in [side_histogram, lower_right]:
    ax.set_ylim(-5, 5)

# Box and Whisker Plots

In [17]:
import pandas as pd
normal_sample = np.random.normal(loc=0.0, scale=1.0, size=10000)
random_sample = np.random.random(size=10000)
gamma_sample = np.random.gamma(2, size=10000)

df = pd.DataFrame({'normal': normal_sample, 
                   'random': random_sample, 
                   'gamma': gamma_sample})

In [18]:
gamma_sample = pd.DataFrame(np.random.gamma(2, size=1000))

In [19]:
gamma_sample.describe()

Unnamed: 0,0
count,1000.0
mean,2.017056
std,1.371649
min,0.0681
25%,0.983263
50%,1.680265
75%,2.691951
max,9.953482


In [20]:
print(gamma_sample.to_string(index=False))

        0
 1.676070
 1.570287
 4.681775
 3.526515
 1.972789
 2.202452
 2.716243
 0.686427
 2.148753
 0.847090
 1.269127
 4.654490
 5.096165
 0.391898
 5.438094
 0.427850
 3.027049
 1.988244
 2.445754
 4.550000
 2.044430
 0.317760
 3.501728
 1.007453
 3.653096
 0.777006
 1.681646
 1.964228
 5.365497
 3.017638
 6.499565
 0.431988
 3.245127
 2.722755
 1.233157
 0.787158
 4.601391
 2.153803
 0.419189
 2.385628
 2.821360
 6.051879
 2.789877
 1.136034
 0.692897
 4.158475
 1.756467
 1.322833
 1.942305
 2.974522
 3.403404
 1.396671
 0.430586
 2.115105
 1.187666
 4.911025
 1.483945
 2.291544
 2.180527
 0.971454
 1.219142
 3.249149
 0.744254
 3.896741
 0.780793
 4.912606
 4.025772
 1.770286
 1.098721
 4.445298
 0.589231
 1.882412
 4.943816
 0.651135
 2.443574
 1.817124
 1.688924
 1.026627
 4.365821
 1.779100
 1.498291
 2.455570
 1.174310
 1.382228
 6.051907
 5.978744
 2.470057
 1.318459
 2.310588
 0.240795
 0.990186
 1.062049
 1.172359
 1.478323
 2.209739
 1.622187
 1.434070
 4.058737
 1.568029


In [21]:
df.describe()

Unnamed: 0,normal,random,gamma
count,10000.0,10000.0,10000.0
mean,0.016952,0.503884,1.999288
std,0.994654,0.287953,1.411563
min,-3.668563,8.5e-05,0.019702
25%,-0.646611,0.256729,0.964457
50%,0.016945,0.50028,1.669593
75%,0.681309,0.755384,2.671845
max,3.456847,0.999963,13.327658


In [22]:
print(df["normal"].to_string(index=False))

-0.745570
 0.712188
-0.456490
-0.400044
-0.263976
 0.374398
 1.970527
-0.099392
 0.162534
 0.070768
-1.340838
-0.222500
-0.424716
-1.056299
 0.676824
-1.833496
-0.156738
-0.012704
 0.579545
 1.691357
-0.360188
 0.013418
 0.681245
 1.079603
 2.371428
 1.948438
 0.301252
-0.590193
 0.162107
-0.388402
-0.256978
 0.311252
 0.197389
-2.040585
-0.227548
 1.561115
-0.136995
-0.510145
 0.570837
 1.179408
 0.033651
 0.601796
 0.312202
-0.362376
-0.154360
-0.317187
-1.906029
-0.973093
 1.203454
-2.442503
 1.169788
 0.641945
 0.808485
-0.730785
 1.377692
-0.944849
 1.570888
 0.417997
-1.870307
 0.320436
 0.048214
 0.789761
-0.190475
 0.110184
 0.736904
 1.371852
 0.612871
-0.822790
-0.281628
 0.175041
-0.683380
 0.470058
 1.753416
 0.466288
 0.118133
-0.528318
 0.327822
-2.203089
-0.738346
 0.153190
-0.307507
 0.715962
-1.415199
-1.540180
-1.997472
-1.176222
-1.075768
-1.121399
 0.808551
-0.864288
-0.985131
-1.208228
-0.456726
-1.057704
 3.001997
 0.712937
 0.202249
 1.341630
 0.312117
 0.396096


In [23]:
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to supress output
plt.boxplot(df['normal'], whis='range')

<IPython.core.display.Javascript object>

{'whiskers': [<matplotlib.lines.Line2D at 0x7f9f4b07c470>,
  <matplotlib.lines.Line2D at 0x7f9f4b07c7b8>],
 'caps': [<matplotlib.lines.Line2D at 0x7f9f4b07cb00>,
  <matplotlib.lines.Line2D at 0x7f9f4b07ce48>],
 'boxes': [<matplotlib.lines.Line2D at 0x7f9f4b07c048>],
 'medians': [<matplotlib.lines.Line2D at 0x7f9f4b07cf28>],
 'fliers': [<matplotlib.lines.Line2D at 0x7f9f4b072518>],
 'means': []}

In [24]:
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis='range')

In [25]:
plt.figure()
_ = plt.hist(df['gamma'], bins=100)

<IPython.core.display.Javascript object>

In [26]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

plt.figure()
plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis='range')
# overlay axis on top of another 
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)

<IPython.core.display.Javascript object>

In [27]:
# switch the y axis ticks for ax2 to the right side
ax2.yaxis.tick_right()

In [28]:
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
plt.figure()
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ] )

<IPython.core.display.Javascript object>

# Heatmaps

In [29]:
plt.figure()

Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
_ = plt.hist2d(X, Y, bins=25)

<IPython.core.display.Javascript object>

In [30]:
plt.figure()
_ = plt.hist2d(X, Y, bins=100)

<IPython.core.display.Javascript object>

In [31]:
# add a colorbar legend
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x7f9f4b2079e8>

# Animations

In [32]:
import matplotlib.animation as animation

n = 100
x = np.random.randn(n)

In [33]:
# create the function that will do the plotting, where curr is the current frame
def update(curr):
    # check if animation is at the last frame, and if so, stop the animation a
    if curr == n: 
        a.event_source.stop()
    plt.cla()
    bins = np.arange(-4, 4, 0.5)
    plt.hist(x[:curr], bins=bins)
    plt.axis([-4,4,0,30])
    plt.gca().set_title('Sampling the Normal Distribution')
    plt.gca().set_ylabel('Frequency')
    plt.gca().set_xlabel('Value')
    plt.annotate('n = {}'.format(curr), [3,27])

In [35]:
fig = plt.figure()
a = animation.FuncAnimation(fig, update, interval=100)

<IPython.core.display.Javascript object>

# Interactivity

In [36]:
plt.figure()
data = np.random.rand(10)
plt.plot(data)

def onclick(event):
    plt.cla()
    plt.plot(data)
    plt.gca().set_title('Event at pixels {},{} \nand data {},{}'.format(event.x, event.y, event.xdata, event.ydata))

# tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected
plt.gcf().canvas.mpl_connect('button_press_event', onclick)

<IPython.core.display.Javascript object>

7

In [37]:
from random import shuffle
origins = ['China', 'Brazil', 'India', 'USA', 'Canada', 'UK', 'Germany', 'Iraq', 'Chile', 'Mexico']

shuffle(origins)

df = pd.DataFrame({'height': np.random.rand(10),
                   'weight': np.random.rand(10),
                   'origin': origins})
df

Unnamed: 0,height,weight,origin
0,0.346693,0.263194,Canada
1,0.872236,0.727761,Mexico
2,0.472751,0.952297,India
3,0.867089,0.925987,Chile
4,0.678026,0.757887,Brazil
5,0.779668,0.90957,China
6,0.917853,0.786628,USA
7,0.7865,0.605454,UK
8,0.118564,0.311128,Germany
9,0.406124,0.51662,Iraq


In [38]:
plt.figure()
# picker=5 means the mouse doesn't have to click directly on an event, but can be up to 5 pixels away
plt.scatter(df['height'], df['weight'], picker=5)
plt.gca().set_ylabel('Weight')
plt.gca().set_xlabel('Height')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Height')

In [39]:
def onpick(event):
    origin = df.iloc[event.ind[0]]['origin']
    plt.gca().set_title('Selected item came from {}'.format(origin))

# tell mpl_connect we want to pass a 'pick_event' into onpick when the event is detected
plt.gcf().canvas.mpl_connect('pick_event', onpick)

7