In [2]:
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np
plt.subplot?

In [3]:
plt.figure()
# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)

linear_data = np.array([1,2,3,4,5,6,7,8])

plt.plot(linear_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fde952fe0b8>]

In [4]:
exponential_data = linear_data**2 

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
plt.subplot(1, 2, 2)
plt.plot(exponential_data, '-o')

[<matplotlib.lines.Line2D at 0x7fde95ca2438>]

In [5]:
# plot exponential data on 1st subplot axes
plt.subplot(1, 2, 1)
plt.plot(exponential_data, '-x')

  


[<matplotlib.lines.Line2D at 0x7fde96804d30>]

In [6]:
plt.figure()
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
plt.plot(exponential_data, '-x')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fde96838dd8>]

In [7]:
plt.figure()
# the right hand side is equivalent shorthand syntax
plt.subplot(1,2,1) == plt.subplot(121)

<IPython.core.display.Javascript object>

  This is separate from the ipykernel package so we can avoid doing imports until


True

In [8]:
# create a 3x3 grid of subplots
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)
# plot the linear_data on the 5th subplot axes 
ax5.plot(linear_data, '-')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7fde9693efd0>]

In [11]:
# set inside tick labels to visible
for ax in plt.gcf().get_axes():
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

In [12]:
# necessary on some systems to update the plot
plt.gcf().canvas.draw()

# Histograms

In [14]:
# create 2x2 grid of axis subplots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1,ax2,ax3,ax4]

# draw n = 10, 100, 1000, and 10000 samples from the normal distribution and plot corresponding histograms
for n in range(0,len(axs)):
    sample_size = 10**(n+1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    axs[n].hist(sample)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [15]:
# repeat with number of bins set to 100
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1,ax2,ax3,ax4]

for n in range(0,len(axs)):
    sample_size = 10**(n+1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    axs[n].hist(sample, bins=100)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

In [17]:
plt.figure()
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
plt.scatter(X,Y)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7fde9a5db4e0>

In [18]:
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec

plt.figure()
gspec = gridspec.GridSpec(3, 3)

top_histogram = plt.subplot(gspec[0, 1:])
side_histogram = plt.subplot(gspec[1:, 0])
lower_right = plt.subplot(gspec[1:, 1:])

<IPython.core.display.Javascript object>

In [19]:
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
lower_right.scatter(X, Y)
top_histogram.hist(X, bins=100)
s = side_histogram.hist(Y, bins=100, orientation='horizontal')

In [20]:
# clear the histograms and plot normed histograms
top_histogram.clear()
top_histogram.hist(X, bins=100, normed=True)
side_histogram.clear()
side_histogram.hist(Y, bins=100, orientation='horizontal', normed=True)
# flip the side histogram's x axis
side_histogram.invert_xaxis()

The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.
  This is separate from the ipykernel package so we can avoid doing imports until
The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.
  """


In [21]:
# change axes limits
for ax in [top_histogram, lower_right]:
    ax.set_xlim(0, 1)
for ax in [side_histogram, lower_right]:
    ax.set_ylim(-5, 5)

# Box and Whisker Plots

In [3]:
import pandas as pd
normal_sample = np.random.normal(loc=0.0, scale=1.0, size=10000)
random_sample = np.random.random(size=10000)
gamma_sample = np.random.gamma(2, size=10000)

df = pd.DataFrame({'normal': normal_sample, 
                   'random': random_sample, 
                   'gamma': gamma_sample})

In [32]:
gamma_sample = pd.DataFrame(np.random.gamma(2, size=1000))

In [33]:
gamma_sample.describe()

Unnamed: 0,0
count,1000.0
mean,2.040107
std,1.378039
min,0.029047
25%,1.046811
50%,1.719853
75%,2.818813
max,7.894735


In [34]:
print(gamma_sample.to_string(index=False))

        0
 1.906967
 1.025920
 0.170195
 1.283421
 3.674425
 1.470862
 0.858354
 2.905236
 1.376675
 3.650266
 2.837305
 4.652057
 2.847341
 3.349526
 2.488629
 0.778380
 1.225564
 0.332074
 1.247079
 2.665753
 1.403789
 0.533309
 1.211282
 0.586556
 0.824067
 1.659784
 2.941495
 2.062140
 0.273551
 0.712452
 1.447933
 1.713656
 3.183623
 3.098916
 2.943824
 0.772001
 2.920096
 0.663258
 1.520201
 0.339574
 0.763695
 1.894406
 0.349832
 4.408248
 1.175112
 2.375239
 0.363461
 0.790159
 1.599339
 2.509914
 1.428444
 3.469218
 3.165067
 4.234552
 7.894735
 0.729339
 0.716382
 2.677327
 1.061271
 1.740225
 1.459863
 0.514406
 1.589397
 5.926391
 3.073207
 2.352495
 0.401842
 2.652273
 1.081163
 3.407477
 0.761655
 1.605669
 2.878739
 0.376260
 2.703838
 2.393847
 0.730546
 0.757410
 0.428355
 2.552959
 2.698966
 1.009672
 1.206855
 3.767061
 3.724413
 1.280259
 1.331168
 5.021936
 2.092419
 1.156324
 1.241747
 2.159257
 1.075445
 1.351982
 3.704171
 1.319303
 1.614839
 1.563874
 0.920684


In [4]:
df.describe()

Unnamed: 0,normal,random,gamma
count,10000.0,10000.0,10000.0
mean,-0.007721,0.503219,2.010901
std,0.998253,0.288389,1.41956
min,-3.597256,0.00014,0.007413
25%,-0.684435,0.254341,0.954452
50%,-0.009459,0.504178,1.683104
75%,0.654857,0.754308,2.736677
max,3.574407,0.999974,12.689143


In [11]:
print(df["normal"].to_string(index=False))

-0.172950
-0.848201
-0.454797
-1.253796
-1.359144
-0.002692
 0.572417
 0.181196
-0.756868
-0.590976
 1.520316
-0.794539
 0.303440
-0.757162
-0.565061
 0.936599
-0.743781
-0.496186
 0.315751
-0.309505
 2.413987
-0.488463
-1.795156
 0.557008
 0.380278
-0.154109
-0.248898
-0.112340
-0.997160
-0.283224
 0.990998
 1.528884
 0.442598
 1.698848
-0.561452
-0.048450
 0.085365
-1.005400
-0.539080
 0.600622
 0.800965
 0.660648
-1.341719
-0.031552
-1.706993
-1.703803
-0.302137
 0.092278
 0.029132
-0.011765
 0.134838
-0.817624
-0.711162
 0.221998
-0.499270
 1.107589
-0.947917
-0.063110
-0.124987
 1.666206
 0.024012
-0.301009
-1.481510
 0.868855
 0.630542
 0.807148
 1.723347
 0.769180
-0.961874
 2.441971
-0.164337
 1.072860
-0.719920
 0.405561
-1.394978
-1.356150
 0.159580
-0.653635
 1.762968
-0.928633
 0.776991
-0.304112
-0.295996
-0.298748
-1.372062
 0.677580
-1.925658
 0.225366
 0.346066
 0.394792
-0.026131
-0.018573
 0.623595
-0.943677
 0.832046
-0.538971
-0.764954
-0.377615
 2.372489
-0.145577


In [12]:
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to supress output
plt.boxplot(df['normal'], whis='range')

<IPython.core.display.Javascript object>

{'whiskers': [<matplotlib.lines.Line2D at 0x7fc0e6128a20>,
  <matplotlib.lines.Line2D at 0x7fc0cd987c88>],
 'caps': [<matplotlib.lines.Line2D at 0x7fc0cd987da0>,
  <matplotlib.lines.Line2D at 0x7fc0cd810358>],
 'boxes': [<matplotlib.lines.Line2D at 0x7fc0cd987588>],
 'medians': [<matplotlib.lines.Line2D at 0x7fc0cd8106d8>],
 'fliers': [<matplotlib.lines.Line2D at 0x7fc0cd810a20>],
 'means': []}

In [13]:
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis='range')

In [14]:
plt.figure()
_ = plt.hist(df['gamma'], bins=100)

<IPython.core.display.Javascript object>

In [15]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

plt.figure()
plt.boxplot([ df['normal'], df['random'], df['gamma'] ], whis='range')
# overlay axis on top of another 
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)

<IPython.core.display.Javascript object>

In [16]:
# switch the y axis ticks for ax2 to the right side
ax2.yaxis.tick_right()

In [17]:
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
plt.figure()
_ = plt.boxplot([ df['normal'], df['random'], df['gamma'] ] )

<IPython.core.display.Javascript object>

# Heatmaps

In [18]:
plt.figure()

Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
_ = plt.hist2d(X, Y, bins=25)

<IPython.core.display.Javascript object>

In [19]:
plt.figure()
_ = plt.hist2d(X, Y, bins=100)

<IPython.core.display.Javascript object>

In [20]:
# add a colorbar legend
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x7fc0d168d208>