# Basic Plotting with matplotlib

You can show matplotlib figures directly in the notebook by using either the `%matplotlib notebook` or the `%matplotlib inline` magic command.

`%matplotlib notebook` provides an interactive environment.

In [1]:
%matplotlib notebook

In [2]:
import matplotlib as mpl
mpl.get_backend()

'nbAgg'

In [3]:
import matplotlib.pyplot as plt
plt.plot?

In [4]:
# because the default is the line style '-', 
# nothing will be shown if we only pass in one point (3,2)
plt.plot(3, 2)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f1f445a9860>]

In [5]:
# we can pass in '.' to plt.plot to indicate that we want
# the point (3,2) to be indicated with a marker '.'
plt.plot(3, 2, '.')

[<matplotlib.lines.Line2D at 0x7f1f445b4860>]

Let's see how to make a plot without using the scripting layer.

In [6]:
# Build a plot with the object-oriented API only: the Agg backend is chosen by
# importing its canvas class directly, without mpl.use() from the scripting layer
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

# start from an empty figure
fig = Figure()

# wrapping the figure in an Agg canvas associates it with that backend
canvas = FigureCanvasAgg(fig)

# one subplot in a 1x1 grid (the same layout as the shorthand 111)
ax = fig.add_subplot(1, 1, 1)

# draw the point (3,2) with the '.' marker
ax.plot(3, 2, '.')

# write the figure out as test.png
# you can see this figure in your Jupyter workspace afterwards by going to
# https://hub.coursera-notebooks.org/
canvas.print_png('test.png')

We can use the `%%html` cell magic to display the saved image.

In [7]:
%%html
<img src='test.png' />

In [8]:
# create a new figure
plt.figure()

# plot the point (3,2) using the circle marker
plt.plot(3, 2, 'o')

# get the current axes
ax = plt.gca()

# Set axis properties [xmin, xmax, ymin, ymax]
ax.axis([0,6,0,10])

<IPython.core.display.Javascript object>

[0, 6, 0, 10]

In [9]:
# open a new figure for the three points
plt.figure()

# plot the points (1.5, 1.5), (2, 2) and (2.5, 2.5), one plt.plot call per
# point, each using the circle marker
for coord in (1.5, 2, 2.5):
    plt.plot(coord, coord, 'o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f1f30d7cc50>]

In [10]:
# get current axes
ax = plt.gca()
# get all the child objects the axes contains
ax.get_children()

[<matplotlib.lines.Line2D at 0x7f1f30d7cac8>,
 <matplotlib.lines.Line2D at 0x7f1f30db7cf8>,
 <matplotlib.lines.Line2D at 0x7f1f30d7cc50>,
 <matplotlib.spines.Spine at 0x7f1f30e03a20>,
 <matplotlib.spines.Spine at 0x7f1f30da5668>,
 <matplotlib.spines.Spine at 0x7f1f30da5518>,
 <matplotlib.spines.Spine at 0x7f1f44b8e940>,
 <matplotlib.axis.XAxis at 0x7f1f445c8390>,
 <matplotlib.axis.YAxis at 0x7f1f30dc6358>,
 <matplotlib.text.Text at 0x7f1f30ddcd30>,
 <matplotlib.text.Text at 0x7f1f30ddcda0>,
 <matplotlib.text.Text at 0x7f1f30ddce10>,
 <matplotlib.patches.Rectangle at 0x7f1f30ddce48>]

# Scatterplots

In [11]:
import numpy as np

x = np.array([1,2,3,4,5,6,7,8])
y = x

plt.figure()
plt.scatter(x, y) # similar to plt.plot(x, y, '.'), but the underlying child objects in the axes are not Line2D

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f1f30cfc0b8>

In [12]:
import numpy as np

# the integers 1 through 8; y is the very same array as x
x = np.arange(1, 9)
y = x

# one color per point: green for all but the last point, which is red
# ['green', 'green', 'green', 'green', 'green', 'green', 'green', 'red']
colors = ['green'] * (len(x) - 1) + ['red']

plt.figure()

# draw the points with marker size 100, colored by the list built above
plt.scatter(x, y, s=100, c=colors)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f1f30c6d6a0>

In [13]:
# pair the two lists up element by element; zip produces the pairs lazily
zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])

# collect the pairs into a list so they can be printed together
print([pair for pair in zip_generator])
# the above prints:
# [(1, 6), (2, 7), (3, 8), (4, 9), (5, 10)]

# the first zip object has been used up, so build a fresh one
zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])
# a leading * hands each pair to print as its own positional argument
print(*zip_generator)
# the above prints:
# (1, 6) (2, 7) (3, 8) (4, 9) (5, 10)

[(1, 6), (2, 7), (3, 8), (4, 9), (5, 10)]
(1, 6) (2, 7) (3, 8) (4, 9) (5, 10)


In [14]:
# zipping 5 tuples of 2 elements regroups them into 2 tuples of 5 elements
print([group for group in zip((1, 6), (2, 7), (3, 8), (4, 9), (5, 10))])
# the above prints:
# [(1, 2, 3, 4, 5), (6, 7, 8, 9, 10)]


zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])
# unpack the pairs with * and zip them again to split the data back into
# its two original sequences (they come back as tuples)
x, y = zip(*zip_generator) # same as zip((1, 6), (2, 7), (3, 8), (4, 9), (5, 10))
print(x, y, sep='\n')
# the above prints:
# (1, 2, 3, 4, 5)
# (6, 7, 8, 9, 10)

[(1, 2, 3, 4, 5), (6, 7, 8, 9, 10)]
(1, 2, 3, 4, 5)
(6, 7, 8, 9, 10)


In [22]:
plt.figure()
# plot a data series 'Tall students' in red using the first two elements of x and y
plt.scatter(x[:2], y[:2], s=100, c='red', label='Tall students')
# plot a second data series 'Short students' in blue using the last three elements of x and y 
plt.scatter(x[2:], y[2:], s=100, c='blue', label='Short students')

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7f1f189f5e80>

In [23]:
# add a label to the x axis
plt.xlabel('The number of times the child kicked a ball')
# add a label to the y axis
plt.ylabel('The grade of the student')
# add a title
plt.title('Relationship between ball kicking and grades')

<matplotlib.text.Text at 0x7f1f18a54358>

In [24]:
# add a legend (uses the labels from plt.scatter)
plt.legend()

<matplotlib.legend.Legend at 0x7f1f18a28668>

In [25]:
# add the legend to loc=4 (the lower right hand corner), also gets rid of the frame and adds a title
plt.legend(loc=4, frameon=False, title='Legend')

<matplotlib.legend.Legend at 0x7f1f30c9f8d0>

In [None]:
# get children from current axes (the legend is the second to last item in this list)
plt.gca().get_children()

In [None]:
# get the legend from the current axes
legend = plt.gca().get_children()[-2]

In [None]:
# you can use get_children to navigate through the child artists
legend.get_children()[0].get_children()[1].get_children()[0].get_children()

In [None]:
# import the artist class from matplotlib
from matplotlib.artist import Artist

def rec_gc(art, depth=0):
    """Print ``art`` and, recursively, every child artist below it.

    Anything that is not an ``Artist`` instance is skipped without output.

    art   -- the object to start from
    depth -- indentation level for ``art``; each level adds two spaces, and
             children are printed two levels deeper than their parent
    """
    if not isinstance(art, Artist):
        return
    indent = "  " * depth
    print(indent + str(art))
    # children are nested two levels below the artist that owns them
    child_depth = depth + 2
    for child in art.get_children():
        rec_gc(child, child_depth)

# Call this function on the legend artist to see what the legend is made up of
rec_gc(plt.legend())

# Line Plots

In [6]:
import numpy as np

# the integers 1 through 8
linear_data = np.arange(1, 9)
# despite its name, this series holds the squares of linear_data
exponential_data = np.square(linear_data)

plt.figure()
# draw both series in a single call, each with the '-o' format
plt.plot(linear_data, '-o', exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7ff3f8b0b198>,
 <matplotlib.lines.Line2D at 0x7ff3f8b0b2e8>]

In [35]:
# plot another series with a dashed red line
plt.plot([22,44,55], '--r')

[<matplotlib.lines.Line2D at 0x7f1f1813b048>]

In [36]:
plt.xlabel('Some data')
plt.ylabel('Some other data')
plt.title('A title')
# add a legend with legend entries (because we didn't have labels when we plotted the data series)
plt.legend(['Baseline', 'Competition', 'Us'])

<matplotlib.legend.Legend at 0x7f1f181437b8>

In [37]:
# fill the area between the linear data and exponential data
plt.gca().fill_between(range(len(linear_data)), 
                       linear_data, exponential_data, 
                       facecolor='blue', 
                       alpha=0.55)

<matplotlib.collections.PolyCollection at 0x7f1f181541d0>

Let's try working with dates!

In [38]:
plt.figure()

observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')

plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f1f181192e8>,
 <matplotlib.lines.Line2D at 0x7f1f18119470>]

Let's try using pandas to convert the dates.

In [39]:
# NOTE: this cell fails on purpose. It demonstrates the TypeError raised when
# plt.plot is given a lazy map object, which has no len(), instead of a sequence.
import pandas as pd

plt.figure()
# daily datetime64 values starting at 2017-01-01; the end date 2017-01-09 is excluded
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
# map() is lazy: it returns a map object here, not a list of converted dates
observation_dates = map(pd.to_datetime, observation_dates) # trying to plot a map will result in an error
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')

<IPython.core.display.Javascript object>

TypeError: object of type 'map' has no len()

In [40]:
plt.figure()
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
observation_dates = list(map(pd.to_datetime, observation_dates)) # convert the map to a list to get rid of the error
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f1f052a1630>,
 <matplotlib.lines.Line2D at 0x7f1f01a5ea58>]

In [41]:
x = plt.gca().xaxis

# turn every tick label on the x axis by 45 degrees
for tick_label in x.get_ticklabels():
    tick_label.set_rotation(45)

In [42]:
# adjust the subplot so the text doesn't run off the image
plt.subplots_adjust(bottom=0.25)

In [43]:
ax = plt.gca()
ax.set_xlabel('Date')
ax.set_ylabel('Units')
ax.set_title('Exponential vs. Linear performance')

<matplotlib.text.Text at 0x7f1f052b3f60>

In [44]:
# you can add mathematical expressions in any text element
ax.set_title("Exponential ($x^2$) vs. Linear ($x$) performance")

<matplotlib.text.Text at 0x7f1f052b3f60>

# Bar Charts

In [7]:
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3)

<IPython.core.display.Javascript object>

<Container object of 8 artists>

In [8]:
# plot another set of bars: move every x position right by 0.3 (the bar width)
# to make up for the first set of bars already plotted
new_xvals = [item + 0.3 for item in xvals]

plt.bar(new_xvals, exponential_data, width = 0.3 ,color='red')

<Container object of 8 artists>

In [9]:
# randint is still imported so that any later cell relying on it keeps working
from random import Random, randint

# A dedicated generator with a fixed seed makes the error values, and therefore
# the plotted error bars, identical on every run without altering the global
# random state.
error_rng = Random(0)
# one integer error in [0, 15] for each entry of linear_data
linear_err = [error_rng.randint(0, 15) for _ in range(len(linear_data))]

# This will plot a new set of bars with errorbars using the list of random error values
plt.bar(xvals, linear_data, width = 0.3, yerr=linear_err)

<Container object of 8 artists>

In [10]:
# stacked bar charts are also possible
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3, color='b')
plt.bar(xvals, exponential_data, width = 0.3, bottom=linear_data, color='r')

<IPython.core.display.Javascript object>

<Container object of 8 artists>

In [11]:
# or use barh for horizontal bar charts
plt.figure()
xvals = range(len(linear_data))
plt.barh(xvals, linear_data, height = 0.3, color='b')
plt.barh(xvals, exponential_data, height = 0.3, left=linear_data, color='r')

<IPython.core.display.Javascript object>

<Container object of 8 artists>

In [12]:
import matplotlib.pyplot as plt
import numpy as np

In [16]:
plt.figure()

languages =['Python', 'SQL', 'Java', 'C++', 'JavaScript']
# one bar position per language: 0, 1, 2, ...
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]

plt.bar(pos, popularity, align='center')
# label each bar position with its language name
plt.xticks(pos, languages)
plt.ylabel('% Popularity')
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Overflow', alpha=0.8)

# remove all the ticks (both axes), and tick labels on the Y axis.
# The flags must be real booleans: the strings 'off' and 'on' are both
# non-empty and therefore truthy, so current matplotlib would keep every tick.
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

plt.show()

<IPython.core.display.Javascript object>

In [17]:
# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)
    
plt.show()    

In [19]:
plt.figure()

languages =['Python', 'SQL', 'Java', 'C++', 'JavaScript']
# one bar position per language: 0, 1, 2, ...
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]

# change the bar colors to be less bright blue
bars = plt.bar(pos, popularity, align='center', linewidth=0, color='lightslategrey')
# make one bar, the python bar, a contrasting color
bars[0].set_color('#1F77B4')

# soften all labels by turning grey
plt.xticks(pos, languages, alpha=0.8)
#plt.ylabel('% Popularity', alpha=0.8)
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Overflow', alpha=0.8)

# remove all the ticks (both axes), and tick labels on the Y axis.
# The flags must be real booleans: the strings 'off' and 'on' are both
# non-empty and therefore truthy, so current matplotlib would keep every tick.
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)
plt.show()

<IPython.core.display.Javascript object>

In [20]:
# direct label each bar with Y axis values
for bar in bars:
    plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 5, str(int(bar.get_height())) + '%', 
                 ha='center', color='w', fontsize=11)
    
plt.show()    

In [4]:
import pandas as pd

df = pd.read_csv('data/C2A2_data/BinSize_d{}.csv'.format(400))
df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATE,NAME,GSNFLAG,HCNFLAG,WMOID,x,y,x_group,y_group,xy_group,hash
0,AE000041196,25.333,55.517,34.0,,SHARJAH INTER. AIRP,GSN,,41196.0,5579578.0,2816905.0,5497601.479782179 to 5594934.908498545,2772265.475723952 to 2869598.904440318,"5497601.479782179 to 5594934.908498545, 277226...",1f6f0e89f4e937d934fe9acde2545e0d66bbcfe3b66320...
1,AEM00041194,25.255,55.364,10.4,,DUBAI INTL,,,41194.0,5567782.0,2808232.0,5497601.479782179 to 5594934.908498545,2772265.475723952 to 2869598.904440318,"5497601.479782179 to 5594934.908498545, 277226...",1f6f0e89f4e937d934fe9acde2545e0d66bbcfe3b66320...
2,AEM00041217,24.433,54.651,26.8,,ABU DHABI INTL,,,41217.0,5532707.0,2716829.0,5497601.479782179 to 5594934.908498545,2674932.047007587 to 2772265.4757239525,"5497601.479782179 to 5594934.908498545, 267493...",35c9093084d26708afc5b482ea15ba52e3f20fb7cd781f...
3,AEM00041218,24.262,55.609,264.9,,AL AIN INTL,,,41218.0,5637301.0,2697815.0,5594934.908498544 to 5692268.33721491,2674932.047007587 to 2772265.4757239525,"5594934.908498544 to 5692268.33721491, 2674932...",2a6a7edad90579049fd1b3c3a990e8a8be90cb1975118e...
4,AFM00040990,31.5,65.85,1010.0,,KANDAHAR AIRPORT,,,40990.0,6243199.0,3502645.0,6178935.480796736 to 6276268.909513102,3453599.476738509 to 3550932.9054548745,"6178935.480796736 to 6276268.909513102, 345359...",039e75bbef55baa3fecc0568dc3b245666a98b1bcba6b4...


In [7]:
df = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv')
df.head()

Unnamed: 0,ID,Date,Element,Data_Value
0,USW00094889,2014-11-12,TMAX,22
1,USC00208972,2009-04-29,TMIN,56
2,USC00200032,2008-05-26,TMAX,278
3,USC00205563,2005-11-11,TMAX,139
4,USC00200230,2014-02-27,TMAX,-106


In [8]:
# split every date string into a pair: its first 4 characters (the year) and
# everything from index 5 onward (the month-day part after the first dash)
df_zipped = df['Date'].apply(lambda date_str: (date_str[:4], date_str[5:]))
df_zipped

0         (2014, 11-12)
1         (2009, 04-29)
2         (2008, 05-26)
3         (2005, 11-11)
4         (2014, 02-27)
5         (2010, 10-01)
6         (2010, 06-29)
7         (2005, 10-04)
8         (2007, 12-14)
9         (2011, 04-21)
10        (2013, 01-16)
11        (2008, 05-29)
12        (2008, 10-17)
13        (2006, 05-14)
14        (2006, 05-14)
15        (2014, 12-07)
16        (2008, 09-07)
17        (2006, 04-22)
18        (2008, 02-22)
19        (2015, 01-03)
20        (2011, 03-28)
21        (2008, 02-10)
22        (2008, 02-03)
23        (2008, 02-23)
24        (2012, 03-20)
25        (2006, 03-29)
26        (2006, 05-11)
27        (2012, 03-31)
28        (2010, 07-25)
29        (2014, 12-09)
              ...      
165055    (2009, 10-09)
165056    (2015, 02-23)
165057    (2009, 11-24)
165058    (2010, 03-22)
165059    (2015, 06-23)
165060    (2010, 05-23)
165061    (2012, 12-26)
165062    (2014, 02-06)
165063    (2010, 05-23)
165064    (2008, 08-04)
165065    (2006,

In [10]:
# unzip the (year, month-day) pairs into two parallel sequences and store each
# one as its own column
years, month_dates = zip(*df_zipped)
df['Year'] = years
df['Month-Date'] = month_dates
df['Year']

0         2014
1         2009
2         2008
3         2005
4         2014
5         2010
6         2010
7         2005
8         2007
9         2011
10        2013
11        2008
12        2008
13        2006
14        2006
15        2014
16        2008
17        2006
18        2008
19        2015
20        2011
21        2008
22        2008
23        2008
24        2012
25        2006
26        2006
27        2012
28        2010
29        2014
          ... 
165055    2009
165056    2015
165057    2009
165058    2010
165059    2015
165060    2010
165061    2012
165062    2014
165063    2010
165064    2008
165065    2006
165066    2008
165067    2005
165068    2005
165069    2011
165070    2011
165071    2010
165072    2015
165073    2005
165074    2009
165075    2014
165076    2013
165077    2014
165078    2011
165079    2005
165080    2015
165081    2009
165082    2014
165083    2006
165084    2006
Name: Year, dtype: object