In [1]:
# import libraries

# % python magic function for notebook / back-end rendering of plots to screen or files  
%matplotlib notebook
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt


In [2]:
# Week 2: Applied Plotting, Charting & Data Rep
# Basic Plotting
# Scatterplots
# Line plots
# Bar chart

In [3]:
# Week 2: Intro

In [4]:
# Artist objects available in matplotlib
# matplotlib has 3 layers: the backend layer, the artist layer and the scripting layer

# backend layer: rendering of plots to screen or files, e.g. the inline backend in Jupyter
# artist layer: contains containers such as Figure, Subplot, Axes, and primitives such as Line2D, Rectangle, collections, PathCollection
# scripting layer: simplifies access to the Artist and Backend layers

In [5]:
# Week 2: Basic Plotting
# from matplotlib.backends.backend_agg import FigureCanvasAgg
# from matplotlib.figure import Figure

# all functions in pyplot are part of the scripting layer
# plt.plot function: generate series of lines rendered against an axis object

# plot (x, y, type),  plot coordinates x,y type=type_leg
# fig = Figure(),  create new Figure
# ax = fig.add_subplot(111) , create subplot to Figure 
# plt.gca() get axis from plt
# ax.get_children(), get features of a plot

In [6]:
# matplotlib is imported again here (it is also imported in the first cell); repeating the import is harmless
import matplotlib as mpl
import matplotlib.pyplot as plt

# IPython help syntax: a trailing `?` shows the documentation of plt.plot (not valid in plain Python)
plt.plot?
# *args in the signature: plt.plot accepts any number of positional arguments

In [7]:
# plt.plot(x, y, fmt): plot the single point (3, 2); the format string '.' selects a point marker
plt.plot(3, 2, '.')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x20c88f0acd0>]

In [8]:
# same call as the previous cell: plot the point (3, 2) with a point marker
plt.plot(3, 2, '.')

[<matplotlib.lines.Line2D at 0x20c88f25310>]

In [9]:
# Object API: work with the Artist layer directly instead of going through pyplot
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

# build a Figure and wrap it in an Agg canvas, which is used below to write the PNG
fig = Figure()
canvas = FigureCanvasAgg(fig)

# add_subplot returns the axes object; plotting is done through its methods
ax = fig.add_subplot(111)
ax.plot(3, 2, '.')

# render the figure to a file on disk
canvas.print_png('test.png')

In [10]:
%%html
<!-- %%html cell magic: the cell body is rendered as HTML, so notes must be HTML comments
     (a leading '#' would be shown as literal text). Displays the PNG written to test.png. -->
<img src = 'test.png' />

In [11]:
# open a new figure and plot the point (3, 2) with the circle marker 'o'
plt.figure()
plt.plot(3, 2, 'o')

# fetch the current axes and set its limits, given as [x_min, x_max, y_min, y_max]
ax = plt.gca()
axis_limits = [0, 6, 0, 10]
ax.axis(axis_limits)

<IPython.core.display.Javascript object>

(0.0, 6.0, 0.0, 10.0)

In [12]:
# artists can be added to the axes at any time:
# plot three separate points on a fresh figure, one plt.plot call per point
plt.figure()
for coord in (1.5, 2, 2.5):
    plt.plot(coord, coord, 'o')

# list the child objects (features) of the current axes
ax = plt.gca()
ax.get_children()

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x20c89156910>,
 <matplotlib.lines.Line2D at 0x20c89156d30>,
 <matplotlib.lines.Line2D at 0x20c891620a0>,
 <matplotlib.spines.Spine at 0x20c891244f0>,
 <matplotlib.spines.Spine at 0x20c89124610>,
 <matplotlib.spines.Spine at 0x20c89124730>,
 <matplotlib.spines.Spine at 0x20c89124850>,
 <matplotlib.axis.XAxis at 0x20c89124490>,
 <matplotlib.axis.YAxis at 0x20c89124d60>,
 Text(0.5, 1.0, ''),
 Text(0.0, 1.0, ''),
 Text(1.0, 1.0, ''),
 <matplotlib.patches.Rectangle at 0x20c8913e160>]

In [13]:
# Week 2: Scatterplots: 2D discrete plot
# plt.scatter(x,y, s=size, c=colors, label),  plot scatterplot given x,y data-set points
# parameters: x=x-data, y=y-data, s=size of points, c=colors, label= label of plot
# zip_gen = zip([1,2,3,4,5], [6,7,8,9,10]),  create zip generator function

# plt.xlabel(x-label),  create x-label
# plt.ylabel(y-label),  create y-label
# plt.title(title), create title of plot(subplot)


# plt.legend() create legend 
# plt.legend(loc=loc, frameon=False, title='title'),  set legend parameters: characterization of lines/functions in figures 

In [14]:
# 1. pyplot retrieves the current figure with the function gcf, then gets the current axes with the function gca; it keeps track of the axes for you
# 2. pyplot just mirrors the API of the axes object, calling the axes' plot function underneath
# 3. the declarations of most pyplot functions end with an open set of keyword arguments (**kwargs)

In [15]:
# scatterplot: 2D discrete plot; x and y must have the same length

# point coordinates 1..8; y refers to the very same array as x
x = np.arange(1, 9)
y = x

# draw the points as a scatterplot on a new figure
plt.figure()
plt.scatter(x, y)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x20c8918be20>

In [16]:
# point coordinates 1..8; y refers to the very same array as x
x = np.arange(1, 9)
y = x

# one color per point: green for all but the last point, which is red
colors = ['green'] * (len(x) - 1) + ['red']

# scatterplot on a new figure with marker size 100 and the per-point colors
plt.figure()
plt.scatter(x, y, s=100, c=colors)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x20c891ea190>

In [17]:
# zip() takes several iterables and lazily yields tuples that pair up their i-th elements
first_values = [1, 2, 3, 4, 5]
second_values = [6, 7, 8, 9, 10]
zip_gen = zip(first_values, second_values)

# turn the zip object into a list of tuples (this consumes zip_gen)
list(zip_gen)

[(1, 6), (2, 7), (3, 8), (4, 9), (5, 10)]

In [18]:
# build the zip object of (x, y) pairs
zip_gen = zip([1, 2, 3, 4, 5], [6, 7, 8, 9, 10])

# "unzip": collect the pairs, then let zip(*pairs) regroup them into one tuple of x values and one of y values
pairs = list(zip_gen)
x, y = zip(*pairs)

In [19]:
# new figure holding two labelled groups of points
plt.figure()

# both groups share the same marker size
marker_size = 100

# first two points in red, the remaining points in blue; each call carries its own label
plt.scatter(x[:2], y[:2], s=marker_size, c='red', label='Tall students')
plt.scatter(x[2:], y[2:], s=marker_size, c='blue', label='Short students')


<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x20c893dab80>

In [20]:
# These pyplot calls act on the current axes.

# plt.xlabel(text): set the label of the x axis
plt.xlabel('# Number of Kicks to the ball')

# plt.ylabel(text): set the label of the y axis
plt.ylabel('Grade of the student')

# plt.title(text): set the title of the plot (subplot)
plt.title('Grades of Students vs Number of Kicks')

Text(0.5, 1.0, 'Grades of Students vs Number of Kicks')

In [21]:
# create a legend with default settings
plt.legend()
# call legend again with explicit settings: location code 4 (lower right), no frame, and a title
plt.legend(loc=4, frameon=False, title='Legend')

<matplotlib.legend.Legend at 0x20c891a89a0>

In [22]:
# Week 2: Line plot, continuous function

# plt.plot(function, '-o' )
# plt.gca().fill_between(range(len(x_data)), function1, function2, facecolor='color', alpha=transparency),  shade the area between 2 functions
# np.arange('YYYY-MM-DD', 'YYYY-MM-DD', dtype='datetime64[D]'),  create an array of consecutive dates
# 

In [23]:
# Week 2: Line plot, continuous function

# plt.plot(function, '-o' )

# plt.gca().fill_between(range(len(x_data)), function1, function2, facecolor='color', alpha=transparency),  shade the area between 2 functions

# daily numpy dates from 2017-01-01 up to, but not including, 2017-01-09
day_range = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
# convert every numpy date with pd.to_datetime and keep the results in a list
observation_dates = [pd.to_datetime(day) for day in day_range]

In [24]:
# when plt.plot() is given only the y values of a function, the index of each value (0, 1, 2, ...) is used as its x value
# plot treats each data series as a separate line and draws it in a different color
# 

In [25]:
# y values of the two series: 1..8 and their squares
linear_data = np.arange(1, 9)
quadratic_data = linear_data ** 2

# new figure for the line plot
plt.figure()

# plot both series in one call; the '-o' format draws a line with circle markers
plt.plot(linear_data, '-o', quadratic_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x20c893f1a60>,
 <matplotlib.lines.Line2D at 0x20c893f15e0>]

In [26]:
# plot a third series given only its y values; format '--r' draws a dashed red line
plt.plot([22, 44, 55], '--r')

[<matplotlib.lines.Line2D at 0x20c891a8430>]

In [27]:
# set the axis labels and the title of the current axes
plt.xlabel('x label')
plt.ylabel('y label')
plt.title('title')
# legend built from an explicit list of names, one name per plotted line
plt.legend(['plot1','plot2','plot3'])

<matplotlib.legend.Legend at 0x20c89453160>

In [28]:
# highlight the difference between 2 functions (e.g. a standard deviation band or the gap between graphs)
# with fill_between on the current axes

# x positions 0..N-1, one per data point
x_positions = range(len(linear_data))
plt.gca().fill_between(x_positions, linear_data, quadratic_data, facecolor='blue', alpha=0.25)

<matplotlib.collections.PolyCollection at 0x20c89427c10>

In [29]:
# new figure for the date-based line plot
plt.figure()

# daily numpy dates from 2017-01-01 up to, but not including, 2017-01-09, used as the x values of both series
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]' )
plt.plot(observation_dates, linear_data, '-o', observation_dates, quadratic_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x20c8947d550>,
 <matplotlib.lines.Line2D at 0x20c894aef10>]

In [34]:
# create figure
plt.figure()

# daily numpy dates from 2017-01-01 up to, but not including, 2017-01-09
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]' )

# Deliberate failure demo: map() returns a lazy iterator rather than a list,
# and matplotlib refuses generator-like input.
observation_dates = map(pd.to_datetime, observation_dates)
try:
    plt.plot(observation_dates, linear_data, '-o', observation_dates, quadratic_data, '-o')
except RuntimeError as err:
    # show the expected error instead of letting it abort "Run All";
    # the fix is to wrap the map in list(...) before plotting
    print('RuntimeError:', err)

<IPython.core.display.Javascript object>

RuntimeError: matplotlib does not support generators as input

In [37]:
# new figure for the date-based line plot
plt.figure()

# build the x values: convert each daily numpy date (2017-01-01 up to, not including, 2017-01-09)
# with pd.to_datetime and collect the results in a list, which matplotlib accepts
observation_dates = [
    pd.to_datetime(day)
    for day in np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
]
plt.plot(observation_dates, linear_data, '-o', observation_dates, quadratic_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x20c8c857580>,
 <matplotlib.lines.Line2D at 0x20c8b2ae550>]

In [36]:
# x axis object of the current axes
x = plt.gca().xaxis

# turn every tick label on the x axis by 45 degrees
for tick_label in x.get_ticklabels():
    tick_label.set_rotation(45)

# set the bottom edge of the subplot area to 0.25 (presumably to leave room for the rotated labels)
plt.subplots_adjust(bottom=0.25)

In [None]:
# current axes object
ax = plt.gca()

# set both axis labels in one call, then the title
ax.set(xlabel='Date', ylabel='Units')
ax.set_title('Quadratic vs. Linear Performance')


In [None]:
# replace the title; the parts between $...$ are written in matplotlib's math-text notation
ax.set_title('Quadratic ($x^2$) vs. Linear ($x$) performance')

In [None]:
# Week 2: Bar charts,  histogram, discrete graph
# plt.bar(x vals, y vals, width=width_bar) 
# linear_err = [randint(0,15) for x in range(len(linear_data)) ],  add error bars
# plt.bar(x_vals, y_vals, width= width_bar, yerr=linear_err) parameter:  yerr= linear_err

# plt.barh(x_vals, y_vals, height=height_bar, color='color '),  horizontal bar chart
# parameter left= function,  stack function
# plt.barh(xvals, quadratic_data, height= 0.3, left = linear_data, color = 'r')

In [31]:
# create a new figure for the bar chart
plt.figure()

# x positions 0..N-1, one per value in linear_data
xvals = range(len(linear_data))
# plt.bar(x values, bar heights, width=bar width)
plt.bar(xvals, linear_data, width = 0.3 )

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

In [32]:
# shift every x position right by 0.3, the width used for these bars
new_xvals = [position + 0.3 for position in xvals]

# draw the quadratic series in red at the shifted positions
plt.bar(new_xvals, quadratic_data, width=0.3, color='red')


<BarContainer object of 8 artists>

In [33]:
from random import randint, seed

# fix the seed so the random error bars are identical on every run of the notebook
seed(0)

# one random error value between 0 and 15 (inclusive) per data point
linear_err = [randint(0, 15) for _ in range(len(linear_data))]

# yerr=linear_err draws an error bar on each bar
plt.bar(xvals, linear_data, width=0.3, yerr=linear_err)

<BarContainer object of 8 artists>

In [None]:
# new figure for the stacked vertical bar chart
plt.figure()

# x positions 0..N-1, one per value in linear_data
xvals = range(len(linear_data))
bar_width = 0.3

# lower layer: the linear series in blue
plt.bar(xvals, linear_data, width=bar_width, color='b')

# upper layer: the quadratic series in red; bottom=linear_data stacks it on top of the blue bars
plt.bar(xvals, quadratic_data, width=bar_width, bottom=linear_data, color='r')


In [None]:
# new figure for the stacked horizontal bar chart
plt.figure()

# bar positions 0..N-1, one per value in linear_data
xvals = range(len(linear_data))
bar_height = 0.3

# first layer: the linear series in blue
plt.barh(xvals, linear_data, height=bar_height, color='b')

# second layer: the quadratic series in red; left=linear_data starts each red bar where the blue one ends
plt.barh(xvals, quadratic_data, height=bar_height, left=linear_data, color='r')

In [None]:
# Week 2: Dejunkifying a plot
# bars[0].set_color(#REF), get bars[0] in 0,  set color of 1 bar chart

# for bar in bars:  iterate through each bar with Y axis
    
    # convert y values into text in bars
    # plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 5, str(int(bar.get_height())) + '%', 
    #             ha='center', color='w', fontsize=11)
# plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# create a new figure for the dejunkified bar chart
plt.figure()

# categories shown on the x axis
languages =['Python', 'SQL', 'Java', 'C++', 'JavaScript']

# one bar position per language
pos = np.arange(len(languages))

# bar heights: popularity in percent, in the same order as `languages`
popularity = [56, 39, 34, 34, 29]

# draw all bars in a muted grey with no outline:
# plt.bar(positions, heights, align=..., linewidth=0, color='color')
bars = plt.bar(pos, popularity, align='center', linewidth=0, color='lightslategrey')

# make one bar, the first (Python) bar, a contrasting color
bars[0].set_color('#1F77B4')

# soften all labels by making them partly transparent:
# plt.xticks(tick positions, tick labels, alpha=transparency)
plt.xticks(pos, languages, alpha=0.8)

# the Y label is left out on purpose because the bars are labeled directly below
# plt.ylabel('% Popularity', alpha=0.8)


plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Overflow', alpha=0.8)

# remove all the ticks (both axes) and the tick labels on the Y axis; keep the X tick labels.
# tick_params expects booleans here: the strings 'off'/'on' are both truthy,
# so passing them would leave every tick and label visible.
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# direct label each bar with its Y value
for bar in bars:
    # white percentage text, centered horizontally on the bar and placed 5 units below its top
    plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 5, str(int(bar.get_height())) + '%',
                 ha='center', color='w', fontsize=11)
plt.show()