# Introduction to Data Visualization with the Matplotlib library

This notebook is a companion to my article "Introduction to Data Visualization with the Matplotlib library", which you can read [here]() if you're interested.

This notebook contains all the code samples used in the article so that you can run each one individually and see the resulting plot straight away. Don't forget to run the imports cell before the plots.

## Imports

In [0]:
from matplotlib import pyplot as plt
import numpy as np

## Line plots

In [0]:
# Base numbers 1..10 and the two series derived from them
nums = list(range(1, 11))
doubles = [2 * n for n in nums]
squares = [n ** 2 for n in nums]

# Draw one line per series against the base numbers:
# the doubles in red and the squares in blue
for values, name, shade in (
    (doubles, "Doubles", "red"),
    (squares, "Squares", "blue"),
):
    plt.plot(nums, values, label=name, color=shade)

# Title of the graph, then a label for each axis
plt.title("Evolution of doubles and squares for the range [1,10]")
plt.xlabel("Number")
plt.ylabel("Result")
# No location is passed, so Matplotlib decides where to place the legend
plt.legend()
plt.tight_layout()
plt.show()

## Scatter plot

In [0]:
# One X position per individual (1..20) and each individual's favorite number
x_coords = list(range(1, 21))
fav_nums = [
    9, 37, 45, 32, 46, 56, 28, 9, 16, 68,
    64, 100, 25, 1, 59, 50, 31, 96, 13, 76,
]

# Scatter the favorite numbers of the twenty individuals as red crosses
plt.scatter(x_coords, fav_nums, color="red", marker="x")

plt.title("Favorite number of twenty individuals")
plt.ylabel("Favorite number")
# Passing an empty list removes the ticks from the X axis
plt.xticks([])
plt.tight_layout()
plt.show()

## Pie chart

In [0]:
# One (person, movies watched, slice color) record per slice of the pie
movie_counts = [
    ("Dwight", 1, "#ff9999"),
    ("Pam", 9, "#66b3ff"),
    ("Robert", 6, "#99ff99"),
    ("Michael", 7, "#ffcc99"),
    ("Jim", 8, "#cccccc"),
]
# Split the records into the parallel lists that `plt.pie()` receives
labels, slices, colors = (list(column) for column in zip(*movie_counts))

# Pie chart of the number of movies watched in 2019 by the five people;
# `autopct` prints each slice's percentage with two decimal places
plt.pie(slices, colors=colors, labels=labels, autopct="%1.2f%%")

plt.title("Number of movies watched at the cinema in 2019")
plt.tight_layout()
plt.show()

## Bar chart

In [0]:
# Years-of-experience categories and the number of respondents in each one
# (the counts add up to the 40 individuals surveyed)
options = ["One", "Two", "Three", "Four+"]
respondents = [16, 9, 9, 6]

# Plot the years of experience working with Python of 40 individuals in a
# bar chart: one vertical bar per category, as tall as its respondent count
plt.bar(options, respondents)

plt.title("Years of experience working with Python (n=40)")
plt.ylabel("Number of respondents")
plt.tight_layout()
plt.show()

## Horizontal bar chart

In [0]:
# Years-of-experience categories and the number of respondents in each one
# (the counts add up to the 40 individuals surveyed)
options = ["One", "Two", "Three", "Four+"]
respondents = [16, 9, 9, 6]

# Plot the years of experience working with Python of 40 individuals in a
# horizontal bar chart: the categories go on the Y axis and the length of
# each bar along the X axis is its respondent count
plt.barh(options, respondents)

plt.title("Years of experience working with Python (n=40)")
plt.xlabel("Number of respondents")
plt.tight_layout()
plt.show()

## Histogram

In [0]:
# Ages of the 100 people in the group, ten values per row
ages = [
    83, 50, 88, 31, 37, 52, 81, 58, 23, 60,
    51, 62, 36, 95, 64, 59, 91, 70, 35, 94,
    61, 65, 96, 21, 95, 78, 99, 33, 29, 35,
    29, 98, 54, 48, 97, 41, 29, 82, 67, 55,
    37, 99, 20, 69, 70, 98, 88, 41, 30, 58,
    96, 33, 25, 52, 40, 69, 40, 32, 50, 51,
    64, 57, 75, 87, 37, 37, 82, 68, 65, 96,
    45, 27, 52, 86, 51, 45, 52, 67, 72, 98,
    84, 76, 43, 26, 44, 41, 58, 19, 84, 21,
    87, 46, 84, 76, 61, 74, 32, 58, 66, 29,
]
# Edges of the class intervals: 10, 20, ..., 100 (nine bins, ten years wide)
bins = list(range(10, 101, 10))

# Histogram of the ages over those class intervals, drawn as red bars
# with a black outline
plt.hist(ages, bins=bins, edgecolor="black", color="red")

plt.title("Distribution of ages in a group of 100 people")
plt.ylabel("Frequency")
plt.xlabel("Ages")
plt.tight_layout()
plt.show()

## Fill between

In [0]:
days = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# The Y-axis data is stored as NumPy arrays so that the comparisons passed
# to `where=` below are evaluated element by element and yield one boolean
# per day (comparing two plain lists would yield a single boolean)
company_sales = np.array([2, 6, 9, 12, 8, 15, 20, 32, 27, 30])
competition_sales = np.array([1, 2, 4, 5, 13, 12, 18, 22, 34, 38])

plt.plot(days, company_sales, color="blue", label="Company A")
# Plot the competition with a dashed black line
plt.plot(days, competition_sales, linestyle="--", color="black", label="Competition")

# Fill, in green, the area between both lines on the days where the company
# sold more than its competition. `interpolate=True` extends the filled
# region up to the point where the two lines cross.
plt.fill_between(
    days, company_sales, competition_sales,
    where=(company_sales > competition_sales),
    interpolate=True,
    color="green",
    alpha=0.25,
    label="Company A sold more"
)

# Fill, in red, the area between both lines on the days where the
# competition sold at least as much as the company
plt.fill_between(
    days, competition_sales, company_sales,
    where=(company_sales <= competition_sales),
    interpolate=True,
    color="red",
    alpha=0.25,
    label="Competition sold more"
)

plt.title("Company A vs Competition sales")
plt.xlabel("Day")
plt.ylabel("Number of sales (thousands)")
plt.legend()
# Adjust the padding like every other plot in this notebook does
plt.tight_layout()
plt.show()