In [None]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

#for showing plots without calling plt.show()
%matplotlib inline

# https://matplotlib.org/stable/api/
# https://matplotlib.org/stable/tutorials/index.html

# 1. Plotting Types

Two types of plotting are used with Matplotlib

1. Pyplot Functional Interface
2. Object Oriented Interface

In [None]:
# x and y are created here as plain Python lists.
# Both Matplotlib (plt) and seaborn (sns) also accept NumPy arrays, PyTorch tensors, etc.
x = [1, 2, 4, 6, 8]
y = [2, 4, 6, 12, 16]

## 1.1 Pyplot Functional Interface

In [None]:
# Pyplot (functional) interface: plt.plot draws on the implicit current axes
plt.plot(x,y)

## 1.2 Object Oriented interface

In [None]:
# Object-oriented interface. This is used more and more as the plots get advanced
fig, ax = plt.subplots()  # Create a figure containing a single axes
ax.plot(x, y) # Plot some data on the axes

# 2. Basic plotting and Elementary Customization

In [None]:
# Sample sin(a) at 1000 evenly spaced points on [0, 10].
# a and b are reused by several later cells.
a = np.linspace(0,10,1000)
b = np.sin(a)
plt.plot(a,b)

In [None]:
# Multiple plots in same figure. Colors are picked automatically by Matplotlib
# (b1, b2 and b3 are reused by the customization cells that follow)
a = np.linspace(0,10,1000)

b1 = np.sin(a)
b2 = np.cos(a)
b3 = b1+b2

# One plt.plot call per curve
plt.plot(a,b1)
plt.plot(a,b2)
plt.plot(a,b3)

### Note: Different types of elementary customizations

Inline customization per plot - passed as parameters to the plt.plot() function
1. color
2. line style

Customization per interaction (strictly speaking, per cell) - set by calling pyplot functions such as plt.xlim() or plt.figure()
1. x and y limit
2. figure size

Global customizations - once set this impacts all further plots (VERY IMP).
1. Plot style

## 2.1 Inline customization

### 2.1.1 Line color

In [None]:
# NOTE: the color argument is specified in 3 different ways:
# a full color name, a one-letter shorthand, and an (R, G, B) tuple of floats
plt.plot(a,b1,color='blue')
plt.plot(a,b2,color='g')
plt.plot(a,b3,color=(0.5,0.2,0.4))

### 2.1.2 Line Style

In [None]:
# NOTE: the 3rd positional argument is a format string that packs the line style and
# the color together, e.g. '-.k' is a dash-dot line in black.
# Instead of remembering this, it is better to use named arguments as in the next example
plt.plot(a,b1,'-.k')
plt.plot(a,b2,':g')
plt.plot(a,b3,'--r')

In [None]:
# Same idea with 2 named arguments, linestyle and color, instead of a format string
plt.plot(a,b,linestyle='-.',color='green')

## 2.2 Per Cell Customization

### 2.2.1 X Y Plot Range

In [None]:
plt.plot(a,b1,'-.k')
plt.plot(a,b2,':g')
plt.plot(a,b3,'--r')

# Set the visible x and y ranges. Both are wider than the data (a spans [0, 10]),
# so empty margins appear around the curves.
plt.xlim(-5,15)
plt.ylim(-3,2.5)

### 2.2.2 Figure Size 

In [None]:
# Create a new figure with an explicit size (width, height in inches); the plot is drawn into it
plt.figure(figsize=(8,8))
plt.plot(a,b)

In [None]:
# Note: the figsize above applied only to that figure; subsequent plots revert to the original figure size
plt.plot(a,b)

## 2.3 Global Customization

### 2.3.1 Precaution: Make a copy of the default config before global customization

Apart from above customizations, other customizations such as plot style etc. change the global default. It is best to store a copy of the default params before further customization.
This is done differently in Jupyter and normal Python

In [None]:
# Resetting to Matplotlib's built-in defaults: per the author this works in
# normal Python but not in Jupyter
# mpl.rcParams.update(mpl.rcParamsDefault)

# For Jupyter: take a snapshot of the current rcParams now, before any style is changed...
default_config = dict(mpl.rcParams)
# ...and restore it with this call. At this point it is a no-op because nothing has
# changed yet; the same call is repeated in a later cell to undo the style changes.
mpl.rcParams.update(default_config)

### 2.3.2 Check available styles and choose

In [None]:
# List the style names that this Matplotlib installation offers via plt.style
print(plt.style.available)

In [None]:
plt.style.use('classic') # Pick one style; this is a global change that affects later plots

fig = plt.figure() # Build an empty figure

ax = plt.axes() # Add axes to it. Note how the earlier example obtained both fig and ax from a single subplots() call

In [None]:
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8-*' in Matplotlib 3.6
# and the old names were removed in 3.8. Use the new name when this install has it,
# otherwise fall back to the pre-3.6 spelling.
whitegrid_style = 'seaborn-v0_8-whitegrid'
if whitegrid_style not in plt.style.available:
    whitegrid_style = 'seaborn-whitegrid'

plt.style.use(whitegrid_style) # now the style has changed
fig = plt.figure()
ax = plt.axes()

In [None]:
# The style selected in the previous cell (including its grid) also applies to this new plot
plt.plot(a,b)

In [None]:
# Reset the style by restoring the rcParams snapshot stored in default_config
mpl.rcParams.update(default_config)

In [None]:
# Plot again. Notice the style has reverted to the original
plt.plot(a,b)

## 2.4 What can be customized
Look at the anatomy of the plot to see this. Image courtesy of the Matplotlib usage guide (https://matplotlib.org/stable/tutorials/introductory/usage.html)

In [None]:
# Show the "anatomy of a figure" image.
# Image and display are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import Image, display

image_file = "https://raw.githubusercontent.com/datavector-io/datascience/main/01-Data_Visualization_Basics/anatomy.png"

# image_file is a remote URL, not a local path, so it must be passed as url=...
# (filename=... would try to open it from the local disk and fail).
display(Image(url=image_file))

### 2.4.1 Pyplot Functional Style customization

In [None]:
# Pyplot style: every call below acts on the implicit current axes
x = np.linspace(0, 2, 100)

plt.plot(x, x, label='linear')  # Plot some data on the (implicit) axes.
plt.plot(x, x**2, label='quadratic')  # etc.
plt.plot(x, x**3, label='cubic')
plt.xlabel('x label')
plt.ylabel('y label')
plt.title("Simple Plot")
plt.legend()  # Legend entries come from the label= arguments above

### 2.4.2 OO Style customization

In [None]:
x = np.linspace(0, 2, 100)

# Note that even in the OO-style, we use pyplot (`plt.subplots`) to create the figure.
fig, ax = plt.subplots()  # Create a figure and an axes.
ax.plot(x, x, label='linear')  # Plot some data on the axes.
ax.plot(x, x**2, label='quadratic')  # Plot more data on the axes...
ax.plot(x, x**3, label='cubic')  # ... and some more.
ax.set_xlabel('x label')  # Add an x-label to the axes.
ax.set_ylabel('y label')  # Add a y-label to the axes.
ax.set_title("Simple Plot")  # Add a title to the axes.
ax.legend()  # Add a legend.

## 2.5 Saving Repetition
Typically one has to plot the same way over and over again with different data sets, which leads to custom functions for plotting

In [None]:
def plot_data(ax, data1, data2, param_dict=None):
    """Plot data2 against data1 on the given axes with a shared default look.

    Parameters
    ----------
    ax : object with a ``plot`` method (e.g. a Matplotlib Axes)
        The axes to draw on.
    data1, data2 : array-like
        The x and y data, forwarded to ``ax.plot`` unchanged.
    param_dict : dict, optional
        Keyword arguments forwarded to ``ax.plot``. When omitted (or None),
        a star marker with a dashed line is used. Pass ``{}`` to plot with
        no extra styling.

    Returns
    -------
    Whatever ``ax.plot`` returns.
    """
    # Build the default per call instead of using a mutable dict as the
    # default argument value, which would be shared between all calls.
    if param_dict is None:
        param_dict = {'marker': '*', 'linestyle': '--'}
    return ax.plot(data1, data2, **param_dict)

x = np.linspace(0, 2, 10)
fig, ax = plt.subplots()
plot_data(ax, x, x**2) # no need to pass linestyle, marker etc.: plot_data's default param_dict supplies them

# 3. Simple Visualizations

## 3.1 Subplots

In [None]:
# First way of subplotting: plt.subplots returns the figure plus one axes object per subplot
x = np.linspace(0, 2, 10)

fig, (ax1, ax2) = plt.subplots(1, 2) # 1 row with 2 subplots

# Each axes object is then plotted on explicitly
ax1.plot(x, x**2)
ax2.plot(x, x**3)

In [None]:
# Second way of subplotting: plt.subplot(<rows><cols><index>) makes one panel current
# at a time, and the following plt.plot call draws into it.
x = np.linspace(0, 2, 10)

# (subplot position, y values) for the three panels of a 1x3 grid
subplot_specs = [(131, x), (132, x**2), (133, x**3)]

# First pass: the implicit figure at its default size
for position, curve in subplot_specs:
    plt.subplot(position)
    plt.plot(x, curve)
plt.suptitle('Second way of Sub Plotting in default figure size')

# By default, plt attempts to fit all plots in a narrow band as above. Increase this
plt.figure(figsize=(12, 3))

# Second pass: the same three panels in the wider figure
for position, curve in subplot_specs:
    plt.subplot(position)
    plt.plot(x, curve)
plt.suptitle('Second way of Sub Plotting')

## 3.2 Plotting Numerical Variables
with plot() and scatter()

In [None]:
x = np.linspace(0, 2, 10)

plt.subplot(121) # first panel of a 1x2 grid: line plot
plt.plot(x, x)
plt.subplot(122) # second panel: scatter plot of x against x**2
plt.scatter(x, x**2)

In [None]:
# evenly sampled time at 200ms intervals
t = np.arange(0., 5., 0.2)

# A single plot call can take several (x, y, format) triples:
# red dashes, blue squares and green triangles
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')
plt.show()

### 3.2.1 Plotting Numerical data straight from Pandas

In [None]:
# simulating a pandas dataframe with a dict of equal-length NumPy arrays
data = {'a': np.arange(50),
        'c': np.random.randint(0, 50, 50),
        'd': np.random.randn(50)}
data['b'] = data['a'] + 10 * np.random.randn(50)  # noisy linear function of 'a'
data['d'] = np.abs(data['d']) * 100  # non-negative values, scaled up by 100

# With data=..., the string arguments are looked up as keys of `data`:
# 'a' and 'b' give the x/y positions, c='c' the marker colors and s='d' the marker sizes.
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html
plt.scatter('a', 'b', c='c', s='d', data=data)
plt.xlabel('entry a')
plt.ylabel('entry b')
plt.show()

## 3.3 Demo

In [None]:
# create some random data: y is a linear function of x plus random noise
x = np.random.randn(100)
rand_array = np.random.randn(100)
print(type(rand_array))
print(rand_array.shape)
y = 3 * x + rand_array
y  # bare last expression: the notebook displays the array

In [None]:
plt.scatter(x,y) # matplotlib scatter

In [None]:
# matplotlib plot of only 7 (T, power) samples joined by a line.
# T and power are reused by the next cell, which smooths the curve.
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])

plt.plot(T,power)
plt.show()

In [None]:
from scipy.interpolate import make_interp_spline, BSpline
# 300 represents number of points to make between T.min and T.max
xnew = np.linspace(T.min(), T.max(), 300) 

# Build a cubic (k=3) interpolating spline through the (T, power) samples
spl = make_interp_spline(T, power, k=3)  # type: BSpline
# Evaluate the spline on the dense grid to obtain a smooth curve
power_smooth = spl(xnew)

plt.plot(xnew, power_smooth)
plt.show()

In [None]:
def f(t):
    """Return the damped cosine exp(-t) * cos(2*pi*t) for a scalar or array t."""
    decay = np.exp(-t)
    oscillation = np.cos(2*np.pi*t)
    return decay * oscillation

t1 = np.arange(0.0, 5.0, 0.1)  # coarse grid, drawn as markers
t2 = np.arange(0.0, 5.0, 0.02) # fine grid, drawn as a line

plt.figure()
plt.subplot(211)
# multiple plots in a single invocation
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k') # bo means blue circle g^ means green triangle etc.

plt.subplot(212)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--') # r-- means red dash
plt.show()

## 3.4 Plotting Categorical Variables

### 3.4.1 Bar chart

In [None]:
# Bar chart: the strings in names label the bars, values gives the bar heights
names = ['group_a', 'group_b', 'group_c']
values = [1, 10, 100]
plt.bar(names, values)

### 3.4.2 Histogram

In [None]:
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)  # 10000 normally distributed samples with mean mu and std sigma

# the histogram of the data: 50 bins, normalized (density=1) so the y axis is a density
n, bins, patches = plt.hist(x, 50, density=1, facecolor='g', alpha=0.75)


plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')  # text placed at data coordinates (60, 0.025)
plt.axis([40, 160, 0, 0.03])  # [xmin, xmax, ymin, ymax]
plt.grid(True)
plt.show()

### 3.4.3 Side by side bar charts

In [None]:
# Yearly counts per car make
years = [2013,2014,2015,2016,2017,2018,2019,2020,2021]
Ford = [0,0,4,0,0,0,1,1,1]
Honda = [2,0,0,0,1,2,2,2,2]
Toyota = [1,2,4,7,8,18,18,20,22]
Chevrolet = [0,0,0,1,1,3,4,4,4]
Dodge = [4,0,0,8,3,4,8,10,10]

# Width of a single bar; the five bars of one year sit side by side around its tick
width = 0.15

# One integer x position per year
years_index = np.arange(len(years))

# Reset to the default style, then switch to ggplot (a global style change)
plt.style.use('default')
plt.style.use(['ggplot'])

plt.figure(figsize=(8,5))

# plt interface approach for bar charts.
# Each entry: (offset in bar widths, legend label, counts, bar color, extra bar kwargs)
bar_specs = [
    (-2, 'Ford', Ford, '#c6ddf1', {'linewidth': 0.4, 'edgecolor': 'darkgrey'}),
    (-1, 'Honda', Honda, '#99b6ce', {}),
    (0, 'Toyota', Toyota, '#6d91ad', {}),
    (1, 'Chevrolet', Chevrolet, '#416e8c', {}),
    (2, 'Dodge', Dodge, '#004c6d', {}),
]
for offset, make, counts, shade, extra in bar_specs:
    plt.bar(years_index + offset*width, counts, color=shade, label=make, width=width, **extra)

plt.legend()
plt.title('Car model ownership')
plt.xlabel('Year')
# Show the actual years as tick labels at the integer positions
plt.xticks(ticks=years_index, labels=years)
plt.ylabel('Count')
plt.show()

## 3.5 More Plots
https://matplotlib.org/stable/tutorials/introductory/sample_plots.html#sphx-glr-tutorials-introductory-sample-plots-py

# 4. Other Customization

In [None]:
plt.title(r'$\sigma_i=15$') # Use mathematical symbols (TeX-style markup between $...$) in text etc.

In [None]:
# Add annotations in plot
ax = plt.subplot()

t = np.arange(0.0, 5.0, 0.01)
s = np.cos(2*np.pi*t)
line, = plt.plot(t, s, lw=2)

# xy is the point being annotated, xytext is where the label text is placed;
# arrowprops styles the arrow drawn between the two
plt.annotate('local max', xy=(2, 1), xytext=(3, 1.5),
             arrowprops=dict(facecolor='black', shrink=0.05),
             )

plt.ylim(-2, 2)  # y range wider than the curve, so the label at y=1.5 fits inside
plt.show()

In [None]:
# Non linear axes

# Fixing random state for reproducibility (this seeds NumPy's global random generator)
np.random.seed(19680801)

# make up some data in the open interval (0, 1)
y = np.random.normal(loc=0.5, scale=0.4, size=1000)
y = y[(y > 0) & (y < 1)]  # keep only the samples strictly inside (0, 1)
y.sort()  # sort in place
x = np.arange(len(y))  # x is simply the sample index

# plot with various axes scales
plt.figure()

# linear
plt.subplot(221)
plt.plot(x, y)
plt.yscale('linear')
plt.title('linear')
plt.grid(True)

# log
plt.subplot(222)
plt.plot(x, y)
plt.yscale('log')
plt.title('log')
plt.grid(True)

# symmetric log (the data is shifted by its mean, so it takes both signs)
plt.subplot(223)
plt.plot(x, y - y.mean())
plt.yscale('symlog', linthresh=0.01)
plt.title('symlog')
plt.grid(True)

# logit
plt.subplot(224)
plt.plot(x, y)
plt.yscale('logit')
plt.title('logit')
plt.grid(True)

# Adjust the subplot layout, because the logit one may take more space
# than usual, due to y-tick labels like "1 - 10^{-3}"
plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25,
                    wspace=0.35)

plt.show()

In [None]:
# Scatter plot in which color and marker size each encode an extra random variable.
# numpy (np) and pyplot (plt) are already imported in the notebook's first cell,
# so they are not re-imported here.
x = np.random.randint(100, size=100)
y = np.random.randint(100, size=100)
colors = np.random.randint(100, size=100)
sizes = 10 * np.random.randint(100, size=100)

# c maps each point's value through the colormap, s sets each marker's size
plt.scatter(x, y, c=colors, s=sizes, alpha=0.5, cmap='nipy_spectral')

plt.colorbar()  # color scale for the values passed as c

plt.show()