# Introduction
`matplotlib` is a comprehensive library for creating static, animated, and interactive visualizations in Python.

<img src='https://matplotlib.org/_images/sphx_glr_scatter_demo2_0011.png'>

[Matplotlib Gallery](https://matplotlib.org/gallery/index.html)

# Imports

In [None]:
# Data Wrangling
import numpy as np
import pandas as pd

# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Datetime
from datetime import datetime


In [None]:
# Show the version string of the imported matplotlib package.
mpl.__version__

In [None]:
# IPython-only syntax: a trailing `?` displays the object's help/docstring.
mpl?

In [None]:
# IPython-only syntax: a trailing `??` additionally shows the source code when it is available.
mpl??

# Paths & Variable Initialisations

In [None]:
# Switch to the 'ggplot' style sheet for the figures created after this call.
plt.style.use('ggplot')

# Basics

In [None]:
# Shared x-grid for the examples below: 100 evenly spaced samples on [0, 10].
x = np.linspace(start=0, stop=10, num=100)
# A bare figure with no axes on it yet.
fig = plt.figure()

In [None]:
# Create a figure together with one axes; the cell output is the (figure, axes) pair.
plt.subplots()

Matplotlib is highly object-oriented; at a broad level it has two major objects:

- Figure : Think of a `figure` as a blank canvas on which you would draw.
- Axes : Think of `axes` as the tools that you would draw with

In [None]:
# Evaluate both curves on the shared x-grid.
sines, cosines = np.sin(x), np.cos(x)

# Larger canvas at a higher resolution; the pyplot call below draws on it.
fig = plt.figure(figsize=(12, 8), dpi=120)
plt.plot(x, sines, '-')  # '-' = solid line

In [None]:
# Plot the cosine curve over x using the '--' (dashed) format string.
plt.plot(x, cosines, '--')

In [None]:
# Start a fresh figure and plot cosine against sine (not against x),
# using the '-.' (dash-dot) format string.
fig=plt.figure()
plt.plot(sines, cosines, '-.')

<b>Saving a plot</b>

In [None]:
# Write the figure currently bound to `fig` to a PNG file; the path is
# relative, so it lands in the current working directory.
fig.savefig('sine_cosine.png')

<b>Multiple Plots</b>

In [None]:
# Two stacked axes on one figure, filled in a single pass:
# sine on the first axes, cosine on the second.
fig, axes = plt.subplots(nrows=2)
for _ax, _curve, _ylabel in zip(axes, (sines, cosines), ('Sine', 'Cosine')):
    _ax.plot(x, _curve)
    _ax.set_ylabel(_ylabel)

# Figure-level title shared by both axes.
fig.suptitle('Sine / Cosine')

# Simple Line Plot

<b>Generating Random Data</b>

In [None]:
def gen_data(shape=200, init_offset=10, multiplier = 1e3):
    """Generate a scaled one-dimensional Gaussian random walk.

    The walk starts at ``init_offset`` plus one standard-normal draw; every
    later point is the previous point plus another standard-normal draw.
    The finished walk is multiplied by ``multiplier``.

    Parameters
    ----------
    shape : int
        Number of steps taken *after* the starting point, so the result has
        ``shape + 1`` elements. A non-positive value yields only the
        starting point.
    init_offset : float
        Constant added to the first draw (the level the walk starts around).
    multiplier : float
        Scale factor applied to every point of the walk.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``max(shape, 0) + 1``.
    """
    # One draw for the starting point plus one per step, taken in a single
    # vectorised call instead of a Python-level append loop.
    _steps = np.random.randn(max(shape, 0) + 1)
    _steps[0] += init_offset
    # The running sum of the increments is the walk itself.
    return np.cumsum(_steps) * multiplier


<b>Creating a Time Series</b>

In [None]:
# Random-walk values indexed by consecutive business-day periods ('B')
# starting from the current date, one period per value.
sdata = gen_data()
sindex = pd.period_range(start=datetime.now(), periods=len(sdata), freq='B')
sts_data = pd.Series(sdata, index=sindex)
sts_data.head()

In [None]:
# Let pandas draw the series on a 15x7 inch figure. The `$...$` part of the
# title is matplotlib mathtext. (Title typo "dateteime" corrected.)
_ = sts_data.plot(figsize=(15, 7),
                  title='Simple Line Plot ($datetime$ index)')

In [None]:
# Convert the index labels to plain strings, then plot again so the x-axis
# can be compared with the previous plot.
sts_data.index = sts_data.index.astype('str')
_=sts_data.plot(figsize=(15,7),
                title='Simple Line Plot ($object$ index)')

<b>Let's say the plot describes the price of a commodity in the Indian market. Can we make this plot look a little better?</b>

- Add Rupee Symbol to the yticks
- Proper formatting of the datetime in the x-axis
- Add a marker to the actual data points to highlight their presence
- Add a legend to the plot
- Add title to the plot

In [None]:
# Parse the index labels into timestamps (the index was converted to
# strings in an earlier cell).
sts_data.index = pd.to_datetime(sts_data.index)
# Re-attach a frequency by inferring it from the timestamps themselves.
sts_data.index.freq = pd.infer_freq(sts_data.index)
# The series name is reused as the legend label in the plots that follow.
sts_data.name = 'Gold Prices'
sts_data.index

<b>Difference between plotting with `pandas` and `matplotlib`</b>

In [None]:
# Pure-matplotlib version: create the figure and axes first, then draw on the axes.
fig, axes = plt.subplots(figsize=(15, 7))

# y-tick labels as rupee amounts with thousands separators and no decimals
# (FuncFormatter is a similar alternative).
fmt = '₹{x:,.0f}'
axes.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))

# Dash-dot line with a small round marker on every data point; the label
# is what the legend below displays.
_ = axes.plot(
    sts_data,
    label=sts_data.name,
    linestyle='-.',
    marker='o',
    markersize=4,
)
_ = axes.legend()


In [None]:
# pandas version of the same chart: Series.plot handles figure creation,
# and legend=True adds the legend.
_ = sts_data.plot(
    figsize=(15, 7),
    legend=True,
    linestyle='-.',
    marker='o',
    markersize=4,
)
_ = plt.title('Gold Price Fluctuation', fontsize=20)

# Rupee formatting for the y ticks of the current axes; `fmt` is defined
# in an earlier cell.
plt.gca().yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))


<b>Plotting Multiple Line Plots</b>

In [None]:
# Two independent random walks, one per column of the frame.
sdata1 = gen_data()
sdata2 = gen_data()
# Business-day periods starting from the current date, one per observation.
# The length comes from `sdata1` so this cell no longer depends on the
# `sdata` array left over from an earlier cell.
sindex = pd.period_range(datetime.now(), freq='B', periods=sdata1.shape[0])
stdf_data = pd.DataFrame(data=np.c_[sdata1, sdata2], # What have I done just here??
                         columns=['Gold Prices', 'Silver Prices'],
                         index=sindex)
stdf_data.head()

In [None]:
# y-tick format
_=stdf_data.plot(figsize=(15,7), legend=True, 
                 linestyle='-.', color=['r','g'],
                 marker='o', markersize=4, # Add marker size
                 )

_=plt.title('Gold & Silver Price Fluctuation', fontsize=20)
plt.gca().yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))


<b>Multi-line plots on dual axis</b>

In [None]:
# Dual-axis attempt that plots the series exactly as stored in stdf_data,
# i.e. still carrying the PeriodIndex it was built with.
# NOTE(review): the text following this cell says it is expected to raise
# for that reason -- the code is intentionally left as is.
fig, axes =plt.subplots(figsize=(15,7))

# y-tick format: rupee symbol, thousands separators, no decimals
fmt = '₹{x:,.0f}'
axes.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))

# Plot the first column on the primary axes
data1 = stdf_data.iloc[:,0].copy()
_=axes.plot(data1, linestyle='-.', # Change linestyle
            label=data1.name, # Add label
            marker='o', markersize=4, # Add marker size
            )
_=axes.legend() # Add Legend

# Spawn a new axes which shares the x-axis, and plot the second column on it
data2 = stdf_data.iloc[:,1].copy()
axes_dual = axes.twinx()
_=axes_dual.plot(data2, linestyle='-.', # Change linestyle
            label=data2.name, # Add label
            marker='o', markersize=4, # Add marker size
            )
_=axes_dual.legend() # Add Legend



# Apply the same tick format to the y-axis of whatever axes is current here
plt.gca().yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))



Throws an Error! Why? Because `PeriodIndex` is not an object type understood by matplotlib

In [None]:
fig, axes = plt.subplots(figsize=(15, 7))

# y-tick format: rupee symbol, thousands separators, no decimals
fmt = '₹{x:,.0f}'
axes.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))

# Plot the first column on the primary axes. Its PeriodIndex is converted
# to timestamps first so that matplotlib can place the points on the x-axis.
data1 = stdf_data.iloc[:, 0].copy()
data1.index = data1.index.to_timestamp()
# Each axes starts its own colour cycle, so without explicit colours both
# lines would get the same first colour and be indistinguishable.
# 'C0' / 'C1' pick the first two colours of the active style.
_line1 = axes.plot(data1, linestyle='-.',  # Change linestyle
                   label=data1.name,  # Add label
                   marker='o', markersize=4,  # Add marker size
                   color='C0',
                   )

# Spawn a new axes which shares the x-axis but has an independent y-axis
data2 = stdf_data.iloc[:, 1].copy()
data2.index = data2.index.to_timestamp()
axes_dual = axes.twinx()
_line2 = axes_dual.plot(data2, linestyle='-.',  # Change linestyle
                        label=data2.name,  # Add label
                        marker='o', markersize=4,  # Add marker size
                        color='C1',
                        )
axes_dual.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(fmt))

# One legend listing both lines: two separate per-axes legends are placed
# independently of each other and can end up drawn on top of one another.
_ = axes_dual.legend(handles=_line1 + _line2)  # Add Legend

_ = fig.suptitle('Gold & Silver Price Fluctuation (Independent y-axis)', fontsize=20)  # Why not title?




See what happens when you give `_=axes.set_title('Axes Title', fontsize=20)` command just after `_=fig.suptitle('Go....`

# Simple Scatter Plots

In [None]:
from sklearn.datasets import load_iris

# Load the iris data set and build one frame holding the feature columns
# followed by a 'target' column.
iris = load_iris()
irisDF = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
irisDF.head()

In [None]:
# Lookup table from positional target index (0, 1, ...) to the matching
# entry of iris.target_names.
targetcol_mapping = dict(enumerate(iris.target_names))
targetcol_mapping

In [None]:
# Preview only: display the mapped target values; irisDF is not modified here.
irisDF.target.map(targetcol_mapping)

In [None]:
# Overwrite the 'target' column with its mapped names.
# NOTE: running this cell a second time maps the already-converted names,
# which are not keys of the mapping, so the column would turn into NaN.
irisDF['target'] = irisDF['target'].map(targetcol_mapping)
irisDF.head()

In [None]:
# Scatter plot with plain matplotlib.
fig, axes =plt.subplots(figsize=(15,7))
# x / y: first and second column of irisDF; marker size: 100 x the fourth
# column; colour: the numeric iris.target values run through the 'viridis'
# colormap (irisDF.target was replaced by names in an earlier cell).
axes.scatter(irisDF.iloc[:,0], irisDF.iloc[:,1],
             alpha=0.8, s=100*irisDF.iloc[:,3], c=iris.target,
             cmap='viridis', edgecolors='k')

# Title and axis labels are derived from the column names.
axes.set_title(f'Scatter Plot of {irisDF.columns[0].capitalize()} & {irisDF.columns[1].capitalize()}')
axes.set_xlabel(irisDF.columns[0].capitalize())
axes.set_ylabel(irisDF.columns[1].capitalize())
# The scatter call above passes no `label`, so this legend has no entries to show.
axes.legend() # No easy way to do that... not in matplotlib -> Here comes seaborn

In [None]:
# seaborn version of the scatter plot: colour (hue) and marker size are
# taken straight from named columns of the frame.
fig = plt.figure(figsize=(15, 7))
_ = sns.scatterplot(
    data=irisDF,
    x='sepal length (cm)',
    y='sepal width (cm)',
    hue='target',
    size='petal width (cm)',
    edgecolor='k',
)
_ = plt.title(f'Scatter Plot of {irisDF.columns[0].capitalize()} & {irisDF.columns[1].capitalize()}')



# Distribution Plot 

In [None]:
from warnings import filterwarnings

# `sns.distplot` is deprecated; `histplot` with a KDE overlay on a density
# scale is its replacement (the next cell already relies on `displot`,
# which comes from the same seaborn API generation). With the deprecated
# call gone there is nothing left to silence, so the blanket
# filterwarnings('ignore') -- which would also hide every warning raised
# by later cells -- is no longer issued. The import is kept in case other
# cells use it.
_ = sns.histplot(irisDF.iloc[:, 1], kde=True, stat='density')

In [None]:
# Histogram of the second column with a rug of the individual observations.
sns.displot(irisDF.iloc[:, 1], rug=True)
# Mark the sample mean. The mean is a position on the x-axis (the plotted
# variable), so it is drawn as a vertical line; a horizontal line at
# y = mean would mix the variable's scale with the count scale.
plt.axvline(irisDF.iloc[:, 1].mean(), color='k', linestyle='-.')

# Joint Plot

In [None]:
# Hexagonal 2-D binning of the first column (x) against the second (y),
# using a grid size of 30 and the 'Blues' colormap.
plt.hexbin(x=irisDF.iloc[:,0], y=irisDF.iloc[:,1], gridsize=30, cmap='Blues')
# Colour bar for the plot above, labelled as the per-bin count.
cb = plt.colorbar(label='count in bin')

In [None]:
# seaborn joint plot of the first two iris features, looked up by column
# name in irisDF; kind='scatter' selects a scatter plot for the joint view.
sns.jointplot(x=iris.feature_names[0], y=iris.feature_names[1],
              data=irisDF, kind='scatter')

# Random Testing Space