In [None]:
%matplotlib inline
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn

sn.set()

## What are we going to cover?

- Matplotlib workflow
- Importing Matplotlib and the 2 ways of plotting
- Plotting data from NumPy arrays
- Plotting data from pandas DataFrames
- Customizing plots
- Saving and sharing plots

In [None]:
# Draw an empty plot. The trailing ";" keeps the cell from echoing the call's return value.
plt.plot(); # without the ";", call plt.show() at the end of the cell instead

In [None]:
# Plot a single list of values (no explicit x-values are supplied)
plt.plot([1, 2, 3, 4]);

In [None]:
# Plot y against explicitly supplied x-values using the pyplot interface
x, y = [1, 2, 3, 4], [11, 22, 33, 44]
plt.plot(x, y);

In [None]:
# 1st method: create an empty Figure, then attach an Axes to it
fig = plt.figure() # Create the figure
ax = fig.add_subplot() # Add an Axes to the figure (nothing is plotted on it yet)
plt.show()

In [None]:
# 2nd method: create a Figure and position the Axes by hand.
# add_axes takes [left, bottom, width, height] as fractions of the figure size, so the
# rectangle has to stay within 0-1 to lie inside the figure canvas. The previous value
# [1, 1, 1, 1] started at the top-right corner and extended entirely outside the figure.
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax.plot(x, y) # add some data (x and y come from the earlier cell)
plt.show()

In [None]:
# 3rd method (recommended): create the Figure and its Axes in a single call
fig, ax = plt.subplots()
ax.plot(x, [50, 100, 200, 250]); # add some data

# Matplotlib example workflow 

In [None]:
# 0. Import matplotlib and get it ready for plotting in Jupyter
%matplotlib inline
import matplotlib.pyplot as plt

# 1. Prepare data 
x = [1, 2, 3, 4]
y = [11, 22, 33, 44]

# 2. Setup plot
fig, ax = plt.subplots(figsize=(10, 10)) # (Width, Height )

# 3. Plot data 
ax.plot(x, y)

# 4. Customize plot
ax.set(title="Simple Plot",
      xlabel= "x-axis",
      ylabel="y-axis")

# 5. Save & show
fig.savefig("../data/figures/sample-plot.png")

## Making figures with NumPy arrays

We will create:

* Line plot
* Scatter plot
* Bar plot
* Histogram
* Subplots

In [None]:
# Sample the interval from 0 to 10 at 100 evenly spaced points
x = np.linspace(start=0, stop=10, num=100)
x[:10]  # preview the first ten samples

In [None]:
# Line plot of x squared against x
fig, ax = plt.subplots()
ax.plot(x, x * x);

In [None]:
# Scatter plot of exp(x) over the same x-values
fig, ax = plt.subplots()
ax.scatter(x=x, y=np.exp(x));

In [None]:
# Scatter plot of sin(x) over the same x-values
fig, ax = plt.subplots()
ax.scatter(x=x, y=np.sin(x));

In [None]:
# Scatter plot of cos(x) over the same x-values
fig, ax = plt.subplots()
ax.scatter(x=x, y=np.cos(x));

In [None]:
# Vertical bar chart from a dictionary: the keys label the bars, the values set their heights
nut_butter_prices = {
    "Almond butter": 10,
    "Peanut butter": 8,
    "Cashew butter": 12,
}

fig, ax = plt.subplots()
ax.bar(list(nut_butter_prices), height=list(nut_butter_prices.values()))
ax.set_title("Eric's Butter Store")
ax.set_ylabel("Price ($)");

In [None]:
# Horizontal bar chart from the same dictionary: the keys label the bars, the values set their lengths
nut_butter_prices = {
    "Almond butter": 10,
    "Peanut butter": 8,
    "Cashew butter": 12,
}

fig, ax = plt.subplots()
ax.barh(y=list(nut_butter_prices), width=list(nut_butter_prices.values()))
ax.set_title("Eric's Butter Store")
ax.set_xlabel("Price ($)");

In [None]:
# Make a histogram of 1000 random draws.
# NOTE: this rebinds `x` (previously the linspace array), so the cells below that
# use `x` see these random values instead.
x = np.random.randn(1000)
fig, ax = plt.subplots()
ax.hist(x);

# Make some subplots
# - Option 1 (recommended):

In [None]:
# Unpack the 2x2 grid of Axes straight into four names
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))

# Draw a different kind of plot on each Axes
ax1.plot(x, x / 2)
ax2.scatter(np.random.random(10), np.random.random(10))
ax3.hist(np.random.randn(1000))
ax4.bar(nut_butter_prices.keys(), height=nut_butter_prices.values());

# - Option 2:

In [None]:
# Keep the 2x2 grid as one array of Axes and address each one by [row, column]
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))

# Draw a different kind of plot at each grid position
ax[0, 0].plot(x, x / 2)
ax[0, 1].scatter(np.random.random(10), np.random.random(10))
ax[1, 0].hist(np.random.randn(1000))
ax[1, 1].bar(nut_butter_prices.keys(), height=nut_butter_prices.values());

## Plotting from pandas DataFrames

In [None]:
# First example: cumulative sum of 1000 random steps, indexed by consecutive dates
ts = pd.Series(
    np.random.randn(1000),
    index=pd.date_range('1/1/2000', periods=1000),
).cumsum()
ts.plot();

In [None]:
# Load the car sales CSV into a DataFrame
df = pd.read_csv("../data/raw/car-sales.csv")

In [None]:
# Preview the first rows of the car sales data
df.head()

In [None]:
# Remove "$", "," and "." from Price so only the digits remain.
# The pattern is a regex character class, so regex=True must be passed explicitly:
# pandas >= 2.0 treats the pattern as a literal string by default, which would leave
# Price unchanged and break the later astype(int). The raw string avoids the
# invalid-escape warnings that '\$' triggers in a normal string literal.
df['Price'] = df['Price'].str.replace(r'[\$\,\.]', '', regex=True)

In [None]:
# Drop the last two characters of every Price string (the two trailing zeros).
# NOTE: not idempotent -- each re-run of this cell trims two more characters.
df['Price'] = df['Price'].str.slice(stop=-2)

In [None]:
# Display df to check the Price column after the clean-up steps above
df

In [None]:
# Add a 'Sale Date' column: one date per row, starting at 1/1/2020
df['Sale Date'] = pd.date_range(start='1/1/2020', periods=len(df))

In [None]:
# Display df to check the new 'Sale Date' column
df

In [None]:
# Add a 'Total Sales' column holding the running total of Price.
# Price is still stored as strings at this point (it was edited through .str above),
# so it is cast to int before accumulating.
df['Total Sales'] = df['Price'].astype(int).cumsum()

In [None]:
# Display df to check the new 'Total Sales' column
df

In [None]:
# Let's plot the total sales against the sale date
df.plot(x="Sale Date", y ="Total Sales");

In [None]:
# Store Price as integers from here on, so it can be used as a numeric column in the plots below
df['Price'] = df['Price'].astype(int)

In [None]:
# Scatter plot of Price against odometer reading
df.plot.scatter(x="Odometer (KM)", y="Price");

In [None]:
# Random data with 10 rows and 4 columns (rebinds `x` once more); shown as the cell output
x = np.random.rand(10, 4)
x

In [None]:
# Wrap the random array in a DataFrame, naming its four columns a-d
df_x = pd.DataFrame(x, columns=['a', 'b', 'c', 'd'])

In [None]:
# Display the random-number DataFrame
df_x

In [None]:
# Bar plot of the random-number DataFrame
df_x.plot.bar();

In [None]:
# Display the car sales data again before plotting it as bars
df

In [None]:
# One bar per row: odometer reading, labelled by Make
df.plot.bar(x="Make", y="Odometer (KM)");

In [None]:
# One bar per row: Price, labelled by Make
df.plot.bar(x="Make", y="Price");

In [None]:
# Mean odometer reading for each Make
avg_KM_by_Make = df.groupby("Make")['Odometer (KM)'].mean()
avg_KM_by_Make

In [None]:
# Bar plot of the mean odometer reading per Make
avg_KM_by_Make.plot(kind="bar");

In [None]:
# Mean Price for each Make
avg_price_by_make = df.groupby("Make")['Price'].mean()
avg_price_by_make

In [None]:
# Bar plot of the mean Price per Make
avg_price_by_make.plot(kind="bar");

In [None]:
# Histogram of odometer readings with 10 bins
df['Odometer (KM)'].plot(kind="hist", bins=10);

In [None]:
# Same histogram, leaving the number of bins at its default
df['Odometer (KM)'].plot.hist();

In [None]:
# Load the heart disease CSV into a DataFrame
heart_disease = pd.read_csv('../data/raw/heart-disease.csv')

In [None]:
# Preview the first rows of the heart disease data
heart_disease.head()

In [None]:
# Histogram of the age column
heart_disease['age'].plot(kind='hist');

In [None]:
# One histogram per column, each drawn in its own subplot
heart_disease.plot(kind='hist', subplots=True);

In [None]:
# Same per-column histograms on a taller figure so the subplots are not squashed
heart_disease.plot(kind='hist', figsize=(10, 30), subplots=True);

- When plotting something quickly, we can use the (`pyplot`) method

- When plotting something more advanced, use the (`OO`) method


In [None]:
# Keep only the rows whose age is greater than 50
over_50 = heart_disease.loc[heart_disease['age'] > 50]

In [None]:
# Number of rows with age above 50
len(over_50)

In [None]:
# Preview the first rows of the over-50 subset
over_50.head()

In [None]:
# Plot straight from the DataFrame, letting it create the figure itself.
# c='target' colours each point by the value in the target column.
over_50.plot.scatter(x='age', y='chol', c='target');

In [None]:
# OO method: create the Figure/Axes ourselves, then hand the Axes to the DataFrame plot
fig, ax = plt.subplots(figsize=(10, 6))
over_50.plot.scatter(x='age', y='chol', c='target', ax=ax);

In [None]:
# OO method mixed with the DataFrame plot: draw onto our own Axes,
# then adjust that Axes directly afterwards
fig, ax = plt.subplots(figsize=(10, 6))
over_50.plot.scatter(x='age', y='chol', c='target', ax=ax)
ax.set_xlim(45, 100);

## OO Matplotlib method from scratch

In [None]:
# The same scatter plot built with Matplotlib's object-oriented API only
fig, ax = plt.subplots(figsize=(10, 6))

# Cholesterol against age, coloured by the target column
scatter = ax.scatter(over_50['age'], over_50['chol'], c=over_50['target'])

# Title and axis labels
ax.set_title("Heart Disease and Cholesterol Levels")
ax.set_xlabel("Age")
ax.set_ylabel("Cholesterol")

# Legend built from the scatter's colour groups
ax.legend(*scatter.legend_elements(), title='Target')

# Dashed horizontal line at the mean cholesterol value
ax.axhline(y=over_50['chol'].mean(), linestyle='--', c='red');

In [None]:
# Look at the over-50 subset again to pick the columns for the next figure
over_50.head()

In [None]:
# Two stacked subplots sharing the age axis:
# cholesterol (chol) on top, max heart rate (thalach) below

fig, (ax0, ax1) = plt.subplots(nrows=2,
                              ncols=1,
                              figsize=(10, 10),
                              sharex=True)

# Add data to ax0
scatter = ax0.scatter(x=over_50['age'],
                     y=over_50['chol'],
                     c=over_50['target'])

# Customize ax0 (no xlabel here: the x-axis is shared and labelled on the bottom plot)
ax0.set(title="Heart Disease and Cholesterol Levels",
       ylabel="Cholesterol")

# Add a legend to ax0
ax0.legend(*scatter.legend_elements(), title="Target")

# Add a meanline
ax0.axhline(over_50['chol'].mean(),
           linestyle='--',
           c='red')

# Add data to ax1
scatter = ax1.scatter(x=over_50['age'],
                     y=over_50['thalach'],
                     c=over_50['target'])

# Customize ax1. This Axes plots 'thalach', so the y-axis is labelled as max heart rate
# (it was previously mislabelled "Cholesterol").
ax1.set(title="Heart Disease and Max Heart Rate Levels",
       xlabel="Age",
       ylabel="Max Heart Rate Levels")

# Add a legend to ax1
ax1.legend(*scatter.legend_elements(), title="Target")

# Add a meanline
ax1.axhline(over_50['thalach'].mean(),
           linestyle='--',
           c='green')

# Add a title to the whole figure
fig.suptitle("Heart Disease Analysis", fontsize=16, fontweight="bold");

## Customizing Matplotlib plots and getting stylish

In [None]:
# List the names of the style sheets available in this matplotlib installation
plt.style.available

In [None]:
# Remind ourselves what the car sales data looks like
df.head()

In [None]:
# Baseline: plot Price before switching to another style sheet
df['Price'].plot();

In [None]:
# Switch to the seaborn "whitegrid" style sheet.
# matplotlib 3.6 renamed 'seaborn-whitegrid' to 'seaborn-v0_8-whitegrid' and later
# releases dropped the old name, so use whichever name this installation provides.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in plt.style.available
                   else 'seaborn-whitegrid')
plt.style.use(whitegrid_style)

In [None]:
# Same Price plot again, now drawn after the style change in the previous cell
df['Price'].plot();

In [None]:
# Scatter plot of Price against odometer reading, drawn after the style change
df.plot(x= "Odometer (KM)", y="Price", kind="scatter");

In [None]:
# Plot Price once more
df['Price'].plot();

In [None]:
# Only let ERROR-level (and above) messages from matplotlib's Axes module through.
# NOTE(review): presumably meant to hide a warning logged by the scatter plot cells -- confirm.
# The logger is looked up by name through the logging module rather than by importing
# the private `_log` attribute from matplotlib.axes._axes.
matplotlib_axes_logger = logging.getLogger('matplotlib.axes._axes')
matplotlib_axes_logger.setLevel('ERROR')

In [None]:
# Scatter plot again, after raising the log level in the previous cell
df.plot(x= "Odometer (KM)", y="Price", kind="scatter");

In [None]:
# Create some data: random values with 10 rows and 4 columns (rebinds `x` again)
x = np.random.randn(10, 4)
x

In [None]:
# Wrap the random array in a DataFrame with columns a-d and display it
new_data = pd.DataFrame(x, columns=['a', 'b', 'c', 'd'])
new_data

In [None]:
# Bar plot of the new data; keep the returned Axes so it can be customised
ax = new_data.plot.bar()

In [None]:
# Draw the bar plot again and customise it through the returned Axes
ax = new_data.plot.bar()

# Title and axis labels
ax.set_title("Random Number Bar Graph from DataFrame")
ax.set_xlabel("Row Number")
ax.set_ylabel("Random Number")

# Make sure the legend is shown
ax.legend().set_visible(True);

In [None]:
# Set the style.
# matplotlib 3.6 renamed 'seaborn-whitegrid' to 'seaborn-v0_8-whitegrid' and later
# releases dropped the old name, so use whichever name this installation provides.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in plt.style.available
                   else 'seaborn-whitegrid')
plt.style.use(whitegrid_style)

## OO Matplotlib method from scratch
fig, ax = plt.subplots(figsize=(10, 6))

# plot the data
scatter = ax.scatter(x=over_50['age'],
                    y=over_50['chol'],
                    c=over_50['target'],
                    cmap="winter") # this changes the colour scheme


# Customize the plot
ax.set(title= "Heart Disease and Cholesterol Levels",
      xlabel="Age",
      ylabel="Cholesterol")

# Add a legend
ax.legend(*scatter.legend_elements(), title='Target')

# Add a horizontal line at the mean cholesterol value
ax.axhline(over_50['chol'].mean(),
          linestyle='--',
          c='red');

## Customizing the plots by adjusting the axes limits

In [None]:
# Two stacked subplots (cholesterol on top, max heart rate below) with explicit axis limits
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(10, 10), sharex=True)

# --- Top plot: cholesterol against age, coloured by target ---
scatter = ax0.scatter(over_50['age'], over_50['chol'],
                      c=over_50['target'], cmap="winter")
ax0.set_title("Heart Disease and Cholesterol Levels")
ax0.set_ylabel("Cholesterol")

# Restrict the x-axis to ages 50-80
ax0.set_xlim(50, 80)

# Legend from the scatter's colour groups, plus a dashed line at the mean
ax0.legend(*scatter.legend_elements(), title="Target")
ax0.axhline(y=over_50['chol'].mean(), linestyle='--', c='red')

# --- Bottom plot: max heart rate (thalach) against age, coloured by target ---
scatter = ax1.scatter(over_50['age'], over_50['thalach'],
                      c=over_50['target'], cmap="winter")
ax1.set_title("Heart Disease and Max Heart Rate Levels")
ax1.set_xlabel("Age")
ax1.set_ylabel("Max Heart Rate Levels")

# Restrict the x-axis to ages 50-80 and the y-axis to 60-200
ax1.set_xlim(50, 80)
ax1.set_ylim(60, 200)

# Legend from the scatter's colour groups, plus a dashed line at the mean
ax1.legend(*scatter.legend_elements(), title="Target")
ax1.axhline(y=over_50['thalach'].mean(), linestyle='--', c='green')

# Title for the whole figure
fig.suptitle("Heart Disease Analysis", fontsize=16, fontweight="bold");

# Saving and sharing our plots

In [None]:
# Save the most recently created figure (`fig`) to disk.
# savefig does not create missing folders, so make sure the target directory exists first;
# otherwise this cell fails on a fresh checkout where ../data/figures is absent.
heart_plot_path = Path("../data/figures/heart-disease-analysis-plot-saved-with-code.png")
heart_plot_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(heart_plot_path)