# Intro to Matplotlib

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Quickest possible plot: hand pyplot a single list of values.
# The trailing semicolon suppresses the text repr of the call's return value.
plt.plot([1,2,3,4]);

In [None]:
# Explicit x and y coordinates; both lists are reused by the cells below
x, y = [1, 2, 3, 4], [11, 22, 33, 44]
plt.plot(x, y);

In [None]:
# 1st method: create an empty figure, then attach a single subplot to it
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)  # 1x1 grid, first (and only) position
ax.plot(x, y)
plt.show()

In [None]:
# 2nd method: create an empty figure, then position the axes by hand.
# add_axes takes [left, bottom, width, height] as fractions of the figure size.
# A left/bottom of 1 would start the axes at the figure's top-right corner
# (outside the canvas), so keep the rectangle inside the figure with a margin
# for tick labels.
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax.plot(x, y)  # add some data
plt.show()

In [None]:
# 3rd method: get the figure and its axes back from a single call
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, y);  # add some data

## Matplotlib example workflow

In [None]:
# Import what this self-contained example needs
from pathlib import Path

import matplotlib.pyplot as plt

# Prepare data
x = [1, 2, 3, 4]
y = [11, 22, 33, 44]

# Setup plot
fig, ax = plt.subplots(figsize=(10, 10))

# Plot data
ax.plot(x, y)

# Customize plot
ax.set(title="Simple Plot",
       xlabel="x-axis",
       ylabel="y-axis")

# Save the figure. savefig does not create missing directories, so make sure
# images/ exists first; otherwise this cell fails on a fresh checkout.
output_path = Path("images") / "sample-plot.png"
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path)

## Making figures with NumPy arrays
* Line plot
* Scatter plot
* Bar plot
* Histogram
* Subplots

In [None]:
import numpy as np

# 100 evenly spaced samples from 0 to 10 (both ends included)
x = np.linspace(start=0, stop=10, num=100)
x[:10]  # peek at the first ten values

In [None]:
# Line plot of y = x squared
fig, ax = plt.subplots()
x_squared = x ** 2
ax.plot(x, x_squared);

In [None]:
# Scatter plot of the exponential of x against x
fig, ax = plt.subplots()
exp_of_x = np.exp(x)
ax.scatter(x, exp_of_x);

In [None]:
# (Vertical) bar plot from a dictionary: keys become bar labels, values bar heights
player_ratings = dict(Messi=94, Ronaldo=92, Neymar=91, Lingard=70)

fig, ax = plt.subplots()
ax.bar(player_ratings.keys(), player_ratings.values())
ax.set(title="Fifa Rating", ylabel="Rating");

In [None]:
# Horizontal bar plot: player names on the y-axis, ratings along the x-axis
fig, ax = plt.subplots()
ax.barh(list(player_ratings.keys()), list(player_ratings.values()))
# barh draws the values horizontally, so the "Rating" label belongs on the x-axis
ax.set(title="Fifa Rating",
       xlabel="Rating");

In [None]:
# Histogram of 1000 random samples drawn with np.random.randn.
# NOTE: this rebinds x (previously the linspace array), so later cells that
# use x plot these random samples instead.
x = np.random.randn(1000)
fig, ax = plt.subplots()
ax.hist(x);

## Two ways to create subplots

In [None]:
# Way 1: unpack the 2x2 grid into one name per axes
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 5))

# Draw a different kind of plot on each axes
ax1.plot(x, x / 2)
ax2.scatter(np.random.random(10), np.random.random(10))
ax3.bar(player_ratings.keys(), player_ratings.values())
ax4.hist(np.random.randn(1000));

In [None]:
# Way 2: keep the grid as one array of axes and index it with [row, column]
fig, ax = plt.subplots(2, 2, figsize=(10, 5))

# Draw a different kind of plot on each axes
ax[0, 0].plot(x, x / 2)
ax[0, 1].scatter(np.random.random(10), np.random.random(10))
ax[1, 0].bar(player_ratings.keys(), player_ratings.values())
ax[1, 1].hist(np.random.randn(1000));

## Plotting from pandas DataFrames

In [None]:
import pandas as pd

In [None]:
# Random walk: running total of 1000 random daily steps starting 1/1/2021
random_steps = np.random.randn(1000)
daily_index = pd.date_range("1/1/2021", periods=1000)
ts = pd.Series(random_steps, index=daily_index).cumsum()
ts.plot();

In [None]:
# Make a dataframe from the car sales CSV and strip every non-digit character
# from the Price column.
# The pattern is a raw string: "\D" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on current Python versions.
# NOTE(review): removing non-digits also removes any decimal point, so the
# digits after it remain in the string; the "Remove last two zeros" cell that
# follows relies on this. Confirm against the CSV's price format.
car_sales = pd.read_csv("car-sales.csv")
car_sales["Price"] = car_sales["Price"].replace(r'\D', '', regex=True)
car_sales

In [None]:
# Remove last two zeros by dropping the final two characters of each price string.
# Not idempotent: every re-run of this cell removes two more characters.
price_text = car_sales["Price"]
car_sales["Price"] = price_text.str.slice(stop=-2)
car_sales

In [None]:
# One consecutive calendar day per row, starting 1/1/2021
row_count = len(car_sales)
car_sales["Sale Date"] = pd.date_range("1/1/2021", periods=row_count)
car_sales

In [None]:
# Convert the price strings to integers, then keep a running total of sales
price_as_int = car_sales["Price"].astype(int)
car_sales["Price"] = price_as_int
car_sales["Total Sales"] = price_as_int.cumsum()
car_sales

In [None]:
# Line plot of the running sales total over time
car_sales.plot.line(x="Sale Date", y="Total Sales");

In [None]:
# Scatter plot of price against odometer reading
car_sales.plot.scatter(x="Odometer (KM)", y="Price");

In [None]:
# Plotting to bar graph

# Random data: 10 rows by 4 columns of samples from np.random.rand
n_rows, n_cols = 10, 4
x = np.random.rand(n_rows, n_cols)
x

In [None]:
# Wrap the random array in a DataFrame with one named column per array column
column_names = list("abcd")
df = pd.DataFrame(x, columns=column_names)
df

In [None]:
# Grouped bar chart: one group per row, one bar per column
df.plot(kind="bar");

In [None]:
# Display car_sales again as a reminder of its columns before plotting from it
car_sales

In [None]:
# Bar chart of odometer reading for each row, labelled by make
car_sales.plot.bar(x="Make", y="Odometer (KM)");

In [None]:
# Histogram of the odometer readings
odometer = car_sales["Odometer (KM)"]
odometer.plot(kind="hist");

In [None]:
# Switch to a different dataset and preview its first five rows
heart_disease = pd.read_csv("heart-disease.csv")
heart_disease.head(5)

In [None]:
# Histogram of the age column, split into 70 bins
age = heart_disease["age"]
age.plot(kind="hist", bins=70);

In [None]:
# One histogram per column, stacked as subplots in a tall figure
heart_disease.plot(kind="hist", subplots=True, figsize=(10, 30));

## pyplot vs. Matplotlib object-oriented (OO) method
* Quick plots -> pyplot
* More advanced, customised plots -> OO method

In [None]:
# Keep only the rows whose age is strictly greater than 50
is_over_50 = heart_disease["age"] > 50
over_50 = heart_disease[is_over_50]
over_50

In [None]:
# pyplot method: let pandas create the figure and axes implicitly;
# points are coloured by the target column
over_50.plot.scatter(x='age', y='chol', c='target');

In [None]:
## OO method: create the axes explicitly and hand them to pandas via ax=
fig, ax = plt.subplots(figsize=(10, 6))
over_50.plot(kind='scatter',
             x='age',
             y='chol',
             c='target',
             ax=ax)
# Set limits of x axis. The semicolon sits on this final statement so the
# limits returned by set_xlim are not echoed as cell output.
ax.set_xlim([45, 80]);

In [None]:
## OO method breakdown: every step done directly on the axes
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the data, colouring each point by its target value
scatter = ax.scatter(over_50["age"], over_50["chol"], c=over_50["target"])

# Customize the plot
ax.set(title="Heart Disease and Cholesterol Levels", xlabel="Age", ylabel="Cholesterol")

# Add legend built from the scatter's colour groups
legend_handles, legend_labels = scatter.legend_elements()
ax.legend(legend_handles, legend_labels, title="Target")

# Add a dashed horizontal line at the mean cholesterol value
mean_chol = over_50["chol"].mean()
ax.axhline(mean_chol, linestyle='--');

In [None]:
# Setup plot: two stacked panels sharing the age axis
fig, (ax0, ax1) = plt.subplots(nrows=2,  # 2 rows
                               ncols=1,
                               sharex=True,
                               figsize=(10, 8))

# (axes, column to plot, panel title, y-axis label) for each panel.
# Both panels are drawn the same way, so they share one loop instead of two
# copy-pasted stanzas.
panels = [
    (ax0, "chol", "Heart Disease and Cholesterol Levels", "Cholesterol"),
    (ax1, "thalach", "Heart Disease and Max Heart Rate Levels", "Max Heart Rate"),
]

for panel_ax, column, panel_title, y_label in panels:
    # Add data, coloured by target
    scatter = panel_ax.scatter(over_50["age"],
                               over_50[column],
                               c=over_50["target"])

    # Customize the panel
    panel_ax.set(title=panel_title, ylabel=y_label)

    # Mean line
    mean_line = panel_ax.axhline(y=over_50[column].mean(),
                                 color='b',
                                 linestyle='--',
                                 label="Average")

    # The legend is built from explicit handles, so the mean line has to be
    # added to them; otherwise its "Average" label is never displayed.
    handles, labels = scatter.legend_elements()
    panel_ax.legend(handles + [mean_line], labels + ["Average"], title="Target")

# Only the bottom panel carries the x label, since the x-axis is shared
ax1.set_xlabel("Age")

# Title the figure
fig.suptitle('Heart Disease Analysis', fontsize=16, fontweight='bold');

## Matplotlib customization

In [None]:
# Available styles: the style names this Matplotlib installation accepts in plt.style.use
plt.style.available

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*", and the
# old names stop working on newer releases. Use the new name when this
# installation has it, and fall back to the old one otherwise.
whitegrid_style = ('seaborn-v0_8-whitegrid'
                   if 'seaborn-v0_8-whitegrid' in plt.style.available
                   else 'seaborn-whitegrid')
plt.style.use(whitegrid_style)
car_sales["Price"].plot();

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*", and the
# old names stop working on newer releases. Use the new name when this
# installation has it, and fall back to the old one otherwise.
seaborn_style = ('seaborn-v0_8'
                 if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
car_sales["Price"].plot();

In [None]:
# Switch to the ggplot style and redraw the same price line plot
plt.style.use('ggplot')
price = car_sales["Price"]
price.plot.line();

In [None]:
# Fresh random data: 10 rows by 4 columns of samples from np.random.randn
n_rows, n_cols = 10, 4
x = np.random.randn(n_rows, n_cols)
x

In [None]:
# Wrap the random array in a DataFrame with one named column per array column
column_names = list("abcd")
df = pd.DataFrame(x, columns=column_names)
df

In [None]:
# pandas returns the axes it drew on, so keep a handle for later customisation
ax = df.plot.bar()

In [None]:
# Customize the axes returned by pandas: title, axis labels and a visible legend
ax = df.plot.bar()
ax.set_title("Random Number Bar Graph")
ax.set_xlabel("Row")
ax.set_ylabel("Random number")
legend = ax.legend()
legend.set_visible(True)

## Saving a figure

In [None]:
# Display whatever figure `fig` currently refers to. When the notebook is run
# top to bottom this is the two-panel heart disease figure: the bar-chart cells
# after it only rebind `ax`, not `fig`.
fig

In [None]:
# Save the figure bound to `fig` as heart-disease-analysis.png (path is relative
# to the current working directory)
fig.savefig("heart-disease-analysis.png")