# Introduction to Matplotlib


In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Shared sample data reused by the plotting cells below:
# - plot_data_x: the integers 0..99
# - plot_data_y: 100 evenly spaced floats from 0 to 10 (both ends included)
plot_data_x = np.arange(start=0, stop=100)
plot_data_y = np.linspace(start=0, stop=10, num=100)

In [None]:
# Quickest possible plot: only one sequence is passed, so Matplotlib treats it
# as the y-values and plots them against their positions (0..N-1).
plt.plot(plot_data_x)

In [None]:
# PyPlot API: a module-level call that draws on pyplot's implicit "current"
# figure and axes. Compare with the object-oriented interface in the next cell.
plt.plot(plot_data_x, plot_data_y)

In [None]:
# Object-oriented Interface: create the Figure/Axes explicitly and call
# methods on them instead of relying on pyplot's implicit current figure.
fig, ax = plt.subplots()
ax.plot(plot_data_x, plot_data_y)
# NOTE(review): the plotted data is a linear ramp (arange vs. linspace), not
# random integers -- title and file name are kept as-is, confirm the wording.
ax.set(title="Random Integers Plot", xlabel="x", ylabel="y")

# savefig does not create missing directories; make sure the target folder
# exists so the cell also works on a fresh checkout.
figure_path = Path("./figures/random-figure.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path)

In [None]:
# Line Plot
# Only the y-values (plot_data_y squared) are supplied, so the x-axis shows
# the sample index rather than plot_data_y itself.
fig, ax = plt.subplots()
ax.plot(plot_data_y ** 2)

In [None]:
# Scatter Plot
# Two independent figures over the same x-values: y = x^2 and y = sin(x).

# Figure 1: squares
fig, ax = plt.subplots()
ax.scatter(x=plot_data_y, y=plot_data_y ** 2)
ax.set(title="Square")

# Figure 2: sine
fig, ax = plt.subplots()
ax.scatter(x=plot_data_y, y=np.sin(plot_data_y))
ax.set(title="Sine")

In [None]:
# Bar Chart
# The same product -> price mapping drawn as vertical and horizontal bars.

dict_data = {"Milk": 0.99, "Eggs": 2.99, "Bread": 1.49}

# Materialise the dict views once so both charts receive plain lists
# (the vertical chart previously got raw dict views, the horizontal one lists).
products = list(dict_data.keys())
prices = list(dict_data.values())

# Vertical bars: products on the x-axis, prices on the y-axis.
fig, ax = plt.subplots()
ax.bar(products, prices)
ax.set(title="Supermarket Prices", xlabel="Product", ylabel="Price")

# Horizontal bars: the axes (and therefore the labels) swap roles.
fig, ax = plt.subplots()
ax.barh(products, prices)
ax.set(title="Supermarket Prices", xlabel="Price", ylabel="Product")

In [None]:
# Histogram
# 1000 draws from the standard normal distribution, binned with Matplotlib's
# default number of bins.

# Seeded generator so the figure is identical after Restart & Run All.
rng = np.random.default_rng(seed=42)
plot_data_normal = rng.standard_normal(1000)
fig, ax = plt.subplots()
ax.hist(plot_data_normal)

In [None]:
# Multiple Graphs
# One figure with a 2x2 grid of axes, each showing a different function of
# plot_data_y.

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
ax1.plot(plot_data_y, np.multiply(plot_data_y, 2))
ax1.set(title="Double")
ax2.plot(plot_data_y, np.square(plot_data_y))
ax2.set(title="Square")
ax3.plot(plot_data_y, np.sin(plot_data_y))
ax3.set(title="Sine")
ax4.plot(plot_data_y, np.cos(plot_data_y))
ax4.set(title="Cosine")

# Re-space the grid so the bottom-row titles do not crowd the tick labels of
# the row above them.
fig.tight_layout()

In [None]:
# Graphs from Pandas Series
# A year of daily standard-normal values, accumulated into a random walk and
# plotted directly through the Series' own .plot() accessor.

# Seeded generator so the random walk is identical after Restart & Run All.
rng = np.random.default_rng(seed=42)
time_series = pd.Series(
    rng.standard_normal(365),
    index=pd.date_range("2022-01-01", periods=365),
)
time_series_cum_sum = time_series.cumsum()
time_series_cum_sum.plot()

In [None]:
# Pandas DataFrame Preparation
# Load the car sales CSV, fill missing values with column means, convert the
# numeric columns to integers and add the derived columns used for plotting.

car_sales = pd.read_csv("./data/car-sales.csv")

# Every fill is assigned back to the column. Calling fillna(inplace=True) on a
# column selection is chained assignment: it warns on recent pandas and does
# not update the frame under Copy-on-Write, which would leave NaNs behind and
# make the astype(int) conversion fail.
car_sales["Odometer (KM)"] = (
    car_sales["Odometer (KM)"]
    .fillna(car_sales["Odometer (KM)"].mean())
    .round(0)
    .astype(int)
)

# Door counts are whole numbers, so the fill value is the rounded mean.
car_sales["Doors"] = (
    car_sales["Doors"]
    .fillna(round(car_sales["Doors"].mean()))
    .round(0)
    .astype(int)
)

# Strip "$" and "," before parsing the price as a number. The pattern is a raw
# string so that "\$" is not treated as an (invalid) string escape sequence.
car_sales["Price"] = car_sales["Price"].replace(r"[\$,]", "", regex=True).astype(float)
car_sales["Price"] = (
    car_sales["Price"]
    .fillna(car_sales["Price"].mean())
    .round(0)
    .astype(int)
)

# Synthetic registration dates: one consecutive day per row from 2023-01-01.
car_sales["Registration Date"] = pd.date_range("2023-01-01", periods=len(car_sales))

# Running total of prices in row order.
car_sales["Cumulative Sales"] = car_sales["Price"].cumsum()

In [None]:
# Pandas DataFrame Line Chart
# Cumulative sales over the registration dates, drawn through the DataFrame's
# plotting accessor.

car_sales.plot(
    kind="line",
    x="Registration Date",
    y="Cumulative Sales",
    title="Cumulative Sales",
    xlabel="Date",
    ylabel="Sales",
)

In [None]:
# Pandas DataFrame Scatter Chart
# Price (y-axis) against odometer reading (x-axis), one point per car.

car_sales.plot.scatter(
    title="Price by Odometer",
    x="Odometer (KM)",
    y="Price",
    # Axis labels must match the columns plotted on each axis (they were
    # previously swapped).
    xlabel="Odometer (KM)",
    ylabel="Price",
)

In [None]:
# Pandas DataFrame grouped Bar Chart
# Average price per make: aggregate first, then draw one bar per make.

mean_price_by_make = car_sales.groupby("Make")["Price"].mean()
mean_price_by_make.plot.bar()