### Chapter 3 Matplotlib Data Visualization

Exercise 3

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import ticker
import statsmodels.formula.api as smf

In [None]:
# Input CSV and figure output folder, both resolved against the directory
# the notebook is started from.
src_file = Path.cwd().joinpath('data', 'raw', 'EPA_fuel_economy.csv')
image_dir = Path.cwd().joinpath('images')

In [None]:
# Load the EPA fuel economy CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=src_file)

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [None]:
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Mean fuel cost per model year, rounded to two decimals; `year` is kept as
# a regular column rather than the index.
avg_by_year = (
    df.groupby('year', as_index=False)['fuelCost08']
    .mean()
    .round(2)
)
avg_by_year

In [None]:
# Ordinary least squares fit of the yearly average fuel cost against year.
mpg_model = smf.ols(formula="fuelCost08 ~ year", data=avg_by_year).fit()

In [None]:
# Show the values the fitted model predicts for each row of avg_by_year.
mpg_model.fittedvalues

In [None]:
# Display the regression summary report for the fitted model.
mpg_model.summary()

In [None]:
# Quick look: yearly averages as points with the fitted regression line
# drawn over them. The trailing semicolon suppresses the cell's text output.
fig, ax = plt.subplots()
ax.scatter(avg_by_year['year'], avg_by_year['fuelCost08'])
ax.plot(avg_by_year['year'], mpg_model.fittedvalues);

In [None]:
# Same scatter + fitted line, zoomed to years 2010-2020 and costs 1800-2200.
# The trailing semicolon suppresses the cell's text output.
fig, ax = plt.subplots()
ax.scatter(avg_by_year['year'], avg_by_year['fuelCost08'])
ax.plot(avg_by_year['year'], mpg_model.fittedvalues)
ax.set(xlim=(2010, 2020), ylim=(1800, 2200));

In [None]:
# Keep only rows for year 2010 onward; take a copy so the subset is
# independent of `df`.
df_2010 = df.loc[df['year'] >= 2010].copy()
# Mean fuel cost over that subset, rounded to zero decimals.
avg_fuel_cost = np.round(df_2010['fuelCost08'].mean(), 0)
avg_fuel_cost

In [None]:
# Two-panel summary figure, saved as images/line_hist.png:
#   left  - yearly average fuel cost with the fitted regression line,
#           zoomed to 2010-2020
#   right - histogram of fuel cost for the 2010+ subset
# Both panels mark the overall 2010+ mean (`avg_fuel_cost`).
mpl.style.use('ggplot')
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

# --- Left panel: yearly averages and trend line ---
ax1.scatter(x=avg_by_year['year'], y=avg_by_year['fuelCost08'])
ax1.plot(avg_by_year['year'], mpg_model.fittedvalues,
         color='forestgreen', linestyle='--')
ax1.set(xlabel='Year', ylabel='Fuel Cost',
        ylim=(1850, 2200), xlim=(2010, 2020))
ax1.yaxis.set_major_formatter('${x:,.0f}')
ax1.axhline(avg_fuel_cost, linestyle=':', color='orange')
# Format the mean like the tick labels ('$1,234'); interpolating the raw
# float would render as e.g. '$1234.0'.
ax1.annotate(f'${avg_fuel_cost:,.0f}', xy=(2017, avg_fuel_cost))

# --- Right panel: distribution of fuel cost ---
ax2.hist(df_2010['fuelCost08'], color="skyblue", ec="white")
ax2.xaxis.set_major_formatter('${x:,.0f}')
ax2.set(xlabel='Fuel Costs', ylabel='Num autos')
ax2.axvline(avg_fuel_cost, linestyle=':')
ax2.annotate(f'${avg_fuel_cost:,.0f}', xy=(avg_fuel_cost, 3500))

fig.suptitle('EPA Estimated FuelCosts', weight='bold', size=14)

# savefig does not create missing folders, so make sure the target exists.
image_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(image_dir / 'line_hist.png',
            transparent=False, dpi=200, bbox_inches="tight")