In [39]:
%matplotlib inline
from matplotlib import pyplot as plt
from pathlib import Path
import pandas as pd
import tarfile
import urllib.request

def load_housing_data():
    """Return the housing dataset as a DataFrame, fetching it when needed.

    The archive ``datasets/housing.tgz`` is downloaded only when it is not
    already on disk. It is unpacked into ``datasets/`` right after a download
    and also whenever ``datasets/housing/housing.csv`` is missing, so a run can
    recover from an earlier interrupted extraction or a deleted CSV instead of
    failing in ``read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` on
        ``datasets/housing/housing.csv``.

    Errors raised by the download, the archive extraction, or the CSV parsing
    are not caught here and propagate to the caller.
    """
    datasets_dir = Path("datasets")
    tarball_path = datasets_dir / "housing.tgz"
    csv_path = datasets_dir / "housing" / "housing.csv"

    needs_download = not tarball_path.is_file()
    if needs_download:
        datasets_dir.mkdir(parents=True, exist_ok=True)
        url = "https://github.com/ageron/data/raw/main/housing.tgz"
        urllib.request.urlretrieve(url, tarball_path)

    # Extract after every fresh download (as before) and also when the archive
    # is present but its extracted CSV is not.
    if needs_download or not csv_path.is_file():
        with tarfile.open(tarball_path) as housing_tarball:
            if hasattr(tarfile, "data_filter"):
                # Extraction filters exist on this interpreter: use the "data"
                # filter so archive members cannot be written outside the
                # destination directory.
                housing_tarball.extractall(path="datasets", filter="data")
            else:
                housing_tarball.extractall(path="datasets")

    return pd.read_csv(csv_path)



# extra code – output directory for the figures saved as high-res PNGs for the
# book; it is created (with any missing parents) if it does not exist yet

IMAGES_PATH = Path("images", "end_to_end_project")
IMAGES_PATH.mkdir(exist_ok=True, parents=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save via ``plt.savefig`` to ``IMAGES_PATH / "<fig_id>.<fig_extension>"``.

    Args:
        fig_id: Base name of the output file, without the extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: Used both as the file suffix and as the ``format``
            passed to ``plt.savefig``.
        resolution: Passed to ``plt.savefig`` as ``dpi``.
    """
    if tight_layout:
        plt.tight_layout()
    target = IMAGES_PATH / f"{fig_id}.{fig_extension}"
    plt.savefig(target, format=fig_extension, dpi=resolution)



# Load the dataset once; the cells below inspect this `housing` DataFrame.
housing = load_housing_data()

In [40]:
# Summary of the DataFrame via pandas' DataFrame.info().
housing.info()

In [41]:
# Peek at the first rows of the dataset.
housing.head()

In [42]:
# Count how many rows fall under each distinct "ocean_proximity" value.
housing["ocean_proximity"].value_counts()

In [43]:
# Summary statistics of the dataset via pandas' DataFrame.describe().
housing.describe()

In [44]:
# Histograms of the housing columns: 30 bins, figsize (20, 15).
housing.hist(figsize=(20, 15), bins=30)
plt.show()

In [45]:
# Same histograms with a finer binning: 40 bins, figsize (20, 15).
housing.hist(figsize=(20, 15), bins=40)
plt.show()

In [46]:
# Histograms with 50 bins and legend=True, figsize (20, 15).
housing.hist(bins=50, figsize=(20, 15), legend=True)
plt.show()

In [47]:
# Show the first rows again (same call as the earlier housing.head() cell).
housing.head()