In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Dependencies
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from scipy.stats import sem

In [None]:
# Load the Boston housing data set that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn -- confirm the pinned version.
boston_dataset = load_boston()

# Show the description text that comes with the data set
dataset_description = boston_dataset.DESCR
print(dataset_description)

In [None]:
# Build a DataFrame from the feature matrix (one column per feature name)
# and append the target values as the MEDV column.
housing_data = pd.DataFrame(
    boston_dataset.data,
    columns=boston_dataset.feature_names,
).assign(MEDV=boston_dataset.target)

# Preview the first rows
housing_data.head()

In [None]:
# Draw `nsamples` random samples from the housing data, each with `div` rows.
nsamples = 25  # how many samples to draw
div = 20       # number of rows in each sample

# Seed one shared random state so a fresh "Restart & Run All" reproduces the
# same samples (and therefore the same plot and SEM results below). The state
# advances on every draw, so the samples still differ from one another.
sample_rng = np.random.RandomState(42)
samples = [housing_data.sample(div, random_state=sample_rng) for _ in range(nsamples)]

In [None]:
# Pull the MEDV column out of every sample once, then summarise each one
medv_per_sample = [sample["MEDV"] for sample in samples]

# Mean of MEDV for each sample
means = [medv.mean() for medv in medv_per_sample]
# Standard error of the mean of MEDV for each sample
sems = [sem(medv) for medv in medv_per_sample]

In [None]:
# Plot every sample's mean, using its standard error as the error bar
sample_numbers = np.arange(1, len(samples) + 1)  # 1-based x positions

fig, ax = plt.subplots()
ax.errorbar(
    sample_numbers,
    means,
    yerr=sems,
    fmt="o",
    color="b",
    alpha=0.5,
    label="Mean of House Prices",
)

# Leave one unit of padding on either side of the plotted sample numbers
ax.set_xlim(0, len(means) + 1)
ax.set_xlabel("Sample Number")
ax.set_ylabel("Mean of Median House Prices ($1000)")
ax.legend(loc="best", fontsize="small", fancybox=True)
plt.show()

In [None]:
# Spread between the largest and smallest standard error across the samples
lowest_sem, highest_sem = min(sems), max(sems)
print(f"The range of SEM values in the sample set is {highest_sem-lowest_sem}")

In [None]:
# Find the sample whose standard error is the smallest
smallest_sem = min(sems)
print(f"The smallest SEM observed was {smallest_sem}")

# list.index gives the 0-based position of the first match; report it 1-based
samp_index = sems.index(smallest_sem)
print(f"The sample with the smallest SEM is sample {samp_index+1}")

In [None]:
# Compare the mean of the lowest-SEM sample with the mean of the full data set.
# The sample number in the message comes from samp_index (0-based, hence +1),
# so it always names the sample whose mean is actually being printed.
chosen_sample = samples[samp_index]
print(f"The mean of the sample {samp_index+1} is {chosen_sample.MEDV.mean()}")
print(f"The mean of the population data set is {housing_data.MEDV.mean()}")