In [46]:
import pandas as pd
import numpy as np
from scipy.stats import trim_mean
from statsmodels import robust

import matplotlib.pyplot as plt
import wquantiles

In [11]:
# Load the per-state table; the cells below read its "Population" and
# "Murder.Rate" columns.
state = pd.read_csv("./practical-statistics-for-data-scientists-master/data/state.csv")
state_population = state["Population"]

# Location estimates for the population column, printed in this order:
# arithmetic mean, trimmed mean, median.
print(state_population.mean())
# The second argument (0.1) cuts 10% of the observations off each tail
# before averaging.
print(trim_mean(state_population, 0.1))
print(state_population.median())

6162876.3
4783697.125
4436369.5


In [16]:
# Weight each state's murder rate by that state's population.
murder_rate = state["Murder.Rate"]
population_weights = state["Population"]

# Population-weighted mean of the murder rates.
weighted_mean = np.average(murder_rate, weights=population_weights)
print(weighted_mean)

# Population-weighted median of the murder rates.
weighted_median = wquantiles.median(murder_rate, weights=population_weights)
print(weighted_median)

4.445833981123393
4.4


In [27]:
# First quartile (q = 0.25) of the state populations under three different
# rules for placing the quantile between two neighbouring observations.
#
# NumPy 1.22 renamed np.quantile's `interpolation=` keyword to `method=` and
# deprecated the old spelling, so the supported keyword is used here. The
# option names ("linear", "midpoint", "nearest") are the same as before.
population = state["Population"]
for quantile_method in ("linear", "midpoint", "nearest"):
    print(np.quantile(population, 0.25, method=quantile_method))

1833004.25
1839667.5
1826341


In [38]:
# Three spread estimates for the population column: standard deviation,
# interquartile range (75th minus 25th percentile) and the median absolute
# deviation as computed by statsmodels' robust.scale.mad.
population = state["Population"]
upper_quartile = population.quantile(0.75)
lower_quartile = population.quantile(0.25)

print("Standard deviation of the population =", population.std())
print("IQR of the population =", upper_quartile - lower_quartile)
print("Median absolute deviation =", robust.scale.mad(population))

Standard deviation of the population = 6848235.347401142
IQR of the population = 4847308.0
Median absolute deviation = 3849876.1459979336


In [39]:
# Selected percentiles of the murder rate; the Series returned by the last
# expression is what the cell displays.
percentile_levels = [0.05, 0.25, 0.50, 0.75, 0.95]
state["Murder.Rate"].quantile(percentile_levels)

0.05    1.600
0.25    2.425
0.50    4.000
0.75    5.550
0.95    6.510
Name: Murder.Rate, dtype: float64

### Boxplots.

In [79]:
%matplotlib widget

# Boxplot of state populations, rescaled to millions so the axis ticks stay
# readable.
population_millions = state["Population"] / 1_000_000
plt.boxplot(population_millions)
plt.ylabel("Population (millions)")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Population (millions)')

In [65]:
# Split the populations into 10 equal-width bins and print how many states
# fall in each one, one count per line, in the order value_counts() returns
# them.
binnedPopulation = pd.cut(state["Population"], 10)
bin_counts = binnedPopulation.value_counts()
# Ten bins always give ten counts, so this writes the same lines as
# printing each count in turn.
print(*bin_counts, sep="\n")

24
14
6
2
1
1
1
1
0
0


In [67]:
%matplotlib widget
# Histogram of state populations (in millions) on a 4x4 figure.
population_millions = state["Population"] / 1_000_000
ax = population_millions.plot.hist(figsize=(4, 4))
ax.set_xlabel("Population (millions)")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Population (millions)')

In [78]:
%matplotlib widget
# Histogram of the murder rates, normalised to a density (density=True), with
# a density estimate of the same column drawn on the same axes.
murder_rate = state["Murder.Rate"]
ax = murder_rate.plot.hist(density=True, xlim=[0, 12], bins=range(1, 12))
murder_rate.plot.density(ax=ax)
# The x-axis shows the murder rate, not population; the previous label was
# carried over from the population histogram cell.
# NOTE(review): the "per 100,000" unit is assumed from the dataset's usual
# description; confirm against the data source.
ax.set_xlabel("Murder Rate (per 100,000)")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Population (millions)')