# An example: US presidents' height
(data and example structure come from _Python Data Science Handbook (2nd edition)_ by Jake VanderPlas)


In [None]:
import numpy as np
import pandas as pd

# Read the presidents' heights table from the CSV file into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="data/president_heights.csv",
)

## Extract the height data from the DataFrame as a NumPy array

In [None]:

# Select the "height(cm)" column and copy its values into a NumPy array.
heights = np.array(data.loc[:, "height(cm)"])
print(heights)

## Compute summary statistics using the NumPy functions we learnt

In [None]:
# Basic summary statistics of the heights, using NumPy's aggregation functions.
print("Mean height:       ", np.mean(heights))
print("Standard deviation:", np.std(heights))
print("Minimum height:    ", np.min(heights))
print("Maximum height:    ", np.max(heights))

In [None]:
# Quartiles of the heights: the fractions 0.25 and 0.75 given to np.quantile
# are the 25th and 75th percentiles.
print("25th percentile:   ", np.quantile(heights, 0.25))
print("Median:            ", np.median(heights))
print("75th percentile:   ", np.quantile(heights, 0.75))

## Convert the height to feet and inches

- $1 \text{ inch} = 2.54 \text{ cm}$
- $1 \text{ foot} = 12 \text{ inches}$



In [None]:
# Convert every height from centimetres to inches (element-wise division by 2.54).
heights_in_inches = np.divide(heights, 2.54)
print(heights_in_inches)

In [None]:
# Split each height in inches into whole feet and the inches left over.
# np.divmod returns the floor quotient and the remainder in a single call,
# i.e. the same values as (heights_in_inches // 12, heights_in_inches % 12).
heights_in_feet, heights_remainder_inches = np.divmod(heights_in_inches, 12)

print("Height in feet")
print(heights_in_feet)
print("\nremaining inches")
print(heights_remainder_inches)

In [None]:
# Print each president's height in centimetres and in feet and inches.
# note: \" inside the f-string escapes the double quote used as the inch mark.

for name, height_in_cm, height_in_inches in zip(
    data["name"], heights, heights_in_inches
):
    # Round to whole inches *before* splitting into feet and inches. Rounding
    # the remainder on its own can produce 12 inches: 182 cm is 71.65 in,
    # which would be printed as 5'12" instead of 6'0".
    whole_feet, whole_inches = divmod(int(round(height_in_inches)), 12)
    print(f"{name} is {height_in_cm} cm tall, "
          f"equivalent to {whole_feet}'{whole_inches}\"")


## Optional: Plot the heights

In [None]:
import matplotlib.pyplot as plt

# Histogram of the heights with a title and axis labels.
plt.hist(x=heights)
plt.title(label="Height Distribution of US Presidents")
plt.xlabel(xlabel="height (cm)")
plt.ylabel(ylabel="number")