# Health Stats Part 3: Numpy Structured Arrays

<!--- Paste in your explanation from part 1.  --->
The waist-to-hip ratio is the ratio of the circumference of the waist to that of the hips. To calculate it, divide the waist measurement by the hip measurement (W ÷ H).

## Source Data 

<!--- Paste in your column definition table from part 2. --->
* ID (integer)
* Waist (integer)
* Hip (integer)
* Gender (string)

## Data Import

In [7]:
import numpy as np

# Field layout for the CSV as (name, numpy type code) pairs:
# ID is a 64-bit integer, Waist and Hip are 64-bit floats,
# and Gender is a one-character unicode string.
column_types = [
    ('ID', 'i8'),
    ('Waist', 'f8'),
    ('Hip', 'f8'),
    ('Gender', 'U1'),
]

# Read the file into a structured array, skipping the single header line.
rows = np.genfromtxt(
    'w2h_data.csv',
    dtype=column_types,
    delimiter=',',
    skip_header=1,
)
rows

array([( 1, 30., 32., 'M'), ( 2, 32., 37., 'M'), ( 3, 30., 36., 'M'),
       ( 4, 33., 39., 'M'), ( 5, 29., 33., 'M'), ( 6, 32., 38., 'M'),
       ( 7, 33., 42., 'M'), ( 8, 30., 40., 'M'), ( 9, 30., 37., 'M'),
       (10, 32., 39., 'M'), (11, 24., 35., 'F'), (12, 25., 37., 'F'),
       (13, 24., 37., 'F'), (14, 22., 34., 'F'), (15, 26., 38., 'F'),
       (16, 26., 37., 'F'), (17, 25., 38., 'F'), (18, 26., 37., 'F'),
       (19, 28., 40., 'F'), (20, 23., 35., 'F')],
      dtype=[('ID', '<i8'), ('Waist', '<f8'), ('Hip', '<f8'), ('Gender', '<U1')])

array([0.9375    , 0.86486486, 0.83333333, 0.84615385, 0.87878788,
       0.84210526, 0.78571429, 0.75      , 0.81081081, 0.82051282,
       0.68571429, 0.67567568, 0.64864865, 0.64705882, 0.68421053,
       0.7027027 , 0.65789474, 0.7027027 , 0.7       , 0.65714286])

## Calculations

In [9]:
# Goal: derive the waist-to-hip ratio and a shape label for every row,
# then store both next to the original columns in a new array.

# element-wise waist / hip
w2h_ratios = rows['Waist'] / rows['Hip']

# boolean mask: True where Gender is 'M', False for every other row
males = rows['Gender'] == 'M'

# males with a w2h ratio > 0.9 are apple shaped
male_apples = males & (w2h_ratios > 0.9)

# non-male rows (treated as female) with a w2h ratio > 0.8 are apple shaped
female_apples = ~males & (w2h_ratios > 0.8)

# a row is an apple if either of the two masks flags it
apples = male_apples | female_apples

# vectorised if/else: 'Apple' where the mask is True, 'Pear' everywhere else
shapes = np.where(apples, 'Apple', 'Pear')

# Build the results array: every original field plus the two derived ones.

# extend the original dtype description with the new columns
extra_fields = [('W2H Ratio', float), ('Shape', 'U5')]
dt = np.dtype(rows.dtype.descr + extra_fields)

# zero-initialised array with one record per input row
results = np.zeros(rows.shape, dtype=dt)

# copy the original data across one field at a time
for field in rows.dtype.names:
    results[field] = rows[field]

# fill in the derived columns
results['W2H Ratio'] = w2h_ratios
results['Shape'] = shapes
     

## Output

In [10]:
from IPython.display import HTML, display

# Goal: pretty print the results as an HTML table, then export them to CSV.
# Note: this works, but pandas can do the same job with far less code.

# header row: one <th> cell per field name
header_cells = "</th><th>".join(results.dtype.names)

# body rows: one <tr> per record, with every value converted via str()
body_rows = [
    "<tr><td>" + "</td><td>".join(map(str, record)) + "</td></tr>"
    for record in results
]

html_table = (
    '<table><tr><th>'
    + header_cells
    + '</th></tr>'
    + "".join(body_rows)
    + "</table>"
)

display(HTML(html_table))

# Export to "StatsResults.csv": a header line of field names, then one
# comma-separated line per record. comments="" stops numpy from prefixing
# the header line with '# '.
csv_header = ','.join(results.dtype.names)
np.savetxt(
    "StatsResults.csv",
    results,
    fmt='%s',
    delimiter=',',
    header=csv_header,
    comments="",
)

ID,Waist,Hip,Gender,W2H Ratio,Shape
1,30.0,32.0,M,0.9375,Apple
2,32.0,37.0,M,0.8648648648648649,Pear
3,30.0,36.0,M,0.8333333333333334,Pear
4,33.0,39.0,M,0.8461538461538461,Pear
5,29.0,33.0,M,0.8787878787878788,Pear
6,32.0,38.0,M,0.8421052631578947,Pear
7,33.0,42.0,M,0.7857142857142857,Pear
8,30.0,40.0,M,0.75,Pear
9,30.0,37.0,M,0.8108108108108109,Pear
10,32.0,39.0,M,0.8205128205128205,Pear
