# Bone Mineral Density
Relative spinal bone mineral density (BMD) measurements on 261 North American adolescents. The response is the relative change in spinal BMD between two consecutive visits (the difference in spnbmd divided by the average of the two measurements); the visits are typically about one year apart. The age is the average age over the two visits.

Variables:

|        |                                                       |
|--------|-------------------------------------------------------|
|idnum   |identifies the child, and hence the repeat measurements|
|age     |average age of child when measurements were taken      |
|gender  |male or female                                         |
|spnbmd  |relative spinal bone mineral density measurement       |

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
# Load the tab-separated measurements, then keep one frame per gender so the
# two groups can be fitted independently.
df = pd.read_csv("../data/Bone Mineral Density.txt", sep='\t')
df_male = df.loc[df['gender'] == 'male']
df_female = df.loc[df['gender'] == 'female']
# Last expression of the cell: shows the first rows of the full table.
df.head()

Unnamed: 0,idnum,age,gender,spnbmd
0,1,11.7,male,0.018081
1,1,12.7,male,0.060109
2,1,13.75,male,0.005858
3,2,13.25,male,0.010264
4,2,14.3,male,0.210526


In [3]:
from sympy import *

In [4]:
def buid_basis_functions(knots):
    """Build a natural-cubic-spline basis for ``knots`` as sympy expressions.

    The basis is ``[1, x]`` followed by one piecewise cubic per knot except
    the last two.  For knot ``k_i`` the function is

    * ``0``                      for ``x < k_i``
    * ``n_i(x)``                 for ``k_i <= x <= knots[-2]``
    * ``n_i(x) - d(x)``          for ``x > knots[-2]``

    with ``n_i(x) = (x - k_i)**3 / (knots[-1] - k_i)`` and
    ``d(x) = (x - knots[-2])**3 / (knots[-1] - knots[-2])``.

    Parameters
    ----------
    knots : sequence of float
        Knot locations; at least two are required.  The last two entries are
        used as the boundary knots, so the sequence is expected to be sorted
        in ascending order.

    Returns
    -------
    list
        ``len(knots)`` sympy expressions in the symbol ``x``.

    Notes
    -----
    No branch is defined for ``x > knots[-1]``; the expressions are meant to
    be evaluated inside the knot range only.
    """
    x = symbols('x')
    # Correction term that is subtracted only to the right of the penultimate knot.
    tail_term = (x-knots[-2])**3/(knots[-1] - knots[-2])
    h = [S.One, x]
    # Every knot except the last two contributes one basis function.
    for knot in knots[:-2]:
        cubic = (x - knot)**3/(knots[-1] - knot)
        h.append(Piecewise(
            (0, x < knot),
            (cubic-tail_term, x > knots[-2]),
            (cubic, True)))
    return h

def calc_basis_matrix(basis_functions, x):
    """Evaluate every basis function at every point of ``x``.

    Parameters
    ----------
    basis_functions : sequence
        Objects exposing ``evalf(subs=...)``; each is evaluated with the
        substitution ``{'x': point}``.
    x : numpy.ndarray
        Evaluation points; ``x.shape[0]`` gives the number of rows.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(x.shape[0], len(basis_functions))`` where
        entry ``[row, col]`` is ``basis_functions[col]`` evaluated at ``x[row]``.
    """
    n_points = x.shape[0]
    n_basis = len(basis_functions)
    H = np.zeros((n_points, n_basis))
    for row in range(n_points):
        for col, func in enumerate(basis_functions):
            H[row, col] = func.evalf(subs={'x': x[row]})
    return H

# The penalty integrals are evaluated by composite Simpson quadrature on the
# knot panels instead of sympy's symbolic integration of Piecewise products,
# which was observed to return incorrect entries for this basis.
def calc_omega_matrix(basis_functions, knots):
    """Compute the roughness-penalty matrix of a cubic-spline basis.

    Entry ``O[i, j]`` is the integral of the product of the second
    derivatives of ``basis_functions[i]`` and ``basis_functions[j]`` from
    ``max(knots[i-2], knots[j-2])`` to ``knots[-1]``.  Rows and columns 0 and
    1 (the constant and linear basis functions) are left at zero.

    Parameters
    ----------
    basis_functions : sequence
        Basis functions exposing ``diff('x', 2)``; the result must expose
        ``evalf(subs={'x': value})``.  Function ``i >= 2`` is associated with
        ``knots[i-2]``.
    knots : sequence of float
        Knot locations, sorted in ascending order.

    Returns
    -------
    numpy.ndarray
        Symmetric ``(s, s)`` float matrix with ``s = len(basis_functions)``.

    Notes
    -----
    Between two consecutive knots the second derivative of a cubic spline is
    linear, so each integrand is quadratic on every panel and Simpson's rule
    integrates it exactly.  Second derivatives of cubic splines are
    continuous, so sampling exactly at a knot is unambiguous.
    """
    s = len(basis_functions)
    O = np.zeros((s, s))
    knots = np.asarray(knots, dtype=float)
    if s <= 2 or knots.shape[0] < 2:
        return O

    # Only functions 2.. contribute; the first two are skipped as in the
    # penalty definition above.
    second_derivs = [bs.diff('x', 2) for bs in basis_functions[2:]]

    def sample(points):
        # Table of second-derivative values: one row per point, one column
        # per basis function (columns 0 and 1 stay zero).
        values = np.zeros((points.shape[0], s))
        for row, point in enumerate(points):
            for col in range(2, s):
                values[row, col] = second_derivs[col - 2].evalf(subs={'x': point})
        return values

    widths = np.diff(knots)
    at_knots = sample(knots)
    at_mids = sample((knots[:-1] + knots[1:])/2)

    for i in range(2, s):
        for j in range(2, i + 1):
            # j <= i and the knots are sorted, so the lower integration limit
            # max(knots[i-2], knots[j-2]) is knots[i-2]: start at panel i-2.
            first_panel = i - 2
            ends = at_knots[:, i]*at_knots[:, j]
            mids = at_mids[:, i]*at_mids[:, j]
            panels = widths/6*(ends[:-1] + 4*mids + ends[1:])
            O[i, j] = O[j, i] = panels[first_panel:].sum()
    return O

In [5]:
def calc_matrices(df):
    """Build the spline matrices for one group of measurements.

    A knot is placed at every distinct value of ``df.age``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``age`` column.

    Returns
    -------
    tuple
        ``(H, O, H_plot, x_plot)`` where ``H`` is the basis matrix evaluated
        at the observed ages, ``O`` is the penalty matrix returned by
        ``calc_omega_matrix``, ``x_plot`` is a 100-point grid spanning the
        knot range and ``H_plot`` is the basis matrix evaluated on that grid.
    """
    x = df.age.values
    knots = np.unique(x)  # sorted distinct ages
    x_plot = np.linspace(knots[0], knots[-1], 100)
    h = buid_basis_functions(knots)
    H = calc_basis_matrix(h, x)
    H_plot = calc_basis_matrix(h, x_plot)
    O = calc_omega_matrix(h, knots)
    return H, O, H_plot, x_plot

In [None]:
# Build, separately for each gender, the basis matrix H, the penalty matrix O,
# and the basis matrix / grid used for plotting the fitted curve.
H_male, O_male, H_plot_male, x_plot_male = calc_matrices(df_male)
H_female, O_female, H_plot_female, x_plot_female = calc_matrices(df_female)

In [None]:
def calc_degress_of_freedom(H, O, l):
    """Return the effective degrees of freedom of the penalized smoother.

    This is the trace of the smoother matrix
    ``S_l = H (H^T H + l*O)^{-1} H^T``.

    Parameters
    ----------
    H : numpy.ndarray
        Basis matrix of shape ``(n, p)``.
    O : numpy.ndarray
        Penalty matrix of shape ``(p, p)``.
    l : float
        Smoothing parameter multiplying ``O``.

    Returns
    -------
    float
        ``trace(S_l)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``H^T H + l*O`` is singular.
    """
    gram = H.T @ H
    # trace(H A^-1 H^T) == trace(A^-1 H^T H) by the cyclic property, so the
    # n-by-n smoother matrix never has to be formed; solving the linear system
    # is also better conditioned than forming an explicit inverse.
    return np.trace(np.linalg.solve(gram + l*O, gram))

def calc_model_parameters(H, O, l, y):
    """Return the penalized least-squares coefficients.

    Solves ``(H^T H + l*O) theta = H^T y`` for ``theta``.

    Parameters
    ----------
    H : numpy.ndarray
        Basis matrix of shape ``(n, p)``.
    O : numpy.ndarray
        Penalty matrix of shape ``(p, p)``.
    l : float
        Smoothing parameter multiplying ``O``.
    y : numpy.ndarray
        Response values, one per row of ``H``.

    Returns
    -------
    numpy.ndarray
        Coefficient vector ``theta`` of length ``p``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``H^T H + l*O`` is singular.
    """
    # Solve the normal equations directly rather than multiplying by an
    # explicit inverse, which is less accurate for ill-conditioned systems.
    theta = np.linalg.solve(H.T @ H + l*O, H.T @ y)
    return theta

In [None]:
# Smoothing parameter shared by both fits.
# NOTE(review): the factor 3600 is not explained anywhere in this file —
# confirm where it comes from before changing the value.
l = 0.00022*3600
# Report the trace of the smoother matrix (effective degrees of freedom) per gender.
print(f'MALE DF={calc_degress_of_freedom(H_male, O_male, l):.2f}')
print(f'FEMALE DF={calc_degress_of_freedom(H_female, O_female, l):.2f}')
# Fit the spline coefficients for each gender with the same smoothing parameter.
theta_male = calc_model_parameters(H_male, O_male, l, df_male.spnbmd.values)
theta_female = calc_model_parameters(H_female, O_female, l, df_female.spnbmd.values)

In [None]:
# Evaluate each fitted curve on its plotting grid: basis matrix times coefficients.
y_plot_male = H_plot_male @ theta_male
y_plot_female = H_plot_female @ theta_female

In [None]:
# Fitted curves (lines) drawn over the raw measurements (small dots);
# blue is used for the male group and red for the female group.
plt.figure(figsize=(7.5,5.7))
plt.plot(x_plot_male, y_plot_male, color='blue')
plt.plot(x_plot_female, y_plot_female, color='red')
plt.scatter(df_male.age.values, df_male.spnbmd.values, color='blue', s=0.5)
plt.scatter(df_female.age.values, df_female.spnbmd.values, color='red', s=0.5)