In [None]:
%%bash
# Split the header off the first line of every filter file: a newline is
# inserted right after the first "Rsq" on line 1 of each file, in place.
# Globs with quoted expansions are used instead of parsing `ls`, so paths
# containing spaces or glob characters are handled correctly.
# NOTE(review): the sed expression is not idempotent -- re-running the cell
# inserts another newline after "Rsq"; run it once per fresh dataset copy.
for DATASET_DIR in ../dataset/*/; do
    for FILE in "$DATASET_DIR"*; do
        # Skip unmatched globs (empty directories) and nested directories.
        [ -f "$FILE" ] || continue
        sed -i -e '1 s/Rsq/Rsq\n/' "$FILE"
    done
done

# Problem setting
* 4 observations.
* x = 1, 2, 3, 4
* Y = 9, 13, 32, 41
$$Y = \beta_0 x^0 + \beta_1 x^1 + \beta_2 x^2 + \beta_3 x^3$$

# Create Design Matrix
x = 1 -> [1,1,1,1]    
x = 2 -> [1,2,4,8]    
x = 3 -> [1,3,9,27]    
x = 4 -> [1,4,16,64]    
$\beta=[\beta_0,\beta_1,\beta_2,\beta_3]^T$
$$Y = X\beta + \epsilon$$
Usually, $\beta$ is estimated by least squares, i.e. by solving the normal equations:
$$X^T Y =X^T X\beta$$
$$(X^T X)^{-1}X^T Y =(X^T X)^{-1}X^T X\beta$$
$$\beta=(X^T X)^{-1}X^T Y$$


In [169]:
import numpy as np

# Observation points x = 1, 2, 3, 4.
x = np.array([1, 2, 3, 4])
# Stack the powers x**0 .. x**3 as rows. np.vstack needs a real sequence:
# passing a generator is deprecated and raises TypeError in current NumPy.
X = np.vstack([x**k for k in range(4)])
# Transpose so that each row is one observation [1, x, x^2, x^3].
Xt = np.transpose(X)
print(Xt)

[[ 1  1  1  1]
 [ 1  2  4  8]
 [ 1  3  9 27]
 [ 1  4 16 64]]


# QR decomposition
$$X=QR$$
$$(Q^T = Q^{-1},\ \det(Q)=\pm 1)$$

In [170]:
# Factor the design matrix into Q and R, then show Q.
Q, R = np.linalg.qr(Xt)
print(Q)

[[-0.5         0.67082039  0.5         0.2236068 ]
 [-0.5         0.2236068  -0.5        -0.67082039]
 [-0.5        -0.2236068  -0.5         0.67082039]
 [-0.5        -0.67082039  0.5        -0.2236068 ]]


In [172]:
# Show the R factor returned by np.linalg.qr for the design matrix.
print(R)

[[ -2.          -5.         -15.         -50.        ]
 [  0.          -2.23606798 -11.18033989 -46.51021393]
 [  0.           0.           2.          15.        ]
 [  0.           0.           0.          -1.34164079]]


In [173]:
# Sanity check: the product of the two factors should give back the design matrix.
Q @ R

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  2.,  4.,  8.],
       [ 1.,  3.,  9., 27.],
       [ 1.,  4., 16., 64.]])

In [183]:
# Q times its transpose should be the identity up to rounding error.
Qt = Q.T
Q @ Qt

array([[ 1.00000000e+00,  2.77555756e-17,  8.32667268e-17,
        -5.55111512e-17],
       [ 2.77555756e-17,  1.00000000e+00,  0.00000000e+00,
        -1.53429048e-17],
       [ 8.32667268e-17,  0.00000000e+00,  1.00000000e+00,
         8.32667268e-17],
       [-5.55111512e-17, -1.53429048e-17,  8.32667268e-17,
         1.00000000e+00]])

In [175]:
# Same product, cast to single precision for a more compact display.
(Q @ Qt).astype(np.float32)

array([[ 1.0000000e+00, -2.0534340e-17,  8.6219662e-17, -5.5511151e-17],
       [-2.0534340e-17,  1.0000000e+00,  0.0000000e+00, -1.3936734e-17],
       [ 8.6219662e-17,  0.0000000e+00,  1.0000000e+00,  8.7029287e-17],
       [-5.5511151e-17, -1.3936734e-17,  8.7029287e-17,  1.0000000e+00]],
      dtype=float32)

In [176]:
# Determinant of Q; for an orthogonal matrix this is +1 or -1 (up to rounding).
np.linalg.det(Q)

-0.9999999999999999

# Solve the problem without using inverse matrix
$$Y = X\beta$$
$$Q^T Y = Q^T X \beta$$
$$Q^T Y = Q^T QR \beta$$
$$Q^T Y = R\beta$$
$R$ is an upper triangular matrix, so we can calculate the elements of $\beta$ one by one, starting from the bottom row (back substitution)    
(e.g. http://metodososcaruis.blogspot.com/)

# How do you use it for longitudinal data?

Get $Qi$, which is $Q$ with its first column removed.    
This is no longer a square orthogonal matrix: its columns are still orthonormal ($Qi^T Qi = I$), but $Qi\,Qi^T \neq I$.

In [212]:
# Drop the first column of Q; the remaining columns form Qi.
Qi = Q[:, 1:]
Qit = np.transpose(Qi)
# Print Qi itself (the original printed `Q1`, which is never defined and
# raises NameError on a fresh kernel).
print(Qi)

[[ 0.67082039  0.5         0.2236068 ]
 [ 0.2236068  -0.5        -0.67082039]
 [-0.2236068  -0.5         0.67082039]
 [-0.67082039  0.5        -0.2236068 ]]


In [213]:
# Gram matrix of the columns of Qi.
Qit @ Qi

array([[ 1.00000000e+00,  0.00000000e+00, -1.53429048e-17],
       [ 0.00000000e+00,  1.00000000e+00,  8.32667268e-17],
       [-1.53429048e-17,  8.32667268e-17,  1.00000000e+00]])

In [214]:
# Product in the other order, Qi times its transpose.
Qi @ Qit

array([[ 0.75, -0.25, -0.25, -0.25],
       [-0.25,  0.75, -0.25, -0.25],
       [-0.25, -0.25,  0.75, -0.25],
       [-0.25, -0.25, -0.25,  0.75]])

See what happens when a constant vector is multiplied by $Qi^T$

In [206]:
# A constant vector, one entry per observation.
# (The original cell then overwrote it with np.transpose(female), but
# `female` is never defined, so the cell failed on a fresh kernel;
# transposing a 1-D array is a no-op anyway.)
constant = np.array([1, 1, 1, 1])
np.matmul(Qit, constant)

array([-1.11022302e-16, -1.66533454e-16,  5.55111512e-17])

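This $Qi^T$ maps the constant vector to the zero vector $O$. The reason: the first column of the design matrix is constant (all ones), so the first column of $Q$ is proportional to the constant vector, and the remaining columns of $Q$ (which make up $Qi$) are orthogonal to it.    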


Now, for example, consider we are going to evaluate the association between decline in calculation speed and SNP_A after 65 years old    
Our model is, 
$$ Y = \beta_0 + \beta_1 x_{sex} + \beta_2 x_{education} + \beta_3 x_{SNP} + \beta_4 x_{age-65} + \beta_5 x_{SNP} * x_{age-65}$$

$$ (Yi = Xi\beta) $$

We will think about $i$th subject who was observed at age 65, 66, 67 and 70    
The observed design matrix is;

In [207]:
# Observed design matrix for subject i: one row per visit (ages 65, 66, 67, 70).
# Columns, following the model above:
# intercept, sex, education, SNP, age - 65, SNP * (age - 65).
Xi = np.array([[1, 1, 17, 2, 0, 0],
               [1, 1, 17, 2, 1, 2],
               [1, 1, 17, 2, 2, 4],
               [1, 1, 17, 2, 5, 10]])

We are interested in the $\beta$ of interaction term ($\beta_5$)    
Multiply the above equation by $Qi^T$
$$ Qi^T Yi = Qi^T Xi\beta $$
Let's call the above as an equation in the transformed space.    
the results of $Qi^T Xi$ is;

In [208]:
# Project the subject's design matrix with Qi^T and show it to 3 decimals.
transXi = Qit @ Xi
np.around(transXi, decimals=3)

array([[-0.   , -0.   , -0.   , -0.   , -3.578, -7.155],
       [-0.   , -0.   , -0.   , -0.   ,  1.   ,  2.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   , -0.447, -0.894]])

So it is apparent that in the transformed space, the intercept, $x_{sex}$, $x_{education}$ and $x_{SNP}$ columns are all 0s.    
But we can still estimate $\beta_4$ and $\beta_5$.    
In other words, all the cross-sectional features are eliminated in this space and we don't need to think about them anymore.    
This is particularly helpful when we are not sure which cross-sectional features affect the outcome.    

The above example illustrated only the $i$th subject.      
For data analysis, we transform each subject's observations with their own $Qi$ and then fit an ordinary linear mixed model, because the errors within a subject are assumed to be correlated. Note that the intercept term is no longer needed in that linear mixed model. 
We do the same thing for all the subjects: calculate $Qi$ for each subject and transform the observations. 
This is a **conditional linear regression model**    


# Appendix

poly function to get $Q_i$     

(equivalent to R's poly function; https://stackoverflow.com/questions/41317127/python-equivalent-to-r-poly-function)

In [215]:
def poly(x, p):
    """Return the QR-based polynomial basis of degree 1..p for the points x.

    Builds the matrix whose columns are x**0, x**1, ..., x**p, takes its QR
    decomposition and returns Q without its first column.

    Parameters
    ----------
    x : array-like, 1-D
        Points at which the polynomial basis is evaluated.
    p : int
        Highest polynomial degree.

    Returns
    -------
    numpy.ndarray
        The Q factor of the QR decomposition with the first column removed.
    """
    x = np.asarray(x)
    # Build the columns from a list: np.vstack/np.column_stack require a real
    # sequence, and passing a generator raises TypeError in current NumPy.
    X = np.column_stack([x**k for k in range(p + 1)])
    return np.linalg.qr(X)[0][:, 1:]

In [216]:
# Example: basis for x = 1..4 up to degree 3.
poly([1,2,3,4],3)

array([[ 0.67082039,  0.5       ,  0.2236068 ],
       [ 0.2236068 , -0.5       , -0.67082039],
       [-0.2236068 , -0.5       ,  0.67082039],
       [-0.67082039,  0.5       , -0.2236068 ]])

# Reference
R code from    

Eur J Hum Genet. 2015 Oct; 23(10): 1384–1391.    
Published online 2015 Feb 25. doi:  10.1038/ejhg.2015.1    
PMCID: PMC4592098    
PMID: 25712081    
GWAS with longitudinal phenotypes: performance of approximate procedures    
Karolina Sikorska,1,2 Nahid Mostafavi Montazeri,1,3 André Uitterlinden,2 Fernando Rivadeneira,2 Paul HC Eilers,1 and Emmanuel Lesaffre1,4,*
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4592098/



            id	y	Time
            1	1.12	1
            1	1.14	2
            1	1.16	3
            1	1.2	4
            1	1.26	5
            2	0.95	1
            2	0.83	2
            2	0.65	3
            2	0.49	4
            2	0.34	5

    # Transform the columns `vars` of `data` subject by subject.
    # For each id with more than one non-missing observation, the selected
    # columns are multiplied by the transposed orthogonal-polynomial basis
    # returned by poly(); subjects with a single observation are dropped.
    # Returns a data frame with columns "id" and `vars`.
    cond = function(data, vars) {
    # sort the rows by subject id
    data = data[order(data$id), ]
    ### delete missing observations
    data1 = data[!is.na(data$y), ]
    ## do the transformations
    ids = unique(data1$id)
    transdata = NULL
    for(i in ids) {
    # rows of subject i, restricted to the requested columns
    xi = data1[data1$id == i, vars]
    xi = as.matrix(xi)
    if(nrow(xi) > 1) {
    # A is the observation index 1..n of this subject (not the observed times)
    A = cumsum(rep(1, nrow(xi)))
    # polynomial basis of degree n-1 in the observation index
    A1 = poly(A, degree = length(A)-1)
    transxi = t(A1) %*% xi
    # prepend the subject id and append to the accumulated result
    transxi = cbind(i, transxi)
    transdata = rbind(transdata, transxi)
        }
    }
    transdata = as.data.frame(transdata)
    names(transdata) = c("id", vars)
    row.names(transdata) = 1:nrow(transdata)
    return(transdata)
    }

    # transform the Time and y columns of every subject with cond()
    trdata = cond(mydata, vars = c("Time", "y"))
    #fit the reduced model and extract random slopes
    # (the formula operator must be the ASCII tilde "~"; the mis-encoded
    # "˜" character copied from the paper does not parse in R)
    mod2 = lmer(y ~ Time - 1 + (Time-1|id), data = trdata)