# Using the OPNMF method to reduce the dimensionality of the ADNI imaging data

In [1]:
import pandas as pd
import numpy as np
import matplotlib

### Read the database, examine it:

Instead of reading the whole database, we read only the data that's useful to us. That is, we read only specific columns of data, and we take only the row containing the first scan for each person. 

In "ADNI Regressional Analysis.ipynb" we did exactly that, and we also applied a linear-regression transformation to the imaging data in order to remove any age, sex, and DLICV_baseline effects.

The data is located at "./DATA/Linearly_Transformed_Unique_Dataset.pkl"

In [2]:
# Load the pre-processed, one-row-per-person table (presumably written by
# "ADNI Regressional Analysis.ipynb" -- confirm against that notebook).
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
unique = pd.read_pickle(
    "./DATA/Linearly_Transformed_Unique_Dataset.pkl"
)

# Preview the first 15 rows as the cell's rich output.
unique.head(n=15)

Unnamed: 0,PTID,Date,Age,Sex,DLICV_baseline,APOE4_Alleles,APOE_Genotype,Diagnosis_nearest_2.0,MUSE_Volume_4,MUSE_Volume_11,...,rs28394864,rs111278892,rs3752246,rs4147929,rs41289512,rs3865444,rs6024870,rs6014724,rs7274581,rs429358
0,002_S_0295,2006-04-18,84.742466,0,1485405.375,1.0,E3/E4,CN,-440.777069,-507.297168,...,0,1,1,1,0,0,0,0,0,1
9,002_S_0413,2006-05-02,76.283562,1,1364116.0,0.0,E3/E3,CN,577.755137,-188.813792,...,1,0,1,1,0,1,0,0,0,0
24,002_S_0559,2006-05-23,79.223288,0,1570479.625,1.0,E3/E4,CN,198.499249,1080.290951,...,2,0,0,0,0,1,0,0,0,0
31,002_S_0619,2006-06-01,77.447945,0,1859348.25,2.0,E4/E4,Dementia,2623.687012,649.558822,...,1,0,0,0,1,1,0,0,0,2
36,002_S_0685,2006-07-06,89.561644,1,1372862.125,0.0,E3/E3,CN,727.400228,-250.782773,...,2,1,1,1,0,0,0,0,0,0
45,002_S_0729,2006-07-17,65.056164,1,1166961.75,1.0,E3/E4,MCI,246.226215,628.340793,...,1,0,0,0,1,1,0,0,0,1
64,002_S_0816,2006-08-30,70.767123,0,1444128.125,2.0,E4/E4,Dementia,-145.138564,-193.593195,...,0,0,0,0,0,1,0,0,0,2
69,002_S_0938,2006-10-05,82.167123,1,1309685.0,0.0,E3/E3,Dementia,169.421397,-610.085153,...,0,0,1,1,0,1,0,0,0,0
74,002_S_0954,2006-10-10,69.19863,1,1075661.5,1.0,E3/E4,MCI,-81.66421,1343.833768,...,2,2,1,1,0,1,0,0,0,1
81,002_S_0955,2006-10-11,78.161644,1,1363607.0,1.0,E3/E4,Dementia,1036.385233,-353.324662,...,1,1,0,0,0,1,0,0,0,1


### The columns that interest us are the 145 ROIs:

Essentially, we need the columns `MUSE_Volume_4`, ..., `MUSE_Volume_207`. These contain the residuals from the linear regression performed previously.

In [6]:
# Label-based column slice: with .loc both endpoints are included, so this
# takes every column positioned from MUSE_Volume_4 through MUSE_Volume_207
# (in the frame's existing column order) and previews the first 15 rows.
unique.loc[:, slice("MUSE_Volume_4", "MUSE_Volume_207")].head(n=15)

Unnamed: 0,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,...,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
0,-440.777069,-507.297168,-35.171797,-14.510211,90.244138,140.075639,299.828133,63.88968,56.259492,4434.963481,...,745.557312,-188.95447,-1594.432454,-1648.308374,798.003198,-468.672456,-81.798945,283.990527,-134.708868,-102.291612
9,577.755137,-188.813792,35.574764,-39.881572,40.161648,58.255314,-909.95651,-107.325098,118.445748,-932.590538,...,1336.384182,2631.004114,1410.754665,30.295558,-1258.071206,115.187177,-175.177715,-533.517736,-37.990106,-475.586534
24,198.499249,1080.290951,137.416288,142.58683,121.231074,41.449232,1825.886437,-267.694901,6.605333,-947.176391,...,-1764.15837,-2206.292278,1473.087979,532.054466,1714.763199,2469.640085,209.533224,-49.858132,-206.268764,-117.520261
31,2623.687012,649.558822,-162.939446,-122.19178,-329.934406,-351.510297,-3426.992838,-826.297201,-713.213854,-355.750507,...,-641.454806,583.322773,-701.560285,-1369.412583,-2919.253412,-2766.270514,-757.912814,-822.7715,-347.672981,-131.863034
36,727.400228,-250.782773,-27.195126,6.992014,48.594085,57.675286,1120.707525,136.378936,316.063586,4364.77216,...,47.451381,279.144879,-1423.470776,-1844.920979,-186.886077,-270.949818,-312.293181,174.818988,-559.090352,-289.513744
45,246.226215,628.340793,10.979183,24.346908,-165.999584,-114.587813,171.517739,628.498317,88.70557,-521.590388,...,-346.626209,-670.579403,163.045892,1008.186971,-1557.957769,-1396.447884,-146.49525,-188.233592,-200.821122,-254.208574
64,-145.138564,-193.593195,-1.961382,-43.963703,-114.956569,-287.346946,993.225831,-355.357204,-299.109526,-4585.096939,...,-639.210338,1881.904887,-278.802002,-2569.0808,581.980098,-471.029522,-359.748531,-742.928859,-321.919196,-39.968796
69,169.421397,-610.085153,-82.24344,-88.850034,-388.425224,-393.19257,-890.491741,-746.264217,-621.836515,-5005.238233,...,-891.150119,190.989398,435.710078,528.230795,-2207.796469,-658.308728,-312.575988,1000.451702,-210.94453,-277.536629
74,-81.66421,1343.833768,101.836384,43.596098,-154.810341,-134.886347,1033.170275,968.539022,941.960498,-1405.34949,...,-2610.451083,-841.474614,-722.211848,-61.546022,354.051466,-407.221175,348.960159,-62.218961,-310.715251,-121.535091
81,1036.385233,-353.324662,-49.617421,-18.439395,-164.007463,-327.518454,717.249036,257.48618,312.58826,351.860893,...,-2120.59943,-62.905455,-530.826364,-915.761368,-803.96132,-1507.304613,122.936219,373.046852,-429.932657,149.960522
