# Testing the OPNMF method

This notebook uses data from the UK Biobank database. The dataset comprises 599 cognitively normal males. We apply OPNMF to the 145 regions of interest (ROIs) it includes in order to determine the optimal number of components.

In a later revision, the ROIs will be replaced with the RAVENS maps.

In [2]:
import pandas as pd
import numpy as np

# Load the UK Biobank baseline table; the relative path is resolved against
# the notebook's current working directory.
data = pd.read_csv("ukbb_cn_males_baseline_4550.csv")
# Preview the first rows as the cell output to sanity-check the load.
data.head()

Unnamed: 0,PTID,Date,Visit_Code,Age,Study,Phase,Delta_Baseline,Education_Years,APOE_Genotype,APOE4_Alleles,...,WMLS,RAVENS_GM,RAVENS_VN,RAVENS_WM,RAVENS_CSF,H_RAVENS_GM,H_RAVENS_VN,H_RAVENS_WM,H_RAVENS_CSF,session_id
0,1214962,2015-07-11,2_0,45.0,UKBIOBANK,UKBB-Batch8,1892.0,,E2/E3,0.0,...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,1
1,1177941,2014-08-30,2_0,45.0,UKBIOBANK,UKBB-Batch3,1607.0,,E3/E3,0.0,...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,1
2,1065888,2016-08-20,2_0,46.0,UKBIOBANK,UKBB-Batch8,2227.0,,E3/E3,0.0,...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,1
3,1080555,2015-03-09,2_0,46.0,UKBIOBANK,UKBB-Batch8,1925.0,,,,...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,1
4,1109805,2016-04-03,2_0,46.0,UKBIOBANK,UKBB-Batch4,2085.0,,E3/E3,0.0,...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/UKBB_Processed/Pipelines/UKBB_...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,/cbica/projects/ISTAGING/Pipelines/ISTAGING_Da...,1


### The columns that interest us are the 145 ROIs:

Essentially, we need the columns `MUSE_Volume_4`, ..., `MUSE_Volume_207`.

In [3]:
# Find the positions of the first and last ROI columns by name. The ROI block
# is every column lying between them (inclusive) in the table's column order.
column_list = data.columns.tolist()
start, end = (
    column_list.index(label) for label in ("MUSE_Volume_4", "MUSE_Volume_207")
)

# Keep only the ROI volume columns.
# To carry the subject identifier along for reference, select
# ["PTID"] + column_list[start:end + 1] instead.
roi_matrix = data[column_list[start:end + 1]]
roi_matrix.head()

Unnamed: 0,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,...,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
0,1057.003024,1790.005121,382.001093,358.001024,955.002732,987.002824,23171.066293,3175.009084,3089.008838,47976.13726,...,9446.027025,10389.029723,7161.020488,6581.018828,8594.024588,7765.022216,3177.009089,3828.010952,1508.004314,1684.004818
1,1395.006984,2848.014259,550.002754,575.002879,1267.006344,1222.006118,26027.130312,4036.020207,3790.018976,66299.331945,...,12556.062865,11597.058064,7684.038472,9264.046383,9924.049687,10564.052892,3113.015586,3225.016147,1391.006964,1389.006954
2,763.999818,1554.999629,362.999913,361.999914,1041.999752,1104.999737,21884.994782,2855.999319,2878.999314,55630.986737,...,8629.997942,9008.997852,6600.998426,7305.998258,10003.997615,8926.997872,3400.999189,3901.99907,1145.999727,1676.9996
3,632.000753,1230.001466,468.000558,470.00056,938.001118,971.001158,22836.027223,3024.003605,3075.003666,53106.063307,...,9406.011213,9517.011345,7208.008593,6852.008168,8861.010563,8875.01058,3112.00371,4133.004927,1600.001907,2177.002595
4,566.00027,1137.000542,468.000223,467.000223,1092.000521,1203.000574,24355.011613,2930.001397,2913.001389,44892.021406,...,9209.004391,9116.004347,7104.003387,8328.003971,10632.00507,8658.004128,3219.001535,4058.001935,1561.000744,2289.001091


### Perform OPNMF:

For testing purposes, I use scikit-learn's NMF implementation; the lab's OPNMF code will be integrated later.

In [4]:
from sklearn.decomposition import NMF

# Factorize the ROI matrix with scikit-learn's NMF as a stand-in for OPNMF.
# n_components=15 is a fixed trial value; random_state=0 makes the random
# initialization repeatable, and max_iter=5000 caps the solver iterations.
model = NMF(n_components=15, init='random', random_state=0, max_iter=5000)
# W: one row per row of roi_matrix, one column per component.
W = model.fit_transform(roi_matrix)
# H: one row per component, one column per ROI (15 x 145 here).
H = model.components_

### Visualize the Coefficients:

In [5]:
# Lay the loadings out with one row per ROI and one column per component,
# labelling the components 1..n_components for display.
coeffs = pd.DataFrame(
    H.T,
    columns=[f"Component {k}" for k in range(1, model.n_components + 1)],
)
coeffs.head()

Unnamed: 0,Component 1,Component 2,Component 3,Component 4,Component 5,Component 6,Component 7,Component 8,Component 9,Component 10,Component 11,Component 12,Component 13,Component 14,Component 15
0,0.776989,1.860794,1.666682,0.0,15.22636,1.302001,15.22996,3.39023,39.225443,0.0,0.532068,0.0,554.03596,0.0,0.0
1,3.665332,2.087805,0.0,1.147357,20.860084,6.763519,32.137167,0.79821,121.307412,4.700305,0.0,0.0,397.476628,0.0,0.763711
2,0.888201,0.568438,0.57072,0.270925,2.451631,0.850116,2.80459,0.542219,49.048022,1.830295,0.709402,8.438929,0.0,1.943963,0.635443
3,1.112495,0.491321,0.555795,0.176266,1.338812,0.861641,2.828412,0.88027,57.202774,1.52764,0.850275,10.848527,0.0,1.672046,0.763395
4,1.699706,2.493616,1.486436,1.597001,11.326832,2.456025,9.018586,2.104546,121.955112,3.694008,1.467911,18.688976,45.503721,2.461235,1.020032


In [9]:
# Length of one component's loading vector; it should equal the number of ROI
# columns that were passed to the factorization.
H[0].shape

(145,)