In [498]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [499]:
import sys
sys.path.append('/Users/Tim/PycharmProjects/HOI/')

In [500]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from ipywidgets import IntProgress
import itertools
import ast

import matplotlib.pyplot as plt

import hypernetx as hnx
import networkx as nx

In [501]:
from HOI.preprocessings import compute_kernel, compute_kernel_n
from HOI.tests import test_independence
from HOI.statistics import compute_dHSIC_statistics

In [502]:
def find_comb(var):
    power_set = []
    for L in range(len(var) + 1):
        for subset in itertools.combinations(var, L):
            if len(subset) >= 2:
                power_set.append(list(subset))
    return power_set

In [503]:
# (EDUC) Years of Education
# (SES) Socioeconomic Status
# (MMSE) Mini Mental State Examination
# (CDR) Clinical Dementia Rating
# (eTIV) Estimated total intracranial volume
# (nWBV) Normalized Whole Brain Volume
# (ASF) Atlas Scaling Factor

In [504]:
df = pd.read_csv('data/oasis_longitudinal.csv')
df = df.dropna(axis=1)
df.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,0.5,1698,0.701,1.034


In [505]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
scaler = StandardScaler()
df.iloc[:,7:] = scaler.fit_transform(df.iloc[:,7:].to_numpy())

In [506]:
value_counts = df['Subject ID'].value_counts()
values_to_select = set(value_counts[value_counts >= 3].index)
selected_rows = df[df['Subject ID'].isin(values_to_select)]

selected_rows.dropna(inplace=True)
selected_rows['cumcount'] = selected_rows.groupby('Subject ID').cumcount()

Age = selected_rows.set_index(['Subject ID', 'cumcount'])['Age'].unstack().reset_index(drop=True).dropna(axis=1)
EDUC = selected_rows.set_index(['Subject ID', 'cumcount'])['EDUC'].unstack().reset_index(drop=True).dropna(axis=1)
# SES = selected_rows.set_index(['Subject ID', 'cumcount'])['SES'].unstack().reset_index(drop=True).dropna(axis=1)
# MMSE = selected_rows.set_index(['Subject ID', 'cumcount'])['MMSE'].unstack().reset_index(drop=True).dropna(axis=1)
CDR = selected_rows.set_index(['Subject ID', 'cumcount'])['CDR'].unstack().reset_index(drop=True).dropna(axis=1)
eTIV = selected_rows.set_index(['Subject ID', 'cumcount'])['eTIV'].unstack().reset_index(drop=True).dropna(axis=1)
nWBV = selected_rows.set_index(['Subject ID', 'cumcount'])['nWBV'].unstack().reset_index(drop=True).dropna(axis=1)
ASF = selected_rows.set_index(['Subject ID', 'cumcount'])['ASF'].unstack().reset_index(drop=True).dropna(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows['cumcount'] = selected_rows.groupby('Subject ID').cumcount()


In [507]:
Age_k = compute_kernel_n(Age)
EDUC_k = compute_kernel_n(EDUC)
# SES_k = compute_kernel_n(SES)
# MMSE_k = compute_kernel_n(MMSE)
CDR_k = compute_kernel_n(CDR)
eTIV_k = compute_kernel_n(eTIV)
nWBV_k = compute_kernel_n(nWBV)
ASF_k = compute_kernel_n(ASF)

In [508]:
ASF_k.shape

(56, 56)

In [515]:
power_set_k = find_comb([Age_k, CDR_k, nWBV_k, eTIV_k])
power_set_name = find_comb(['Age', 'Clinical Dementia Rating', 'Normalised Whole Brain Volume', 'Estimated Total Intracranial Volume'])
n_exp = 10
edges = {}
for (name, klist) in tqdm(zip(power_set_name, power_set_k), total=len(power_set_k)):
    rejects = 0
    for i in range(n_exp):
        _, _, _, reject = test_independence(klist, None, n_perms=1000, alpha=0.05, mode = 'permutation')
        rejects = rejects + reject
    if rejects/n_exp != 0:
        edges[str(name)] = rejects/n_exp
print(edges)

  0%|          | 0/11 [00:00<?, ?it/s]

{"['Age', 'Normalised Whole Brain Volume']": 1.0, "['Clinical Dementia Rating', 'Normalised Whole Brain Volume']": 1.0, "['Age', 'Clinical Dementia Rating', 'Normalised Whole Brain Volume']": 1.0, "['Age', 'Normalised Whole Brain Volume', 'Estimated Total Intracranial Volume']": 1.0, "['Clinical Dementia Rating', 'Normalised Whole Brain Volume', 'Estimated Total Intracranial Volume']": 1.0, "['Age', 'Clinical Dementia Rating', 'Normalised Whole Brain Volume', 'Estimated Total Intracranial Volume']": 1.0}
