### Taking average of each bundle for dki_fa

In [None]:
import afqinsight as ai 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
nodes = pd.read_csv('combined_tract_profiles.csv')

In [None]:
buddles = sorted(list(set(nodes["tractID"])))
buddles

In [None]:
len(nodes)/100

In [None]:
subject = set(nodes['subjectID'])
subject_sorted = sorted(list(subject))
node_avg = np.zeros((len(subject), 18)) 

In [None]:
len(subject_sorted)

In [None]:
from numpy import nanmean

In [None]:
node_avg = nodes.groupby(['tractID', 'subjectID']).agg(nanmean)

In [None]:
node_avg["dki_fa"]

In [None]:
# Start the wide table from ARC_L and ARC_R: join their per-subject rows on
# subjectID and keep only the two suffixed dki_fa columns.
dki_fa = pd.merge(node_avg.loc["ARC_L", :], node_avg.loc["ARC_R", :], on="subjectID", how="outer")[["dki_fa_x", "dki_fa_y"]]

In [None]:
# Replace merge's automatic _x/_y suffixes with the bundle names.
dki_fa = dki_fa.rename(columns={"dki_fa_x": "ARC_L", "dki_fa_y": "ARC_R"})

In [None]:
# Append one dki_fa column per remaining bundle, named after the bundle.
# NOTE(review): buddles[2:] assumes ARC_L and ARC_R are the first two entries
# of the sorted bundle list -- confirm.
for bundle in buddles[2:]:
    dki_fa = pd.merge(dki_fa, node_avg.loc[bundle, "dki_fa"], on="subjectID", how="outer").rename(columns={"dki_fa": bundle})

In [None]:
# NOTE(review): df is only assigned in a later cell (the participant_data.tsv
# read). Run top to bottom, this cell raises NameError -- run that cell first.
df.columns

In [None]:
# Outer-join the per-bundle dki_fa table with the participant table, matching
# subjectID on the left against the Observations column on the right.
all_data = pd.merge(dki_fa, df, left_on="subjectID", right_on="Observations", how="outer")

In [None]:

# Per-subject, per-bundle mean of dki_fa, filled in one cell at a time.
# NOTE(review): this indexes node_avg as a 2-D array (subjects x bundles), i.e.
# the np.zeros preallocation. The groupby cell rebinds node_avg to a DataFrame,
# so re-run the preallocation cell before running this one.
for i, sub in enumerate(subject_sorted):
    # The subject mask does not depend on the bundle, so build it once per subject.
    is_subject = nodes['subjectID'] == sub
    for j, bud in enumerate(buddles):
        # Filter on the loop variables. The previous quoted literals 'sub' and
        # 'bud' compared against those fixed strings instead of the current
        # subject and bundle, so the selection was empty and the mean was NaN.
        nodes_sub_tract = nodes.loc[is_subject & (nodes['tractID'] == bud)]
        nodes_avg_sub_tract = nodes_sub_tract['dki_fa'].mean()
        node_avg[i, j] = nodes_avg_sub_tract

In [None]:
# Participant table (tab-separated). Its "Observations" column is the key the
# dki_fa table is merged against.
df=pd.read_csv('participant_data.tsv', sep='\t')

In [None]:
# Number of distinct values in the Observations column.
len(set(df['Observations']))

In [None]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer()
dki_fa_imp = imputer.fit_transform(dki_fa)

In [None]:
plt.plot(dki_fa_imp[:, :].T)
plt.xlabel('18 bundles')
plt.ylabel('averaged values')

### NMF with 2 components

In [None]:
from sklearn.decomposition import NMF
# Factorise the imputed table with a 2-component NMF.
# random_state=0 pins the random initialisation so reruns are reproducible.
model = NMF(n_components=2, init='random', random_state=0)
# W: transformed data returned by fit_transform; H: the fitted components.
W = model.fit_transform(dki_fa_imp)
H = model.components_

In [None]:
W.shape
#H.shape

In [None]:
H.shape

In [None]:
# Product of the two factors, i.e. the model's approximation of dki_fa_imp.
W@H

In [None]:
fig, ax = plt.subplots()
# Scatter the two NMF weights of each row, coloured by the 'hand' column.
# NOTE(review): .loc[:640] is an inclusive label slice with a hard-coded bound,
# presumably chosen to match the number of rows of W. all_data comes from an
# outer merge, so its row count and order may not line up with W -- confirm.
ax.scatter(W[:, 0], W[:, 1], c = all_data.loc[:640, 'hand'], cmap = 'viridis')
# TODO: add age as color


In [None]:
# Overlay the first row of the imputed data on its 2-component NMF
# reconstruction.
fig, ax = plt.subplots()
ax.plot(W[0, :]@H)
# The matrix the NMF was fitted on is dki_fa_imp; the previously referenced
# node_avg_imp is never defined in this notebook and raised NameError.
ax.plot(dki_fa_imp[0, :])

### PCA with 2 components

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
#scaler = StandardScaler()
# Two-component PCA fitted on the same imputed table as the NMF
# (the standardisation step is currently commented out).
pca_2 = PCA(n_components=2)
#dki_fa_imp_nor = scaler.fit_transform(dki_fa_imp)
x_2 = pca_2.fit_transform(dki_fa_imp)

In [None]:
# Fitted principal axes; used below to map the scores back to feature space.
c = pca_2.components_

In [None]:
# Per-row RMSE between the imputed data and each model's 2-component
# reconstruction. All rows are handled in one vectorised expression, so the
# row count is no longer hard-coded (it was fixed at 641).
loss_nmf_2 = np.sqrt(np.mean((W @ H - dki_fa_imp) ** 2, axis=1)).tolist()
# PCA centres the data before projecting, so the per-feature mean has to be
# added back to the projection; without it the PCA error mostly measures the
# missing mean rather than the reconstruction quality.
loss_pca_2 = np.sqrt(np.mean((x_2 @ c + pca_2.mean_ - dki_fa_imp) ** 2, axis=1)).tolist()
fig1, ax1 = plt.subplots()
ax1.scatter(loss_nmf_2, loss_pca_2)

# set_title actually applies the title; the bare `ax1.title` attribute access
# did nothing.
ax1.set_title('loss comparison b/w PCA & NMF')
# compare loss b/w NMF & PCA, scatterplot loss of these two

### NMF with 3 components

In [None]:
from sklearn.decomposition import NMF
# Factorise the imputed table with a 3-component NMF.
# random_state=0 pins the random initialisation so reruns are reproducible.
model_3 = NMF(n_components=3, init='random', random_state=0)
# W_3: transformed data returned by fit_transform; H_3: the fitted components.
W_3 = model_3.fit_transform(dki_fa_imp)
H_3 = model_3.components_

In [None]:
W_3.shape

In [None]:
H_3.shape

### PCA with 3 components

In [None]:

#scaler = StandardScaler()
pca_3 = PCA(n_components=2)
#dki_fa_imp_nor = scaler.fit_transform(dki_fa_imp)
x_3 = pca_3.fit_transform(dki_fa_imp)

In [None]:
c_3 = pca_3.components_

In [None]:
# Per-row RMSE between the imputed data and each model's reconstruction.
# All rows are handled in one vectorised expression, so the row count is no
# longer hard-coded (it was fixed at 641).
loss_nmf_3 = np.sqrt(np.mean((W_3 @ H_3 - dki_fa_imp) ** 2, axis=1)).tolist()
# PCA centres the data before projecting, so the per-feature mean has to be
# added back to the projection; without it the PCA error mostly measures the
# missing mean rather than the reconstruction quality.
loss_pca_3 = np.sqrt(np.mean((x_3 @ c_3 + pca_3.mean_ - dki_fa_imp) ** 2, axis=1)).tolist()
fig2, ax2 = plt.subplots()
ax2.scatter(loss_nmf_3, loss_pca_3)

# Axes.title is a Text object, not a method; calling it raised TypeError.
ax2.set_title('loss comparasion b/w PCA & NMF')
# compare loss b/w NMF & PCA, scatterplot loss of these two

### NMF with 4 components

In [None]:
from sklearn.decomposition import NMF
model_4 = NMF(n_components=3, init='random', random_state=0)
W_4 = model_4.fit_transform(dki_fa_imp)
H_4 = model_4.components_

In [None]:
W_4.shape

In [None]:
H_4.shape

### PCA with 4 components

In [None]:

#scaler = StandardScaler()
pca_4 = PCA(n_components=2)
#dki_fa_imp_nor = scaler.fit_transform(dki_fa_imp)
x_4 = pca_4.fit_transform(dki_fa_imp)

In [None]:
c_4 = pca_4.components_

In [None]:
# Per-row RMSE between the imputed data and each model's reconstruction.
# All rows are handled in one vectorised expression, so the row count is no
# longer hard-coded (it was fixed at 641).
loss_nmf_4 = np.sqrt(np.mean((W_4 @ H_4 - dki_fa_imp) ** 2, axis=1)).tolist()
# PCA centres the data before projecting, so the per-feature mean has to be
# added back to the projection; without it the PCA error mostly measures the
# missing mean rather than the reconstruction quality.
loss_pca_4 = np.sqrt(np.mean((x_4 @ c_4 + pca_4.mean_ - dki_fa_imp) ** 2, axis=1)).tolist()
fig4, ax4 = plt.subplots()
ax4.scatter(loss_nmf_4, loss_pca_4)

ax4.set_title('loss comparasion b/w PCA & NMF')
# compare loss b/w NMF & PCA, scatterplot loss of these two