In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers

### __1. Import and data pre-processing:__

In [None]:
# Load the count table and use the column pandas labels "Unnamed: 0"
# (the header-less first CSV column) as the row index.
df_counts = pd.read_csv("dataset/mdd_counts.csv").set_index("Unnamed: 0")

print(df_counts.shape)
df_counts.head()

In [None]:
# Load the sample metadata, indexed by the "!Sample_title" column.
df_meta = pd.read_csv("dataset/mdd_meta.csv").set_index("!Sample_title")
df_meta.head()

In [None]:
# Select the (x, y) data:
#   x -> counts table
#   y -> (sample, phenotype, gender)

counts = df_counts.to_numpy()
print(counts.min(), counts.mean(), counts.max())

# Heat-map of the raw count matrix; tick marks are removed on both axes.
fig, ax = plt.subplots(figsize=(15, 3))
image = ax.imshow(counts)
fig.colorbar(image, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# Normalization: take log(counts + 1), then divide by the global maximum
# of the log-transformed matrix.
x = np.log(counts + 1)
x = x / x.max()

print(x.min(), x.mean(), x.max())

# Heat-map of the normalized matrix, drawn the same way as the raw counts.
fig, ax = plt.subplots(figsize=(15, 3))
image = ax.imshow(x)
fig.colorbar(image, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# meta -> (subject_id, sample, phenotype, gender)
# Each index label of the count table is split on "."; the first piece is
# then used as the column key into df_meta to fetch that subject's
# 'phenotype' and 'gender' rows.

meta = [label.split(".") for label in df_counts.index]

N = len(meta)

for row in meta:
    subject = row[0]
    row.extend(df_meta.loc[['phenotype', 'gender'], subject].tolist())

meta = np.array(meta)
print(meta.shape)
meta

In [None]:
# one-hot encoding:

def one_hot(meta):
    """One-hot encode a 1-D sequence of categorical labels.

    Parameters
    ----------
    meta : 1-D array-like
        Label of each sample (e.g. strings).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(meta), n_labels)``; row ``i`` holds a
        single 1.0 in the column of ``meta[i]``'s label and 0.0 elsewhere.

    Notes
    -----
    Columns follow the sorted order of the distinct labels. Deriving the
    columns from ``set(meta)`` instead would make their order depend on
    string hashing, which changes between interpreter sessions, so the
    same data could be encoded differently from one run to the next.
    """
    values = np.asarray(meta)

    # `labels` is sorted and unique; `codes[i]` is the column of values[i].
    labels, codes = np.unique(values, return_inverse=True)

    hot = np.zeros((len(values), len(labels)))
    hot[np.arange(len(values)), codes] = 1.0

    return hot

# Encode columns 1, 2 and 3 of `meta` separately, then join the three
# encodings side by side into a single label matrix.
meta1, meta2, meta3 = (one_hot(meta[:, col]) for col in (1, 2, 3))

y = np.concatenate((meta1, meta2, meta3), axis=1)

print("y:", y.shape)

y[:4]

In [None]:
# shuffling:
# A fixed seed makes the permutation -- and therefore the train/validation/test
# split derived from it -- identical on every "Restart & Run All".
SEED = 42

N_samples = x.shape[0]

# x, y and meta are all indexed with the same permutation, so they must
# have one row per sample.
assert len(y) == N_samples and len(meta) == N_samples, \
    "x, y and meta must have the same number of rows"

i = np.random.default_rng(SEED).permutation(N_samples)
x, y = x[i], y[i]
meta = meta[i]

x.shape, y.shape

In [None]:
# Split (x, y) into train, validation and test partitions:
# 20% of the samples go to validation, 10% to test (both rounded down),
# and the remainder to training.

N_val = int(0.2*N_samples)
N_test = int(0.1*N_samples)
N_train = N_samples - (N_val + N_test)

# Consecutive row ranges: [0, N_train), [N_train, N_train+N_val), the rest.
train_rows = slice(None, N_train)
val_rows = slice(N_train, N_train + N_val)
test_rows = slice(N_train + N_val, None)

x_train, x_val, x_test = x[train_rows], x[val_rows], x[test_rows]

print(f"x-train:{x_train.shape}, x-val:{x_val.shape}, x-test:{x_test.shape}")

y_train, y_val, y_test = y[train_rows], y[val_rows], y[test_rows]

print(f"y-train:{y_train.shape}, y-val:{y_val.shape}, y-test:{y_test.shape}")


In [None]:
# Remove the intermediate objects from the namespace; the train/validation/
# test arrays and `meta` are kept.
# NOTE: re-running this cell on its own raises NameError, because the names
# are already gone.

del df_counts, df_meta, counts, meta1, meta2, meta3, x, y

### __2. Neural network modeling:__

In [None]:
# Fully connected network whose output layer has the same width as its
# input: hidden ReLU layers of 400-200-20-200-400 units, then a sigmoid
# output layer.
# NOTE(review): the names `In` and `Out` shadow IPython's input/output
# history variables in a notebook session -- consider renaming once every
# cell that refers to them can be updated together.
n_features = x_train.shape[1]

In = keras.Input((n_features, ))

x = In
for units in (400, 200, 20, 200, 400):
    x = layers.Dense(units, activation='relu')(x)

Out = layers.Dense(n_features, activation='sigmoid')(x)

model = keras.Model(inputs=In, outputs=Out)
model.summary()

### __3. Model compilation:__

### __4. Train and validation__   

### __5. Final training__    

### __6. Test evaluation__:

#### __6.1 Anomaly detection:__

### __7. Saving the model__:
<font size=3>
    
For model __loading__, see [2.2-notebook](2.2-notebook.ipynb).

</font>