In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from tensorflow import keras
from tensorflow.keras import layers

### __1. Import and data pre-processing:__

In [None]:
# Load the counts table and use the column that pandas labelled
# "Unnamed: 0" as the row index.
df_counts = (
    pd.read_csv("dataset/mdd_counts.csv")
    .set_index("Unnamed: 0")
)

print(df_counts.shape)
df_counts.head()

In [None]:
# Load the metadata table, indexed by its "!Sample_title" column.
df_meta = pd.read_csv("dataset/mdd_meta.csv").set_index("!Sample_title")
df_meta.head()

In [None]:
# Select the (x, y) data:
#   x -> counts table
#   y -> (sample, phenotype, gender)

counts = df_counts.to_numpy()
print(counts.min(), counts.mean(), counts.max())

# Draw the raw counts matrix as an image with a colorbar and no axis ticks.
fig, ax = plt.subplots(figsize=(15, 3))
im = ax.imshow(counts)
fig.colorbar(im, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# Normalization: take log(counts + 1), then divide by the global maximum
# of the log-transformed matrix.
log_counts = np.log(counts + 1)
x = log_counts / log_counts.max()

print(x.min(), x.mean(), x.max())

# Draw the normalized matrix the same way as the raw counts.
fig, ax = plt.subplots(figsize=(15, 3))
im = ax.imshow(x)
fig.colorbar(im, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
# Build one metadata row per entry of the counts index:
#   (subject_id, sample, phenotype, gender)
# Each index label is split on "."; its first piece is then used as the
# column key into df_meta to fetch the 'phenotype' and 'gender' rows.

meta = [label.split(".") for label in df_counts.index]
N = len(meta)

for i, row in enumerate(meta):
    n = row[0]
    row.extend(df_meta.loc[['phenotype', 'gender'], n].tolist())

meta = np.array(meta)
print(meta.shape)
meta

In [None]:
# one-hot encoding:

def one_hot(meta):
    """One-hot encode a 1-D sequence of labels.

    Parameters
    ----------
    meta : array-like, 1-D
        Labels to encode (e.g. one column of the metadata array).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(meta), n_labels)``. Row ``i`` holds 1.0
        in the column of ``meta[i]``'s label and 0.0 elsewhere. Columns follow
        the sorted order of the distinct labels.

    Raises
    ------
    ValueError
        If ``meta`` is not one-dimensional.
    """
    values = np.asarray(meta)
    if values.ndim != 1:
        raise ValueError(
            f"one_hot expects a 1-D sequence of labels, got shape {values.shape}"
        )

    # np.unique returns the distinct labels sorted, so the column order is the
    # same on every run. Building the label list from a set() made the order
    # depend on string hashing, which can change between kernel restarts.
    labels = np.unique(values)

    # Broadcast (n, 1) against (1, k) to mark, for each row, its label column.
    return (values[:, np.newaxis] == labels[np.newaxis, :]).astype(np.float64)

# Encode metadata columns 1, 2 and 3 separately (sample, phenotype and
# gender according to the column layout noted where `meta` is built).
y1, y2, y3 = (one_hot(meta[:, col]) for col in (1, 2, 3))

print(f"y1:{y1.shape}, y2:{y2.shape}, y3:{y3.shape}")

y1[:2], y2[:2], y3[:2]

In [None]:
# shuffling:
# A fixed seed makes the permutation identical on every Restart & Run All.
# The test split is sliced from these shuffled arrays, so an unseeded
# permutation would put different samples in the test set on each run.
SHUFFLE_SEED = 42

N_samples = x.shape[0]

# All per-sample arrays are permuted with the same index vector, so they
# must agree on the number of samples.
assert all(len(arr) == N_samples for arr in (y1, y2, y3, meta)), \
    "x, y1, y2, y3 and meta must have the same number of samples"

i = np.random.default_rng(SHUFFLE_SEED).permutation(N_samples)

x, y1, y2, y3 = x[i], y1[i], y2[i], y3[i]
meta = meta[i]

x.shape, y1.shape, y2.shape, y3.shape

In [None]:
# Append a trailing axis of length 1 to act as the channel dimension
# for the convnet.
x = x[..., np.newaxis]
x.shape

In [None]:
# Hold out the first 10% of the samples as the test set; the remainder
# stays under the original names (x, y1, y2, y3).
N_test = int(0.10*N_samples)

x_test, x = x[:N_test], x[N_test:]
print(f"x:{x.shape}, x-test:{x_test.shape}")

y1_test, y1 = y1[:N_test], y1[N_test:]
print(f"y1:{y1.shape}, y1-test:{y1_test.shape}")

y2_test, y2 = y2[:N_test], y2[N_test:]
print(f"y2:{y2.shape}, y2-test:{y2_test.shape}")

y3_test, y3 = y3[:N_test], y3[N_test:]
print(f"y3:{y3.shape}, y3-test:{y3_test.shape}")


In [None]:
# Drop the names of the raw dataframes and the un-normalized counts array,
# which the notebook treats as no longer needed from here on.
del df_counts
del df_meta
del counts

### __2. Neural network modeling:__

In [None]:
def MyModel(latent_dim=20, act="relu", kernel_size=5, input_size=1052):
    """Build a 1-D convolutional autoencoder.

    Encoder: two Conv1D + MaxPooling1D(2) stages, global max pooling, then a
    Dense(latent_dim) -> BatchNormalization -> tanh bottleneck whose activation
    layer is named "ls".
    Decoder: two Dense layers, a reshape to (input_size // 4, 64), two
    stride-2 Conv1DTranspose layers and a final sigmoid Conv1D with one filter.

    Parameters
    ----------
    latent_dim : int
        Number of units in the bottleneck.
    act : str
        Activation used by the convolutional and decoder Dense layers.
    kernel_size : int
        Kernel size shared by every convolutional layer.
    input_size : int
        Length of the input sequence. Must be a positive multiple of 4: the
        decoder starts from input_size // 4 steps and doubles the length
        twice, so only multiples of 4 are reconstructed at the input length.

    Returns
    -------
    keras.Model
        Model named 'autoencoder' taking inputs of shape (input_size, 1).

    Raises
    ------
    ValueError
        If input_size is not a positive integer multiple of 4.
    """
    # Validate before touching Keras so a bad size fails fast and clearly.
    if int(input_size) != input_size or input_size <= 0 or input_size % 4 != 0:
        raise ValueError(
            f"input_size must be a positive multiple of 4, got {input_size!r}"
        )
    input_size = int(input_size)

    # Sequence length at the start of the decoder (263 for the default 1052).
    reduced_size = input_size // 4

    In = keras.Input((input_size, 1))

    # --- encoder ---
    x = layers.Conv1D(32, kernel_size, activation=act, padding="same")(In)
    x = layers.MaxPooling1D(2)(x)

    x = layers.Conv1D(64, kernel_size, activation=act, padding="same")(x)
    x = layers.MaxPooling1D(2)(x)

    x = layers.GlobalMaxPooling1D()(x)

    # --- bottleneck: tanh output, exposed under the layer name "ls" ---
    x = layers.Dense(latent_dim)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("tanh", name="ls")(x)

    # --- decoder ---
    x = layers.Dense(reduced_size, activation=act)(x)

    x = layers.Dense(reduced_size*64, activation=act)(x)
    x = layers.Reshape((reduced_size, 64))(x)

    # Each stride-2 transposed convolution with "same" padding doubles the length.
    x = layers.Conv1DTranspose(64, kernel_size, activation=act, strides=2, padding="same")(x)
    x = layers.Conv1DTranspose(32, kernel_size, activation=act, strides=2, padding="same")(x)

    Out = layers.Conv1D(1, kernel_size, activation="sigmoid", padding="same")(x)

    return keras.Model(inputs=In, outputs=Out, name='autoencoder')

# Instantiate the autoencoder with a 2-unit bottleneck and print its layers.
latent_dim = 2
model_config = {"latent_dim": latent_dim, "act": "elu", "kernel_size": 5}
model = MyModel(**model_config)

model.summary()

### __3. Model compilation:__

### __4. K-fold cross-validation:__   

### __5. Final training__    

### __6. Test evaluation:__

#### __6.1 Anomaly detection:__

#### __6.2 Classification:__

#### __6.3 Data generator:__

### __7. Saving the model:__
<font size=3>
    
For model __loading__, see [2.2-notebook](2.2-notebook.ipynb).