# Load data

In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16
import csv

In [2]:
# filenames
HOME = '/home/jupyter-dylan/'

INPUT_TRAIN = '{}input_train.csv'.format(HOME)
INPUT_TEST = '{}input_test.csv'.format(HOME)
OUTPUT_TRAIN = '{}output_train-1.csv'.format(HOME)
OUTPUT_TEST= '{}output_test-1.csv'.format(HOME)
OUTPUT_GENES = '{}gene labels/output_genes-1.txt'.format(HOME)

In [3]:
# load data into dataframes
train_input = pd.read_csv(INPUT_TRAIN, header=0, index_col=0)
train_output = pd.read_csv(OUTPUT_TRAIN, header=0, index_col=0)
test_input = pd.read_csv(INPUT_TEST, header=0, index_col=0)
test_output = pd.read_csv(OUTPUT_TEST, header=0, index_col=0)

# PCA on transcription factors

## Input feature design

In [4]:
# use 1st component to order inputs
pca_tf = PCA(n_components=1)
train_input_pca_tf = pca_tf.fit_transform(train_input.transpose().to_numpy())
pca_tf_order = train_input_pca_tf.reshape(-1).argsort()
pca_tf_col_names = train_input.columns[pca_tf_order]
train_input_pca_tf = train_input[pca_tf_col_names]
test_input_pca_tf = test_input[pca_tf_col_names]

In [5]:
# center inputs to mean 0 and scale to variance 1
scaler = StandardScaler()
train_input_pca_tf = scaler.fit_transform(train_input_pca_tf)
test_input_pca_tf = scaler.transform(test_input_pca_tf)

## 1D ConvNet

In [18]:
with open(OUTPUT_GENES) as f:
    genes = [gene[:-1] for gene in f]
    
metrics = dict()

for gene in genes:
    print(gene)
    # model
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(train_input.shape[1], 1)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae'])

    # training
    early_stop = EarlyStopping(monitor='val_loss', patience=5)
    model.fit(x=np.expand_dims(train_input_pca_tf, axis=2), y=train_output[gene], epochs=50, callbacks=[early_stop], validation_split=.2)

    # evaluation
    metrics[gene] = (model.evaluate(x=np.expand_dims(test_input_pca_tf, axis=2), y=test_output[gene])[0])



App
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Apoe
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Gusb
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Lamp5
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Mbp
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Pvalb
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Rorb
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
S100b
Train on 14833 samples, validate on 3709 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Slc30a3
Train on 14833 samples, validat

KeyError: 'Map'

In [19]:
metrics # MSE

{'App': 1.0460105057207233e-06,
 'Apoe': 8.929495186361592e-06,
 'Gusb': 3.871291941410777e-09,
 'Lamp5': 6.345943838978644e-07,
 'Mbp': 5.839914346504421e-07,
 'Pvalb': 2.092350798229969e-07,
 'Rorb': 1.4227604709813485e-08,
 'S100b': 1.822111108670648e-08,
 'Slc30a3': 2.0113461473288134e-08,
 'Snca': 1.8244598839261085e-06}

In [35]:
# write mse to csv
with open('1d_cnn_mse.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(metrics.items())