In [40]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [41]:
# Load csv file from Google drive
file_path = '/content/drive/MyDrive/Colab Notebooks/2024_BNEM/DL/data_pattern_mean.csv'
data = pd.read_csv(file_path)
data

Unnamed: 0,seq,N5_mean,N50_mean,N500_mean,N5M10_mean,N5M100_mean,AAAAA,AAAAT,AAAAG,AAAAC,...,...CA,...CT,...CG,...CC,...C.,....A,....T,....G,....C,.....
0,TAAAAATT,0.509056,0.582795,0.703277,0.713463,0.674791,True,False,False,False,...,False,False,False,False,False,True,False,False,False,True
1,TAAAACTT,0.491550,0.571642,0.709767,0.737302,0.782054,False,False,False,True,...,False,False,False,False,False,False,False,False,True,True
2,TAAAAGTT,0.510142,0.591441,0.720091,0.742541,0.781795,False,False,True,False,...,False,False,False,False,False,False,False,True,False,True
3,TAAAATTT,0.496465,0.581366,0.709739,0.730826,0.763748,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True
4,TAAACATT,0.492476,0.568615,0.714035,0.742980,0.776192,False,False,False,False,...,True,False,False,False,True,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,TTTTGTTT,0.475727,0.568956,0.733198,0.742376,0.787436,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True
1020,TTTTTATT,0.477428,0.565936,0.743278,0.754569,0.812502,False,False,False,False,...,False,False,False,False,False,True,False,False,False,True
1021,TTTTTCTT,0.472222,0.554448,0.715267,0.730751,0.791353,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
1022,TTTTTGTT,0.475066,0.565352,0.730467,0.743580,0.796328,False,False,False,False,...,False,False,False,False,False,False,False,True,False,True


In [42]:
col_to_drop = ['seq', 'N5_mean', 'N50_mean', 'N500_mean', 'N5M10_mean', 'N5M100_mean', '.....']
X = data.drop(columns = col_to_drop, inplace=False)

y_N5 = data[col_to_drop[1]] # N5_mean
y_N50 = data[col_to_drop[2]] # N50_mean
y_N500 = data[col_to_drop[3]] # N500_mean
y_N5M10 = data[col_to_drop[4]] # N5M10_mean
y_N5M100 = data[col_to_drop[5]] # N5M100_mean

In [43]:
X.shape

(1024, 3124)

# Deeplearning Model
There 2 models in each type of solution. \
Model 1. 1 hidden layer with 256 hidden nodes  (light ver.) \
Model 2. 3 hidden layers with 1024, 256, 32 hidden nodes respectively (deep ver.) \
\
Default Hyperparmeters of model \
- activation func : relu (last: linear) \
- optimizer: adam \
- loss metric: MSE \
- epochs=10, batch_size=32 \
- test metrics: MSE, RMSE, MAE \

# N5

In [44]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_N5, test_size=0.2, random_state=42)

In [45]:
print("model1 of N5\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model1 of N5

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.003274542512372136
Test RMSE: 0.05722361848373568
Test MAE: 0.04701348766684532


In [46]:
print("model2 of N5\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(3124,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model2 of N5

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.0006094361888244748
Test RMSE: 0.024686761408181407
Test MAE: 0.020756462588906288


# N50

In [47]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_N50, test_size=0.2, random_state=42)

In [48]:
print("model1 of N50\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model1 of N50

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.002742068376392126
Test RMSE: 0.05236476273594798
Test MAE: 0.04222442954778671


In [49]:
print("model2 of N50\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(3124,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model2 of N50

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.0011796840699389577
Test RMSE: 0.03434652922696786
Test MAE: 0.029773404821753502


# N500

In [50]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_N500, test_size=0.2, random_state=42)

In [51]:
print("model1 of N500\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model1 of N500

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.004455178044736385
Test RMSE: 0.06674712012316625
Test MAE: 0.05451209470629692


In [52]:
print("model2 of N500\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(3124,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model2 of N500

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.0013716359389945865
Test RMSE: 0.037035603667209024
Test MAE: 0.03124573454260826


# N5M10

In [53]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_N5M10, test_size=0.2, random_state=42)

In [54]:
print("model1 of N5M10\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model1 of N5M10

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.0042813788168132305
Test RMSE: 0.06543224600159489
Test MAE: 0.05234655365347862


In [55]:
print("model2 of N5M10\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(3124,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model2 of N5M10

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.002480837982147932
Test RMSE: 0.04980801122458045
Test MAE: 0.04319719970226288


#N5M100

In [56]:
# Split train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_N5M100, test_size=0.2, random_state=42)

In [57]:
print("model1 of N5M100\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model1 of N5M100

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.0061326660215854645
Test RMSE: 0.07831134031278908
Test MAE: 0.06447983533143997


In [59]:
print("model2 of N5M100\n")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1024, activation='relu', input_shape=(3124,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])

# Compile
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mse', 'mae'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate
loss, mse, mae = model.evaluate(X_test, y_test)
print('\nTest MSE:', mse)
print('Test RMSE:', np.sqrt(mse))
print('Test MAE:', mae)

model2 of N5M100

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test MSE: 0.003701569279655814
Test RMSE: 0.06084052333482853
Test MAE: 0.050853803753852844
