In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, SimpleRNN, Dense, Dropout, Flatten
from sklearn.model_selection import train_test_split

In [2]:
df_train = pd.read_csv('../Data/train.csv')
df_train['binary_open_channels'] = [int(x != 0) for x in df_train.open_channels]
df_train

Unnamed: 0,time,signal,open_channels,binary_open_channels
0,0.0001,-2.7600,0,0
1,0.0002,-2.8557,0,0
2,0.0003,-2.4074,0,0
3,0.0004,-3.1404,0,0
4,0.0005,-3.1525,0,0
...,...,...,...,...
4999995,499.9996,2.9194,7,1
4999996,499.9997,2.6980,7,1
4999997,499.9998,4.5164,8,1
4999998,499.9999,5.6397,9,1


In [5]:
signal = np.array(df_train.signal)
open_channels = np.array(df_train.open_channels)
binary_open_channels = np.array(df_train.binary_open_channels)

data = np.zeros((len(signal), 3))
data[:, 0] = signal
data[:, 1] = open_channels
data[:, 2] = binary_open_channels
data = data.reshape((1000, 5000, 3))

channel_count = 0
no_channel_count = 0

for i in range(len(data)):
    if sum(data[i,:,2]/len(data[i,:,2])) > .7:
        channel_count += 1
    else:
        no_channel_count += 1
        
print(channel_count,no_channel_count)

no_channel_data = np.zeros((no_channel_count,5000, 3))
channel_data = np.zeros((channel_count,5000, 3))

channel_count = 0
no_channel_count = 0

for i in range(len(data)):
    if sum(data[i,:,2]/len(data[i,:,2])) > .7:
        channel_data[channel_count] = data[i]
        channel_count += 1
    else:
        no_channel_data[no_channel_count] = data[i]
        no_channel_count += 1

np.random.shuffle(channel_data)
np.random.shuffle(no_channel_data)

x_channel = channel_data[:,:,0].reshape(channel_count,5000,1)
y_channel = channel_data[:,:,1].reshape(channel_count,5000,1)

x_no_channel = no_channel_data[:,:,0].reshape(no_channel_count,5000,1)
y_no_channel = no_channel_data[:,:,1].reshape(no_channel_count,5000,1)

768 232


In [6]:
x_channel_train, x_channel_test, y_channel_train, y_channel_test = train_test_split(x_channel,y_channel,test_size = .3)
x_no_channel_train, x_no_channel_test, y_no_channel_train, y_no_channel_test = train_test_split(x_no_channel,y_no_channel,test_size = .3)

In [7]:
binary_model = tf.keras.models.load_model('binary_class_94_33.h5')

In [8]:
channels_model = Sequential()
channels_model.add(SimpleRNN(units=20, return_sequences=True, input_shape=(5000,1,)))
channels_model.add(Dense(20,activation='relu'))
channels_model.add(Dense(units=1,activation='relu'))
channels_model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])

In [9]:
no_channels_model = Sequential()
no_channels_model.add(SimpleRNN(units=20, return_sequences=True, input_shape=(5000,1,)))
no_channels_model.add(Dense(20,activation='relu'))
no_channels_model.add(Dense(units=1,activation='relu'))
no_channels_model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])

In [10]:
channels_model.fit(x_channel_train, y_channel_train, epochs=20, validation_data=(x_channel_test,y_channel_test),verbose=1)

Train on 537 samples, validate on 231 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x23ad405f198>

In [11]:
no_channels_model.fit(x_no_channel_train, y_no_channel_train, epochs=20, validation_data=(x_no_channel_test,y_no_channel_test),verbose=1)

Train on 162 samples, validate on 70 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x23addab6dd8>

In [12]:
channel_predictions = channels_model.predict(x_channel)
no_channel_predictions = no_channels_model.predict(x_no_channel)

In [17]:
channel_error = []
no_channel_error = []
for i in range(len(y_channel)):
    for j in range(len(y_channel[i])):
        actual = y_channel[i][j][0]
        pred = channel_predictions[i][j][0]
        channel_error.append(abs(actual-pred))
                                 
for i in range(len(y_no_channel)):
    for j in range(len(y_no_channel[i])):
        actual = y_channel[i][j][0]
        pred = no_channel_predictions[i][j][0]
        no_channel_error.append(abs(actual-pred))

In [18]:
print('Average channel error:',sum(channel_error)/len(channel_error))
print('Average no_channel error:',sum(no_channel_error)/len(no_channel_error))

Average channel error: 1.3951610500097662
Average no_channel error: 3.4727496507126485
