In [45]:
# Run if using tensorflow2.0+
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Add, Activation, InputLayer, Conv1D, BatchNormalization
from tensorflow.keras import Input
from tensorflow.keras.optimizers import SGD, Adam

In [2]:
# Load the full training table once; get_data() slices its batches out of this frame.
df_train = pd.read_csv('../Data/train.csv')

# Relative frequency of each open_channels class (trailing ';' suppresses the cell output).
df_train.open_channels.value_counts(normalize=True);

In [64]:
def get_data(batch_num='all', zeros=True, seq_len=1000, batch_size=500000,
             test_size=.3, random_state=None):
    """Build windowed train/test arrays from the module-level ``df_train`` frame.

    Rows are taken from ``df_train`` (columns ``signal`` and ``open_channels``),
    cut into consecutive windows of ``seq_len`` samples, shuffled window-wise
    and split into train and test sets.

    Parameters
    ----------
    batch_num : int or 'all'
        1-based index of the ``batch_size``-row slice to use, or ``'all'`` to
        use every row of ``df_train``.
    zeros : bool
        If False, rows whose ``open_channels`` is 0 are removed before
        windowing (windows are then no longer contiguous in the original
        row order).
    seq_len : int
        Number of samples per window.
    batch_size : int
        Number of rows per batch when ``batch_num`` is an integer.
    test_size : float
        Fraction of windows placed in the test split.
    random_state : int or None
        Seed for the shuffle and the split; ``None`` keeps the unseeded
        behaviour (global NumPy RNG).

    Returns
    -------
    x_train, x_test, y_train, y_test : numpy.ndarray
        Float arrays of shape ``(n_windows, seq_len, 1)``; ``x_*`` holds the
        signal, ``y_*`` the matching ``open_channels`` values.

    Raises
    ------
    ValueError
        If the selection contains fewer than ``seq_len`` rows.
    """
    # Select the rows to work on.
    if batch_num == 'all':
        df_batch = df_train
        print('Training on all data')
    else:
        beg = (batch_num - 1) * batch_size
        end = batch_num * batch_size
        df_batch = df_train[beg:end]

    # Optionally filter out the zero open_channels cases
    # (added because the model was always predicting 0).
    if not zeros:
        df_batch = df_batch[df_batch['open_channels'] != 0]

    signal = np.asarray(df_batch['signal'])
    open_channels = np.asarray(df_batch['open_channels'])

    # Only whole windows can be reshaped; any incomplete tail is dropped.
    n_windows = len(signal) // seq_len
    if n_windows == 0:
        raise ValueError(
            'Not enough rows (%d) to build a single window of length %d'
            % (len(signal), seq_len)
        )
    usable = n_windows * seq_len

    # Keep signal and label side by side so both are shuffled together.
    data = np.zeros((usable, 2))
    data[:, 0] = signal[:usable]
    data[:, 1] = open_channels[:usable]
    data = data.reshape((n_windows, seq_len, 2))

    # Shuffle whole windows (first axis only); samples inside a window keep their order.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    rng.shuffle(data)

    x = data[:, :, 0].reshape((-1, seq_len, 1))
    y = data[:, :, 1].reshape((-1, seq_len, 1))
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    return x_train, x_test, y_train, y_test

In [65]:
# Build the split for batch 5 and peek at the label of the first
# time step in training window 60.
x_train, x_test, y_train, y_test = get_data(5, zeros=True)
y_train[60, 0]

array([5.])

In [69]:
def resnet(filters=64, size=3, dropout=0, depth=1, dense_size=20,
           n_classes=11, seq_len=1000):
    """Build a 1-D residual convolutional network with a per-time-step softmax.

    Architecture: an input convolution, followed by ``depth`` residual blocks
    (Conv1D -> BatchNormalization -> Conv1D -> Dropout, added to the block's
    input), a ReLU Dense layer of ``dense_size`` units and a softmax Dense
    layer of ``n_classes`` units. All convolutions use ``padding='same'`` and
    stride 1, so the time dimension is preserved.

    Parameters
    ----------
    filters : int
        Number of filters in every Conv1D layer.
    size : int
        Kernel size of every Conv1D layer.
    dropout : float
        Dropout rate applied at the end of each residual block.
    depth : int
        Number of residual blocks. Values <= 0 give no residual block (the
        head is applied directly to the input convolution); there is no
        upper cap.
    dense_size : int
        Units in the hidden Dense layer before the output.
    n_classes : int
        Units in the softmax output layer.
    seq_len : int
        Length of each input sequence; inputs have shape ``(seq_len, 1)``.

    Returns
    -------
    tensorflow.keras.Model
        Uncompiled model mapping ``(batch, seq_len, 1)`` to
        ``(batch, seq_len, n_classes)``.
    """

    def conv(tensor):
        # Every convolution in the network shares the same configuration.
        return Conv1D(filters, size, padding='same', activation='relu', strides=1)(tensor)

    inputs = Input(shape=(seq_len, 1,))
    features = conv(inputs)

    # Stack the residual blocks; each block's output feeds the next one.
    for _ in range(depth):
        k = conv(features)
        k = BatchNormalization()(k)
        k = conv(k)
        k = Dropout(dropout)(k)
        features = Add()([k, features])

    # Only the last block feeds the head, so a single Dense head is built.
    hidden = Dense(dense_size, activation='relu')(features)
    outputs = Dense(units=n_classes, activation='softmax')(hidden)

    model = Model(inputs, outputs)
    return model

In [57]:
# Train a single-residual-block network on batch 2 only.
x_train, x_test, y_train, y_test = get_data(2, zeros=True)

model = resnet(filters=64, dropout=0, depth=1)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=100,
    verbose=1,
)

Train on 70 samples, validate on 30 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e4003d9e48>

In [None]:
# Train a fresh model on each of the 10 batches and record its test accuracy.
acc = []
for batch in range(1, 11):
    x_train, x_test, y_train, y_test = get_data(batch, zeros=True)

    # resnet() has no `layer_size` parameter (passing it raises TypeError);
    # NOTE(review): `filters` is presumably what was meant - confirm the intended width.
    model = resnet(filters=256, dropout=0.1, depth=8)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=100, verbose=1)

    # predict() returns per-time-step class probabilities; accuracy_score needs
    # flat class labels, so take the argmax over the class axis and flatten both sides.
    y_prob = model.predict(x_test)
    y_pred = np.argmax(y_prob, axis=-1).reshape(-1)
    y_true = y_test.reshape(-1).astype(int)
    score = accuracy_score(y_true, y_pred)
    acc.append(score)
acc

In [None]:
# Train one deep model on every row of the training set (batch_num defaults to 'all').
x_train, x_test, y_train, y_test = get_data(zeros=True)

# resnet() has no `layer_size` parameter (passing it raises TypeError);
# NOTE(review): `filters` is presumably what was meant - confirm the intended width.
model = resnet(filters=256, dropout=0.1, depth=8)
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=100, verbose=1)

In [72]:
# Create model for flat signal type (batches 1-6)
x_tr1,x_t1,y_tr1,y_t1 = get_data(1)
x_tr2,x_t2,y_tr2,y_t2 = get_data(2)
x_tr3,x_t3,y_tr3,y_t3 = get_data(3)
x_tr4,x_t4,y_tr4,y_t4 = get_data(4)
x_tr5,x_t5,y_tr5,y_t5 = get_data(5)
x_tr6,x_t6,y_tr6,y_t6 = get_data(6)
x_train = np.concatenate((x_tr1,x_tr2,x_tr3,x_tr4,x_tr5,x_tr6))
x_test = np.concatenate((x_t1,x_t2,x_t3,x_t4,x_t5,x_t6))
y_train = np.concatenate((y_tr1,y_tr2,y_tr3,y_tr4,y_tr5,y_tr6))
y_test = np.concatenate((y_t1,y_t2,y_t3,y_t4,y_t5,y_t6))

model1 = resnet(filters=64, size=100, dropout=0.1, depth=2)
# opt = Adam(learning_rate=.01,beta_1=0.95)
model1.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model1.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=80, verbose=1)

Train on 2100 samples, validate on 900 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e4032f17f0>

In [71]:
# Persist the flat-signal model (`model1`) to disk.
# NOTE(review): the "85" in the file name is presumably an accuracy figure - confirm.
model1.save('../Models/resnet_flat_85.h5')

In [65]:
# Create model for parabolic signal type (batches 2,7-10)
x_tr2,x_t2,y_tr2,y_t2 = get_data(2)
x_tr7,x_t7,y_tr7,y_t7 = get_data(7)
x_tr8,x_t8,y_tr8,y_t8 = get_data(8)
x_tr9,x_t9,y_tr9,y_t9 = get_data(9)
x_tr10,x_t10,y_tr10,y_t10 = get_data(10)
x_train = np.concatenate((x_tr2,x_tr7,x_tr8,x_tr9,x_tr10))
x_test = np.concatenate((x_t2,x_t7,x_t8,x_t9,x_t10))
y_train = np.concatenate((y_tr2,y_tr7,y_tr8,y_tr9,y_tr10))
y_test = np.concatenate((y_t2,y_t7,y_t8,y_t9,y_t10))

model = resnet(filters=64, size=100, dropout=0.1, depth=2)
model.compile(loss='sparse_categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=6, validation_data=(x_test,y_test), batch_size=80, verbose=1)

Train on 1874995 samples, validate on 625000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x24e1e53cf60>

In [72]:
# Persist the current `model` to disk.
# NOTE(review): `model` is reassigned by several cells; this presumably saves the
# parabolic-signal model, and "49" is presumably an accuracy figure - confirm both.
model.save('../Models/resnet_curvy_49.h5')