In [1]:
# Run if using tensorflow2.0+
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Add, Activation, InputLayer
from tensorflow.keras import Input

In [None]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Add, Activation
from keras import Input

In [2]:
df_train = pd.read_csv('../Data/train.csv')
df_train['open_channels'].value_counts(normalize=True)

0     0.248030
1     0.197173
3     0.133722
2     0.110785
4     0.080682
5     0.055575
7     0.053003
8     0.049037
6     0.037622
9     0.027224
10    0.007147
Name: open_channels, dtype: float64

In [14]:
def get_data(batch_num='all', zeros = True, randomize=True):
    #create training data per batch
    batch_size = 500000
    if batch_num == 'all':
        beg = 0
        end = 499999999
        print('Training on all data')
    else:
        batch = batch_num
        beg = (batch-1)*batch_size 
        end = batch*batch_size - 1
    df_batch = df_train[beg:end]
    # filter out the zero open_channels cases
    if not zeros:
        df_batch = df_batch.drop(df_batch[df_batch['open_channels']==0].index) #optional, added because model was always predicting 0
        
    # randomize 
    signal = np.array(df_batch.signal)
    open_channels = np.array(df_batch.open_channels)
    if randomize:
        x_train, x_test, y_train, y_test = train_test_split(signal,open_channels,test_size=0.25)
        x_train = np.reshape(x_train, (-1,1))
        y_train.reshape(-1,1)
        x_test = np.reshape(x_test, (-1,1))
        y_test.reshape(-1,1)
    
    return x_train, x_test, y_train, y_test

In [7]:
def resnet(layer_size=64, dropout=0, depth=1):
    #model params
    layer = layer_size
    dropout = dropout
    layers = depth
    
    #create model
    inputs = Input(shape=(1,))

    if depth > 0:
        k = Dense(layer, activation='relu')(inputs)
        k = Dense(layer, activation='relu')(k)
        k = Dropout(dropout)(k)
        block1 = Add()([k, inputs])
        kl = Dense(20, activation='relu')(block1)
        
        if depth > 1:
            k = Dense(layer, activation='relu')(block1)
            k = Dense(layer, activation='relu')(k)
            k = Dropout(dropout)(k)
            block2 = Add()([k, block1])
            kl = Dense(20, activation='relu')(block2)

            if depth > 2:
                k = Dense(layer, activation='relu')(block2)
                k = Dense(layer, activation='relu')(k)
                k = Dropout(dropout)(k)
                block3 = Add()([k, block2])
                kl = Dense(20, activation='relu')(block3)

                if depth > 3:
                    k = Dense(layer, activation='relu')(block3)
                    k = Dense(layer, activation='relu')(k)
                    k = Dropout(dropout)(k)
                    block4 = Add()([k, block3])
                    kl = Dense(20, activation='relu')(block4)
                    
                    if depth > 4:
                        k = Dense(layer, activation='relu')(block4)
                        k = Dense(layer, activation='relu')(k)
                        k = Dropout(dropout)(k)
                        block5 = Add()([k, block4])
                        kl = Dense(20, activation='relu')(block5)
                        
                        if depth > 5:
                            k = Dense(layer, activation='relu')(block5)
                            k = Dense(layer, activation='relu')(k)
                            k = Dropout(dropout)(k)
                            block6 = Add()([k, block5])
                            kl = Dense(20, activation='relu')(block6)
                        
                            if depth > 6:
                                k = Dense(layer, activation='relu')(block6)
                                k = Dense(layer, activation='relu')(k)
                                k = Dropout(dropout)(k)
                                block7 = Add()([k, block6])
                                kl = Dense(20, activation='relu')(block7)
                                
                                if depth > 7:
                                    k = Dense(layer, activation='relu')(block7)
                                    k = Dense(layer, activation='relu')(k)
                                    k = Dropout(dropout)(k)
                                    block8 = Add()([k, block7])
                                    kl = Dense(20, activation='relu')(block8)

    outputs = Dense(units=1, activation='relu')(kl)
    
    model = Model(inputs,outputs)
    return model

In [16]:
x_train,x_test,y_train,y_test = get_data(1,zeros=False)
model = resnet(layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 11899 samples, validate on 3967 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x24e1f1a0198>

In [12]:
pred = model.predict(x_test)
np.average(pred)
np.max(pred)

0.0

In [61]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=2,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 14503 samples, validate on 4835 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19e0aaced30>

In [62]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=3,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 280496 samples, validate on 93499 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19e09bf2fd0>

In [63]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=4,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 366165 samples, validate on 122056 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19e0df90a58>

In [64]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=5,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 374997 samples, validate on 125000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19e0e238da0>

In [65]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=6,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 374282 samples, validate on 124761 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19e1656da90>

In [77]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=7,layer_size=256, dropout=0.1, depth=5)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 282603 samples, validate on 94201 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x19e098189e8>

In [68]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=8,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 365658 samples, validate on 121887 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x19e1e01c080>

In [69]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=9,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 374274 samples, validate on 124758 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x19e2cffd978>

In [4]:
model,x_train,x_test,y_train,y_test = resnet(batch_num=10,layer_size=256, dropout=0.1, depth=8)
model.compile(loss='mean_squared_error',optimizer='sgd',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test,y_test), batch_size=100, verbose=1)

Train on 374999 samples, validate on 125000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x273de2318d0>

In [6]:
model.predict(x_train)

array([[nan],
       [nan],
       [nan],
       ...,
       [nan],
       [nan],
       [nan]], dtype=float32)