## Earthquake Prediction

### Author: Syed Zain Raza


In [2]:
import numpy as np 
import pandas as pd
import os
from tqdm import tqdm


# Seed NumPy's global random generator before anything draws from it
# (np.random.randint is used by the batch generator later in this notebook).
from numpy.random import seed
seed(639)
# Seed TensorFlow's random number generation as well.
# NOTE(review): set_random_seed is imported from the top-level tensorflow
# package here — confirm the installed TensorFlow version still exposes it.
from tensorflow import set_random_seed
set_random_seed(5944)

#### Loading training data

In [4]:
# Read the training CSV with both named columns parsed as float32, and keep
# only the underlying NumPy array (the DataFrame itself is not retained).
float_data = pd.read_csv(
    "../train.csv",
    dtype={"acoustic_data": np.float32, "time_to_failure": np.float32},
).values



#### Features 

In [5]:
def extract_features(z):
    """Summarise every row of ``z`` with six statistics.

    Parameters
    ----------
    z : array
        Array whose statistics are taken along axis 1 (one value per row).

    Returns
    -------
    numpy.ndarray
        One row per row of ``z`` and six columns, in this order: mean, min,
        max, standard deviation, ``max / |min|`` and ``max - |min|``.
    """
    row_mean = z.mean(axis=1)
    row_min = z.min(axis=1)
    row_max = z.max(axis=1)
    row_std = z.std(axis=1)

    # Both derived features compare the maximum with the magnitude of the minimum.
    min_magnitude = np.abs(row_min)

    return np.c_[row_mean,
                 row_min,
                 row_max,
                 row_std,
                 row_max / min_magnitude,
                 row_max - min_magnitude]

#### Creating instances of 150 steps × 1000 samples

In [6]:
def create_X(x, last_index=None, n_steps=150, step_length=1000):
    """Build the per-step feature matrix for one window of the signal.

    The window is the ``n_steps * step_length`` values of ``x`` that end just
    before ``last_index``. It is reshaped to ``n_steps`` rows, shifted by 5 and
    divided by 3, and ``extract_features`` is applied three times: to each
    whole row, to ``temp[:, -step_length // 10:]`` and to
    ``temp[:, -step_length // 100:]`` (the last 100 and the last 10 values of
    each row with the default ``step_length=1000``).

    Parameters
    ----------
    x : numpy.ndarray
        Signal to cut the window from.
    last_index : int, optional
        Exclusive end of the window; defaults to ``len(x)``.
    n_steps : int
        Number of rows (time steps) in the result.
    step_length : int
        Number of signal values per step.

    Returns
    -------
    numpy.ndarray
        The three ``extract_features`` results joined column-wise, one row
        per step.

    Raises
    ------
    AssertionError
        If the window would start before index 0.
    """
    # Identity comparison is the correct way to test for the None sentinel.
    if last_index is None:
        last_index = len(x)

    window_start = last_index - n_steps * step_length
    assert window_start >= 0, "window of n_steps * step_length values does not fit before last_index"

    # NOTE(review): 5 and 3 look like a rough mean / spread used to normalise
    # the signal — confirm against the training data.
    temp = (x[window_start:last_index].reshape(n_steps, -1) - 5) / 3

    return np.c_[extract_features(temp),
                 extract_features(temp[:, -step_length // 10:]),
                 extract_features(temp[:, -step_length // 100:])]


# Probe the feature count by building one window from the first 150,000 rows.
# Only column 0 is passed — the same column generator() feeds to create_X —
# so that values from column 1 are not interleaved into the window by the
# reshape inside create_X.
n_features = create_X(float_data[0:150000, 0]).shape[1]
print("Our RNN is based on %i features"% n_features)
    


Our RNN is based on 18 features


#### Generating batches for training and validation set


In [7]:
def generator(data, min_index=0, max_index=None, batch_size=16, n_steps=150, step_length=1000):
    """Yield random ``(samples, targets)`` batches forever.

    For every batch, ``batch_size`` window end positions are drawn uniformly
    from ``[min_index + n_steps * step_length, max_index)``. Each sample is
    ``create_X`` applied to column 0 of ``data`` for the window ending at that
    position; its target is column 1 of the row just before that position.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; column 0 is the model input, column 1 the target.
    min_index : int
        Lowest row a window may start at.
    max_index : int, optional
        Exclusive upper bound for window end positions; defaults to
        ``len(data) - 1``.
    batch_size : int
        Number of windows per batch.
    n_steps, step_length : int
        Forwarded to ``create_X``.

    Yields
    ------
    tuple of numpy.ndarray
        ``samples`` with shape ``(batch_size, n_steps, n_features)`` and
        ``targets`` with shape ``(batch_size,)``.
    """
    upper_bound = len(data) - 1 if max_index is None else max_index
    window_length = n_steps * step_length

    while True:
        # Each drawn value is the exclusive end row of one window.
        window_ends = np.random.randint(min_index + window_length, upper_bound, size=batch_size)

        batch_inputs = np.zeros((batch_size, n_steps, n_features))
        batch_targets = np.zeros(batch_size)

        for slot, end_row in enumerate(window_ends):
            batch_inputs[slot] = create_X(data[:, 0], last_index=end_row, n_steps=n_steps, step_length=step_length)
            # The target belongs to the last row inside the window.
            batch_targets[slot] = data[end_row - 1, 1]

        yield batch_inputs, batch_targets
        
batch_size = 32

# Row index used to split the data into a validation part (before it) and a
# training part (after it).
# NOTE(review): presumably the row at which the second earthquake occurs — confirm.
second_earthquake = 50085877
float_data[second_earthquake, 1]  # column-1 value at the split row; not used further

# Validation windows end before the split row; training windows start after it.
# (Calling generator(float_data, batch_size=batch_size) without bounds would
# instead sample training windows from the whole array.)
valid_gen = generator(float_data, max_index=second_earthquake, batch_size=batch_size)
train_gen = generator(float_data, min_index=second_earthquake + 1, batch_size=batch_size)



#### Model definition

In [18]:

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import adam
from keras.callbacks import ModelCheckpoint

# Checkpoint to model.hdf5 with a period of 3 epochs, writing only when the
# monitored quantity has improved (save_best_only=True).
cb = [ModelCheckpoint("model.hdf5", save_best_only=True, period=3)]

model = Sequential()

# First recurrent layer. return_sequences=True makes it emit an output for
# every time step, which the stacked LSTM below requires as its input.
model.add(LSTM(48, dropout=0.4, return_sequences=True, input_shape=(None, n_features)))

# Second recurrent layer; returns only its final output.
model.add(LSTM(96))

model.add(Dense(10, activation='relu'))

# Single unit with no activation: the regression output.
model.add(Dense(1))

model.summary()





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 48)                12864     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                490       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 11        
Total params: 13,365
Trainable params: 13,365
Non-trainable params: 0
_________________________________________________________________


#### Compiling and running the model

In [23]:

# Train against mean absolute error using Adam with a learning rate of 0.0005.
model.compile(loss="mae", optimizer=adam(lr=0.0005))

# 100 epochs of 500 training batches each, with 200 validation batches drawn
# from valid_gen; the callbacks in cb are applied and verbose=0 is passed.
history = model.fit_generator(
    train_gen,
    epochs=100,
    steps_per_epoch=500,
    validation_data=valid_gen,
    validation_steps=200,
    callbacks=cb,
    verbose=0,
)


  
  


#### Loading test file and creating submission file

In [None]:

# Sample submission indexed by seg_id; its time_to_failure column is
# overwritten with the model's predictions below.
submission = pd.read_csv('../input/sample_submission.csv', index_col='seg_id', dtype={"time_to_failure": np.float32})

# NOTE(review): predictions come from the in-memory `model`, not from the
# model.hdf5 checkpoint written during training — confirm this is intended.
for seg_id in tqdm(submission.index):
    seg = pd.read_csv('../input/test/' + seg_id + '.csv')
    x = seg['acoustic_data'].values
    # create_X(x) covers the whole segment; add a leading batch axis of size 1.
    prediction = model.predict(np.expand_dims(create_X(x), 0))
    # Write through a single label-based .loc call (no chained assignment, no
    # integer key on a string index) and store the scalar, not the (1, 1) array.
    submission.loc[seg_id, 'time_to_failure'] = prediction[0, 0]

submission.head()


submission.to_csv('submission.csv')