In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten, Dropout
from keras.layers import concatenate
from keras.layers.convolutional import Conv2D
from keras.optimizers import adadelta,RMSprop,SGD,Adam
from keras.layers.convolutional import MaxPooling2D, AveragePooling2D
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

Using TensorFlow backend.


In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

TRAIN_DATASET_PATH = 'data/uci_ml_hackathon_fire_dataset_2012-05-09_2013-01-01_10k_train.hdf5'
TEST_DATASET_PATH = 'data/uci_ml_hackathon_fire_dataset_2013-01-01_2014-01-01_5k_test.hdf5'

def getDataDict(DatasetPath):
    with h5py.File(DatasetPath, 'r') as f:
        data = {}
        for k in list(f):
            data[k] = f[k][:]
        return data

train_data = getDataDict(TRAIN_DATASET_PATH)
test_data = getDataDict(TEST_DATASET_PATH)

print(train_data.keys())

dict_keys(['datetime', 'land_cover', 'latitude', 'longitude', 'meteorology', 'observed', 'target'])


In [4]:
def transformDateTime(datetime):
    ret = np.asarray([x * np.ones((1,30, 30)) for x in datetime])
    return ret


def transformLandCover(landCover):
    nanConvert = {
        0: 0,
        1: 0,
        2: -1,
        3: 0,
        4: -1,
        5: 0,
        6: 0,
        16:0
    }
    ret=[]
    
    for datapoint in landCover:
        for i in range(17):
            if i in nanConvert.keys():
                datapoint[i][np.isnan(datapoint[i])]= nanConvert[i] 
        ret.append(datapoint)
    return np.asarray(ret)


def transformLatAndLong(val):
    ret = np.asarray([x * np.ones((1,30, 30)) for x in val])
    return ret

#TODO : define temperature according to datetime average
def transformMet(met,date):
    nanConvert = {
        1: 26,
        2: 0,
        3: 0,
        4: 0,
    }
    met0=[]
    met1=[]
    index = 0
    for datapoint in met:
        for i in range(5):
            if(i==0):
                if pd.to_datetime(date[index]).hour > 12:
                    datapoint[1][i][np.isnan(datapoint[1][i])] = 290
                    datapoint[0][i][np.isnan(datapoint[0][i])] = 301.91
                else:
                    datapoint[1][i][np.isnan(datapoint[1][i])] = 302.54
                    datapoint[0][i][np.isnan(datapoint[0][i])] = 287.56
            else:
                datapoint[0][i][np.isnan(datapoint[0][i])]= nanConvert[i] 
                datapoint[1][i][np.isnan(datapoint[1][i])]= nanConvert[i]
            if(i==2):
                datapoint[0][i]= sigmoid(datapoint[0][i]- 0.4232)
                datapoint[1][i]= sigmoid(datapoint[1][i]- 1.4365)
            if(i==3):
                datapoint[0][i]= sigmoid(datapoint[0][i]+0.0854)
                datapoint[1][i]= sigmoid(datapoint[1][i]- 0.495)
        met0.append(datapoint[0])
        met1.append(datapoint[1])
        index+=1
    return np.asarray(met0),np.asarray(met1)

def transformFire(fire):
    return np.asarray(fire)

#transform all of them into dict of 3d np arrays.
#Augmentation step must take place after this.
#Can store this in h5py file after this.
def transformAndClean(data):
    X = {}
    Y= {}
    X['datetime'] = transformDateTime(data['datetime'])
    X['landCover'] = transformLandCover(data['land_cover'])
    X['latitude'] = transformLatAndLong(data['latitude'])
    X['longitude'] = transformLatAndLong(data['longitude'])
    X['met0'], X['met1'] = transformMet(data['meteorology'],data['datetime'])
    X['observed'] = transformFire(data['observed'])
    Y['target'] = transformFire(data['target'])
    return X,Y

X,Y = transformAndClean(train_data)

In [5]:
testX,testY = transformAndClean(test_data)

startDictionary={
    'datetime':0,
    'landCover':1,
    'latitude':18,
    'longitude':19,
    'met0':20,
    'met1':25,
    'observed':30,
    'target':0
}

lengthDictionary={
    'datetime':1,
    'landCover':17,
    'latitude':1,
    'longitude':1,
    'met0':5,
    'met1':5,
    'observed':5,
    'target':2
}
def flattenData(data):
    length =0
    for key,value in data.items():
        
        length += value.shape[1]
        n=value.shape[0]
    ret = np.zeros((n,length,30,30))
    for key,arr in data.items():    
        for index,datapoint in enumerate(arr):
            ret[index][startDictionary[key]: startDictionary[key]+lengthDictionary[key]][:][:]=datapoint        
    return ret;

flatX = flattenData(X)
flatY = flattenData(Y)  
flatTestX = flattenData(testX)
flatTestY = flattenData(testY)

X=None
Y=None
testX=None
testY=None

In [12]:
from sklearn.preprocessing import StandardScaler

columns =[1,2,3,4,5,6,7,20,21,25,26]

def getScaler(X, columns):
    flat_X = X.transpose(0,2,3,1).reshape(X.shape[0]*X.shape[2]*X.shape[3],-1)
    scaler =StandardScaler() # change name
    # temp is 20 and 25
    scaler.fit(flat_X[:,columns])
    return scaler
def scalerTransform(X, columns, scalar):
    flat_X = X.transpose(0,2,3,1).reshape(X.shape[0]*X.shape[2]*X.shape[3],-1)
    flat_X.shape
    transformFlatData = scaler.transform(flat_X[:,columns])
    transformData = transformFlatData.reshape((-1,len(columns),30,30))
    return transformData

scaler= getScaler(flatX,columns)
scaledFlatX = flatX
scaledFlatX[:,columns] = scalerTransform(flatX,columns,scaler)
scaledTestX = flatTestX
scaledTestX[:,columns] = scalerTransform(flatTestX,columns,scaler)

In [13]:
trainX,validX,trainY,validY = train_test_split(scaledFlatX,flatY,test_size=0.1,random_state = 42,shuffle=True)

In [14]:
from keras import backend as K
smooth = 1

def iou(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(tf.cast(y_true_f,np.float32) * y_pred_f)
    return intersection / (K.sum(tf.cast(y_true_f,np.float32)) + K.sum(y_pred_f) - intersection)

def iou_loss(y_true, y_pred):
    return 1-iou(y_true, y_pred)

In [15]:
masterdict={
    0:'datetime',
    1: 'Aspect',
    2: 'Canopy Built Density',
    3: 'Canopy Base Height',
    4: 'Canopy Cover',
    5: 'Canopy Height',
    6: 'Elevation',
    7: 'Slope',
    8: 'No Data',
    9: 'Sparse',
    10: 'Tree',
    11: 'Shrub',
    12: 'Herb',
    13: 'Water',
    14: 'Barren',
    15: 'Developed',
    16: 'Snow-Ice',
    17: 'Agriculture',
    18:'latitude',
    19:'longitude',
    20:'temp 0 ',
    21:'humidity 0 ',
    22:'uwind 0 ',
    23:'vwind 0 ',
    24:'prec 0 ',
    25:'temp +12 ',
    26:'humidity +12 ',
    27:'uwind +12 ',
    28:'vwind +12 ',
    29:'prec +12 ',
    30:'observed 0',
    31:'observed -12',
    32:'observed -24',
    33:'observed -36',
    34:'observed -48'
}

In [19]:
ignitionFeatures = [8,9,10,11,12,13,14,15,16,17]
spreadFeatures = [30,31,32,33,34,22,23]
yfeatures=[0]

modelTrainIgnition = np.take(trainX,ignitionFeatures,axis=1)  
modelTrainSpread = np.take(trainX,spreadFeatures,axis=1)  
modelTrainY = np.take(trainY,yfeatures,axis=1)  


modelValidIgnition = np.take(validX,ignitionFeatures,axis=1)  
modelValidSpread = np.take(validX,spreadFeatures,axis=1)  
modelValidY = np.take(validY,yfeatures,axis=1)


modelTestIgnition = np.take(scaledTestX,ignitionFeatures,axis=1)  
modelTestSpread = np.take(scaledTestX,spreadFeatures,axis=1)  
modelTestY = np.take(flatTestY,yfeatures,axis=1)

In [18]:
from keras import Input
from keras import Model
import keras
from keras.layers import BatchNormalization, MaxPool2D



IgnitionInput=Input(shape=(10,30,30))
Ignition= Conv2D(filters=128, kernel_size=(3,3), padding = 'same', activation='relu', data_format='channels_first')(IgnitionInput)
#Ignition=Dropout(0.1)(Ignition)
Ignition=BatchNormalization()(Ignition)
Ignition= Conv2D(filters=256, kernel_size=(1,1), padding = 'same', activation='relu',  data_format='channels_first')(Ignition)
Ignition=Dropout(0.1)(Ignition)
Ignition= Conv2D(filters=512, kernel_size=(1,1), padding = 'same', activation='relu',  data_format='channels_first')(Ignition)
Ignition=Dropout(0.2)(Ignition)
Ignition=BatchNormalization()(Ignition)
Ignition = Conv2D(filters=1, kernel_size=(1,1), padding = 'same', activation='sigmoid', data_format='channels_first', name="iska_final")(Ignition)
ModelIgnition = Model(inputs=IgnitionInput, output = Ignition)

ModelIgnitionIntermediate = ModelIgnition.get_layer("iska_final").output


SpreadInput= Input(shape=(7,30,30))
Spread =keras.layers.concatenate([ModelIgnitionIntermediate, SpreadInput],axis=1)

Spread= Conv2D(filters=128, kernel_size=(5,5), padding = 'same', activation='relu',  data_format='channels_first')(SpreadInput)
Spread=BatchNormalization()(Spread)
Spread= Conv2D(filters=256, kernel_size=(3,3), padding = 'same', activation='relu', data_format='channels_first')(Spread)
Spread=Dropout(0.1)(Spread)
Spread= Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='relu',  data_format='channels_first')(Spread)
Spread=Dropout(0.2)(Spread)
Spread=BatchNormalization()(Spread)
Spread = Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='relu', data_format='channels_first')(Spread)
Spread = Dropout(0.2)(Spread)
Spread=Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='relu', data_format='channels_first')(Spread)
Spread=Dropout(0.3)(Spread)
Spread=BatchNormalization()(Spread)

X=Conv2D(filters=1, kernel_size=(3,3), padding = 'same', activation='sigmoid', data_format='channels_first')(Spread)

model = Model(inputs= [SpreadInput, IgnitionInput], outputs=[X])
model.compile(optimizer=Adam(lr = 0.0002), loss=iou_loss, metrics = [iou])
history = model.fit([modelTrainSpread,modelTrainIgnition], modelTrainY, batch_size= 30, epochs=50, validation_data=([ modelTestSpread,modelTestIgnition],modelTestY))



Train on 9000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
1680/9000 [====>.........................] - ETA: 18s - loss: 0.7386 - iou: 0.2614

KeyboardInterrupt: 

In [None]:
# model1 = Sequential()
# output1 = model1.add(Conv2D(filters=128, kernel_size=(3,3), padding = 'same', activation='relu', input_shape=(7,30,30), data_format='channels_first'))
# output1 = model1.add(Conv2D(filters=256, kernel_size=(3,3), padding = 'same', activation='relu', input_shape=(22,30,30), data_format='channels_first'))
# output1 = model1.add(Dropout(0.1))
# output1 = model1.add(Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='relu', input_shape=(22,30,30), data_format='channels_first'))
# output1 = model1.add(Dropout(0.2))
# output1 = model1.add(Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='relu', input_shape=(22,30,30), data_format='channels_first'))
# output1 = model1.add(Dropout(0.2))
# output1 = model1.add(Conv2D(filters=512, kernel_size=(3,3), padding = 'same', activation='sigmoid', input_shape=(22,30,30), data_format='channels_first'))
# output1 = model1.add(Dropout(0.3))
# output1 = model1.add(Conv2D(filters=1, kernel_size=(3,3), padding = 'same', activation='sigmoid', data_format='channels_first'))



# model1.compile(optimizer=Adam(lr = 0.001), loss=iou_loss, metrics = [iou])
# history = model1.fit(modelTrainX, modelTrainY, batch_size= 30, epochs=50)

In [None]:
def iou_1(yhat, ytest):
    print("IOU_1")
    inter = yhat*ytest
    union = yhat + ytest - inter
    return np.sum(inter)/np.sum(union)


y_pred=model.predict([modelTestSpread,modelTestIgnition])
print(iou_1(y_pred,modelTestY))