In [2]:
import scipy.io as scio
import matplotlib.pyplot as plt
import numpy as np
import keras
from keras.utils import *
from keras.models import Sequential
from keras.layers import Dense, Activation, MaxPooling2D, Conv2D, Flatten

# Soil-type codes. A code's position in this list is its numeric encoding;
# the trailing empty string covers records whose soil-type field is blank.
soiltypeindex = [
    'DL', 'DSG', 'ED', 'EQ', 'ES', 'F', 'FYLD', 'GC',
    'GL', 'GNG', 'GS', 'HAG', 'HG', 'HSL', 'HV', 'JV',
    'KQ', 'KS', 'LL', 'ML', 'MSG', 'PAM', 'PL', 'ROG',
    'SK', 'SO', 'SVG', 'T', 'VAG', 'Y', 'ZK', '',
]
# Largest valid index into soiltypeindex, as a float (31.0 for this list).
soiltype_norm = float(len(soiltypeindex) - 1)

# ';'-separated input files: positive (label 1) and negative (label 0) rows.
observationFile = 'Limonium_vulgare.txt'
observationFileNegative = 'Limonium_vulgare_neg.txt'

# convert one ';'-separated text record into a numeric feature vector
def convert2fea(line):
    """Turn one semicolon-separated observation record into 7 float features.

    Fields are read by 0-based position: 2, 4, 5, 6, 7, 8 and 9.
    NOTE(review): going by the header row of the sample data these are
    polyTypeId, regionId, bioreg, soilType, distCoast, x_int and y_int --
    confirm against the input files.

    Args:
        line: one data row (no trailing newline), fields separated by ';'.

    Returns:
        A list of 7 floats, in field order:
        field 2 / 10 (1.0 when the field is blank), (field 4 - 1) / 4,
        0.0 if field 5 is 'CON' else 1.0, the soil-type index scaled by
        soiltype_norm, field 7 / 48991, and fields 8 and 9 min-max scaled
        with fixed bounds.

    Raises:
        IndexError: if the record has fewer than 10 fields.
        ValueError: if a numeric field cannot be parsed, or field 6 is not
            one of the codes in soiltypeindex.
    """
    fields = line.split(';')

    # A blank field 2 is mapped straight to 1.0 instead of being parsed.
    type_fea = 1.0 if fields[2] == '' else float(fields[2]) / 10.0

    region_fea = (float(fields[4]) - 1.0) / 4.0

    # Binary flag: 'CON' -> 0.0, any other value -> 1.0.
    bioreg_fea = 0.0 if fields[5] == 'CON' else 1.0

    # Scale by the shared module-level normalizer rather than a hard-coded
    # 31.0, so the encoding stays in [0, 1] if soiltypeindex ever changes.
    soil_fea = float(soiltypeindex.index(fields[6])) / soiltype_norm

    # NOTE(review): 48991.0 and the bounds below look like dataset-wide
    # extremes (max coast distance, x/y bounding box) -- confirm.
    dist_fea = float(fields[7]) / 48991.0
    x_fea = (float(fields[8]) - 441994.0) / (892641.0 - 441994.0)
    y_fea = (float(fields[9]) - 6050562.0) / (6402150.0 - 6050562.0)

    return [type_fea, region_fea, bioreg_fea, soil_fea, dist_fea, x_fea, y_fea]

#read positive data
# The context manager closes the file even if the read raises.
with open(observationFile, 'r') as f:
    lines_pos = f.read().splitlines()
# Row 0 is skipped (it is printed later as the column header); every other
# row becomes one feature vector, so pos_fea[k] belongs to lines_pos[k + 1].
pos_fea = [convert2fea(record) for record in lines_pos[1:]]


#split positive data into training and testing
# A fixed number of rows at the end of pos_fea is held out for testing;
# everything before them is used for training.
testSetSize = 100
dim = len(pos_fea)
trainSetSize = dim - testSetSize
print('number of positive observations: {}'.format(dim))
print('observations for testing: {}'.format(testSetSize))
print('observations for training: {}'.format(trainSetSize))

pos_train = pos_fea[:trainSetSize]
pos_test = pos_fea[trainSetSize:trainSetSize + testSetSize]
# Positive rows carry label 1.0, stored as (n, 1) float arrays.
pos_train_lab = np.ones((trainSetSize, 1))
pos_test_lab = np.ones((testSetSize, 1))

#read negative data
# The context manager closes the file even if the read raises.
with open(observationFileNegative, 'r') as f:
    lines_neg = f.read().splitlines()
# Row 0 is skipped (it is printed later as the column header); every other
# row becomes one feature vector, so neg_fea[k] belongs to lines_neg[k + 1].
neg_fea = [convert2fea(record) for record in lines_neg[1:]]

#split negative data into training and testing
# Reuses the split sizes computed for the positive data; negative rows past
# trainSetSize + testSetSize are not used.
# NOTE(review): assumes the negative file has at least
# trainSetSize + testSetSize data rows -- confirm.
neg_split_end = trainSetSize + testSetSize
neg_train = neg_fea[:trainSetSize]
neg_test = neg_fea[trainSetSize:neg_split_end]
# Negative rows carry label 0.0, stored as (n, 1) float arrays.
neg_train_lab = np.zeros(shape=(trainSetSize, 1))
neg_test_lab = np.zeros(shape=(testSetSize, 1))

#define NN
# Fully connected binary classifier: two 50-unit ReLU layers followed by a
# single sigmoid output, with the input width taken from the feature vectors.
n_features = len(pos_train[0])
model = Sequential([
    Dense(50, input_dim=n_features, activation='relu'),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
num_epoch = 20
batch_size = 32
# Positives first, then negatives; labels are stacked in the same order.
train_fea = np.asarray(pos_train + neg_train)
train_label = np.concatenate((pos_train_lab, neg_train_lab), axis=0)
# Writes a diagram of the network to model.png (plot_model is expected to
# come from the star import of keras.utils).
plot_model(model, to_file='model.png', show_shapes=True)
#train NN
model_log = model.fit(
    train_fea,
    train_label,
    batch_size=batch_size,
    epochs=num_epoch,
    verbose=1,
)

#test NN
# Held-out rows, positives first, with labels stacked in the same order.
test_fea = np.asarray(pos_test+neg_test)
test_label = np.concatenate((pos_test_lab, neg_test_lab))

# With metrics=['accuracy'] at compile time, index 1 of the result is the
# accuracy (index 0 is the loss).
score = model.evaluate(test_fea, test_label)

numToPrint = 100
print('Accuracy:', score[1])

# Only affects how numpy arrays are printed; the scalar predictions below
# are rounded explicitly so both classes are reported with 3 decimals.
np.set_printoptions(precision=3, suppress=True)

# Never index past the held-out rows if numToPrint exceeds the test split.
numPos = min(numToPrint, len(pos_test))
print('Probabilities for '+str(numPos)+' positive data: ')
print('Prediction;'+lines_pos[0])
posPredicted = model.predict(np.asarray(pos_test))
for i in range(numPos):
    # +1 skips the header row; trainSetSize skips the training rows.
    print(str(round(posPredicted[i][0], 3))+';'+lines_pos[trainSetSize+i+1])

print('')
numNeg = min(numToPrint, len(neg_test))
print('Probabilities for '+str(numNeg)+' negative data: ')
print('Prediction;'+lines_neg[0])
negPredicted = model.predict(np.asarray(neg_test))
for i in range(numNeg):
    # Same offset as above: header row plus the rows used for training.
    print(str(round(negPredicted[i][0], 3))+';'+lines_neg[trainSetSize+i+1])


Using TensorFlow backend.


number of positive observations: 559
observations for testing: 100
observations for training: 459
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 0.96
Probabilities for 100 positive data: 
Prediction;OBJECTID;polyType;polyTypeId;regionName;regionId;bioreg;soilType;distCoast;x_int;y_int
0.78600883;465;saltmarsh;6;SjLolFal;4;CON;HSL;424.2640686;643380;6121530
0.78841436;466;saltmarsh;6;SjLolFal;4;CON;HSL;360.5551147;643390;6121570
0.7884152;467;saltmarsh;6;SjLolFal;4;CON;HSL;360.5551147;643400;6121580
0.84610265;468;saltmarsh;6;SjLolFal;4;CON;;0.0000000;631840;6080580
0.9645779;469;saltmarsh;6;VestJyl;2;ATL;HSL;360.5551147;470980;6126600
0.89045465;470;saltmarsh;6;NordJyl;1;CON;HS