In [1]:
from keras.utils import Sequence, plot_model
import numpy as np
import ast
# import model_generator as mg

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import TensorBoard

import pandas as pd


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Load the raw table and discard the claim amount column, which is not used.
df = pd.read_csv('part2_data.csv').drop(columns='claim_amount')

# Features are every remaining column except the last two; the label is the
# last column.
# NOTE(review): after the drop above, the second-to-last column ends up in
# neither Xraw nor Yraw -- confirm that excluding it is intentional.
Xraw = df.loc[:, df.columns[:-2]]
Yraw = df.loc[:, df.columns[-1]]

In [3]:
# Convert the pandas objects to plain NumPy arrays for the helpers below.
# np.asarray is used instead of .to_numpy() so that re-running this cell
# (when Xraw / Yraw are already ndarrays) is a harmless no-op instead of an
# AttributeError.
Xraw = np.asarray(Xraw)
Yraw = np.asarray(Yraw)

In [4]:
def partitionize(Xraw, Yraw, ratio = (3,1)):
    """Shuffle the samples and deal them into train / validation ID lists.

    The shuffled rows are walked in order: `ratio[0]` rows go to "train",
    the next `ratio[1]` rows go to "validation", and so on until every row
    has been assigned (the final group may be shorter than requested).

    Each sample is identified by the string form of its feature list
    (e.g. "[1.0, 2.0]"), which `ast.literal_eval` can turn back into the
    features. Because the ID is derived from the feature values, rows with
    identical features share one ID and the label stored last wins.

    Parameters
    ----------
    Xraw : array-like, shape (n_samples, n_features)
        Feature rows; converted to float before the IDs are built.
    Yraw : array-like, shape (n_samples,) or (n_samples, 1)
        One label per row; each label is truncated to int.
    ratio : tuple of two ints
        (rows per train group, rows per validation group).

    Returns
    -------
    partition : dict
        {"train": [id, ...], "validation": [id, ...]}
    labels : dict
        Maps every ID to its integer label.

    Raises
    ------
    ValueError
        If a ratio entry is negative or both entries are zero (the walk
        could never advance).
    """
    n_train, n_val = int(ratio[0]), int(ratio[1])
    if n_train < 0 or n_val < 0 or n_train + n_val == 0:
        raise ValueError("ratio must hold two non-negative ints that are not both zero")

    X = np.asarray(Xraw, dtype=float)
    Y = np.asarray(Yraw, dtype=float)
    size = X.shape[0]

    # Shuffle features and labels with one shared permutation.
    order = np.arange(size)
    np.random.shuffle(order)
    X = X[order]
    Y = Y[order]

    partition = {"train" : [], "validation" : []}
    labels = {}
    pos = 0
    # Every range is clamped to `size`, so the walk ends exactly when all
    # rows are assigned: no row is dropped and no IndexError is needed to
    # stop the loop.
    while pos < size:
        for split, count in (("train", n_train), ("validation", n_val)):
            stop = min(pos + count, size)
            for i in range(pos, stop):
                # tolist() yields plain Python floats, so the ID stays
                # parseable by ast.literal_eval on every NumPy version.
                sample_id = str(X[i].tolist())
                partition[split].append(sample_id)
                # ravel() lets a (1,)-shaped label row be read like a scalar.
                labels[sample_id] = int(np.ravel(Y[i])[0])
            pos = stop

    return partition, labels

def nearest_neighbour(X, x):
    """Return a random fraction of the distance from `x` to its nearest neighbour.

    Rows of `X` that are element-wise identical to `x` are not counted as
    neighbours. The Euclidean distance to the closest remaining row is
    multiplied by a random weight drawn from (0, 1).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Candidate neighbours.
    x : array-like, shape (n_features,)
        Query sample.

    Returns
    -------
    numpy.ndarray
        Array of length 1 holding `weight * nearest_distance`, or an empty
        array when `X` has no row that differs from `x`.
    """
    X = np.asarray(X, dtype=float)
    x = np.asarray(x, dtype=float)

    # Keep only rows that differ from x in at least one feature. Computing
    # distances for exactly these rows avoids placeholder values in the
    # result and cannot overflow when x is absent from X.
    differs = np.any(X != x, axis=1)
    euclidean = np.sort(np.sqrt(np.sum((X[differs] - x) ** 2, axis=1)))

    # Redraw on an exact zero so the returned offset is never 0 * distance.
    weight = np.random.random()
    while weight == 0:
        weight = np.random.random()

    return np.multiply(euclidean[:1], weight)
    
def SMOTE_100(X):
    """Create one synthetic sample per row of `X`.

    For every row, `nearest_neighbour` supplies a single scalar offset and
    that offset is added to each feature of the row. Note that this shifts
    the row by a scalar rather than interpolating towards a neighbouring
    row.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples to oversample.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `X` holding the synthetic samples.

    Raises
    ------
    IndexError
        If `nearest_neighbour` returns an empty array for some row.
    """
    X = np.asarray(X)
    # A float buffer keeps the result numeric; building it from a list of
    # None would produce an object-dtype array.
    synthetic = np.empty(X.shape, dtype=float)
    for row, sample in enumerate(X):
        additive = nearest_neighbour(X, sample)
        synthetic[row] = sample + additive[0]
    return synthetic

class DataGenerator(Sequence):
    """Keras Sequence that decodes samples from their string IDs batch by batch.

    Every ID is the string form of a feature list: it is parsed with
    `ast.literal_eval` to recover the features and is also the key used to
    look the label up in `labels`.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(8,1), n_classes=2, shuffle=True):
        """Store the configuration and build the initial sample order.

        Parameters
        ----------
        list_IDs : list of str
            Sample IDs served by this generator.
        labels : dict
            Maps each ID to its label.
        batch_size : int
            Number of samples per batch.
        dim : tuple of int
            Shape each decoded sample is reshaped to before the batch is
            flattened to two dimensions.
        n_classes : int
            Stored on the instance; not used by any method of this class.
        shuffle : bool
            Whether to reshuffle the sample order at the end of each epoch.
        """
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when `shuffle` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Decode the given IDs into a feature matrix and a label vector.

        Returns
        -------
        X : numpy.ndarray, shape (len(list_IDs_temp), prod(dim))
        y : numpy.ndarray of int, shape (len(list_IDs_temp),)
        """
        # Size the buffers by the IDs actually supplied so that a short
        # batch never carries uninitialised rows.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # The ID is the literal text of the feature list.
            X[i,] = np.array(ast.literal_eval(ID)).reshape(self.dim)
            y[i] = self.labels[ID]

        # Flatten each sample to one row; the width follows `dim` instead of
        # a fixed feature count.
        X = X.reshape((n_samples, int(np.prod(self.dim))))
        return X, y

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is not served."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return the (X, y) pair for batch number `index`."""
        # Positions in the current (possibly shuffled) order for this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into sample IDs.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y


def model_v0_trial(num_inputs, num_classes):
    """Build and compile a small fully connected network.

    Layers, in order: Dense(4, relu) fed by `num_inputs` features,
    Dense(8, relu), and a Dense(num_classes, sigmoid) output layer. The
    network is compiled with binary cross-entropy loss, the Adam optimizer
    and the accuracy metric.

    Returns the compiled Sequential model.
    """
    net = Sequential()

    first_hidden = Dense(
        4,
        input_shape=(num_inputs,),
        kernel_initializer='random_uniform',
        activation='relu',
    )
    net.add(first_hidden)
    # NOTE(review): plot_model is invoked on the partially built network,
    # here and again after the next layer -- confirm this is still wanted.
    plot_model(net)

    second_hidden = Dense(
        8,
        input_dim=4,
        kernel_initializer='RandomNormal',
        activation='relu',
    )
    net.add(second_hidden)
    plot_model(net)

    output_layer = Dense(
        num_classes,
        kernel_initializer='RandomNormal',
        activation='sigmoid',
    )
    net.add(output_layer)

    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net

def train_model_batch(training, validation, model, epochs, folder):
    """Fit `model` from the `training` generator, validating on `validation`.

    Training runs for `epochs` epochs with verbose level 2, and a
    TensorBoard callback logs to "logs/" + folder. Nothing is returned.
    """
    tensorboard_cb = TensorBoard(log_dir="logs/" + folder)
    fit_options = dict(
        generator=training,
        validation_data=validation,
        epochs=epochs,  # earlier note in this notebook: switch epochs to 40
        verbose=2,
        callbacks=[tensorboard_cb],
    )
    model.fit_generator(**fit_options)
    # A final evaluate_generator pass that printed the baseline error was
    # sketched here previously and is currently disabled.

In [5]:

# Generator settings shared by the training and validation generators.
# NOTE(review): n_classes is 2 here while the model below is built with a
# single output unit -- confirm which one is intended.
params = dict(dim=(8, 1), batch_size=50, n_classes=2, shuffle=True)

# For a quick smoke test, Xraw / Yraw can be replaced with a tiny hand-made
# array of five 8-feature rows and five 0/1 labels before this point.

# Three shuffled rows go to training for every one row sent to validation.
partition, labels = partitionize(Xraw, Yraw, ratio=(3, 1))

training_generator = DataGenerator(list_IDs=partition["train"], labels=labels, **params)
validation_generator = DataGenerator(list_IDs=partition["validation"], labels=labels, **params)

# Eight input features, one sigmoid output unit.
model = model_v0_trial(num_inputs=8, num_classes=1)

train_model_batch(training_generator, validation_generator, model, 10, "")


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/10
 - 2s - loss: 0.3777 - accuracy: 0.8973 - val_loss: 0.4218 - val_accuracy: 0.9028

Epoch 2/10
 - 2s - loss: 0.3328 - accuracy: 0.9071 - val_loss: 0.3903 - val_accuracy: 0.9028
Epoch 3/10
 - 2s - loss: 0.3226 - accuracy: 0.9069 - val_loss: 0.3538 - val_accuracy: 0.9028
Epoch 4/10
 - 2s - loss: 0.3192 - accuracy: 0.9069 - val_loss: 0.2386 - val_accuracy: 0.9028
Epoch 5/10
 - 2s - loss: 0.3166 - accuracy: 0.9069 - val_loss: 0.2079 - val_accuracy: 0.9028
Epoch 6/10
 - 2s - loss: 0.3150 - accuracy: 0.9072 - val_loss: 0.2735 - val_accuracy: 0.9028
Epoch 7/10
 - 2s - loss: 0.3144 - accuracy: 0.9068 - val_loss: 0.4221 - val_accuracy: 0.9028
Epoch 8/10
 - 2s - loss: 0.3136 - accuracy: 0.9069 - val_loss: 0.3044 - val_accuracy: 0.9028
Epoch 9/10
 - 2s - loss: 0.3133 - accuracy: 0.9069 - val_loss: 0.1888 - val_accuracy: 0.9028
Epoch 10/10
 - 2s - loss: 0.3096 - accuracy: 0.9072 - val_loss: 0