In [283]:
# keras version : 2.4.3
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from keras.utils import to_categorical
import pickle
# from sklearn.ensemble import RandomForestClassifier
#from keras.utils import np_utils
#from sklearn.preprocessing import LabelEncoder

In [284]:
# Random seed for reproducibility
seed = 12
np.random.seed(seed)
# Import data
df = pd.read_csv('training_dataset_remove_duplicates.csv')
# Print first 10 samples
# print(df.head(10))

In [285]:
# Check missing values
print(df.isna().sum()) # No missing values!

AP_1       0
AP_2       0
AP_3       0
AP_4       0
AP_5       0
          ..
AP_1595    0
AP_1596    0
AP_1597    0
AP_1598    0
target     0
Length: 1599, dtype: int64


In [286]:
num_row, num_col = df.shape

In [287]:
num_row, num_col

(287, 1599)

In [288]:
# Divide data into features X and target (Classes) Y
X = df.iloc[:,0:num_col-1]
Y = df.iloc[:, num_col-1]
print(X.shape)
print(Y.shape)

(287, 1598)
(287,)


In [289]:
X.describe()

Unnamed: 0,AP_1,AP_2,AP_3,AP_4,AP_5,AP_6,AP_7,AP_8,AP_9,AP_10,...,AP_1589,AP_1590,AP_1591,AP_1592,AP_1593,AP_1594,AP_1595,AP_1596,AP_1597,AP_1598
count,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0,...,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0,287.0
mean,-105.909408,-109.905923,-105.184669,-109.84669,-102.61324,-109.132404,-109.850174,-102.797909,-109.909408,-109.961672,...,-106.062718,-105.247387,-109.947735,-109.954704,-109.825784,-102.38676,-109.954704,-107.933798,-106.703833,-109.759582
std,12.432425,1.59376,13.221779,1.534287,17.050529,5.690431,1.498901,17.226005,1.534731,0.649309,...,11.804478,14.160844,0.885422,0.767366,1.8257,17.492063,0.767366,8.111492,10.860499,2.097627
min,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,...,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0
25%,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,...,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0
50%,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,...,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0
75%,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,...,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0,-110.0
max,-40.0,-83.0,-41.0,-92.0,-41.0,-59.0,-92.0,-29.0,-84.0,-99.0,...,-47.0,-35.0,-95.0,-97.0,-85.0,-41.0,-97.0,-68.0,-55.0,-86.0


In [290]:
# Check for class imbalance
print(df.groupby(Y).size())

target
REGION1     19
REGION10     4
REGION11     8
REGION12    17
REGION13     3
REGION14    15
REGION15    24
REGION16     6
REGION17    12
REGION18    21
REGION2     20
REGION3     15
REGION4     18
REGION5     26
REGION6     17
REGION7     11
REGION8     28
REGION9     23
dtype: int64


In [291]:
# Normalize features within range 0 (minimum) and 1 (maximum)
scaler = MinMaxScaler(feature_range=(0, 1))
X_scale = scaler.fit_transform(X)
X_scale = pd.DataFrame(X_scale)

In [292]:
# Encode target values as integers(0~17) from string(REGION1, ... REGION18)
# Y_en = pd.get_dummies(Y)
encoder = LabelEncoder()
encoder.fit(Y)
Y_en = encoder.transform(Y)
Y_en = to_categorical(Y_en)

In [324]:
# pickle.dump(scaler, open('scaler.pkl', 'wb'))
# pickle.dump(encoder, open('label_encoder.pkl', 'wb'))

In [325]:
sc_temp = pickle.load(open('scaler.pkl', 'rb'))
en_temp = pickle.load(open('label_encoder.pkl', 'rb'))

In [326]:
# test scaler saved as pickle file

# sc_temp.transform(np.expand_dims(X.values[0], axis=0))

array([[0.        , 0.        , 0.        , ..., 0.66666667, 0.        ,
        0.        ]])

In [329]:
# test encoder saved as pickle file
# en_temp.transform(['REGION16', 'REGION9'])

array([ 7, 17], dtype=int64)

In [296]:
# For Keras, convert dataframe to array values (Inbuilt requirement of Keras)
X_scale = X_scale.values

In [297]:
X_scale

array([[0.        , 0.        , 0.        , ..., 0.66666667, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.76190476, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.92857143, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.66666667, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.17391304, ..., 0.        , 0.        ,
        0.        ]])

In [298]:
Y_en

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [299]:
# Define baseline model. Then use it in Keras Classifier to implement cross validation
def baseline_model():
    # Create model here
    model = Sequential()
    model.add(Dense(25, input_dim = num_col-1, activation = 'relu')) # Rectified Linear Unit Activation Function
    model.add(Dense(25, activation = 'relu'))
    model.add(Dense(18, activation = 'softmax')) # Softmax for multi-class classification(18 target classes)
    # Compile model here
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model

In [300]:
# Create Keras Classifier and use predefined baseline model
estimator = KerasClassifier(build_fn = baseline_model, epochs = 100, batch_size = 10, verbose = 0)

In [301]:
# KFold Stratified Cross Validation
kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = seed)

In [302]:
results = cross_val_score(estimator, X_scale, Y, cv = kfold)
# Result
# print("Result: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))



In [303]:
results # cross-validation result

array([0.93103451, 0.84482759, 0.92982459, 0.92982459, 0.94736844])

In [304]:
estimator.fit(X_scale, Y_en)

<tensorflow.python.keras.callbacks.History at 0x15d8a867308>

In [305]:
estimator.score(X_scale, Y_en)

0.9790940880775452

In [316]:
# scaler.transform(np.expand_dims(X.values[0], axis=0)).squeeze()

array([0.        , 0.        , 0.        , ..., 0.66666667, 0.        ,
       0.        ])

In [358]:
# Prediction test with 1st row of training set
test_data = scaler.transform(np.expand_dims(X.values[7], axis=0)) 

In [362]:
test_data

array([[0.85714286, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [363]:
estimator.predict(test_data)

array([11])

In [364]:
# get prediction result
# estimator.predict(test_data)
result = encoder.inverse_transform(estimator.predict(test_data))[0]

In [365]:
result

'REGION3'

In [332]:
# estimator.save('my_model.hdf5')

AttributeError: 'KerasClassifier' object has no attribute 'save'

In [307]:
X

Unnamed: 0,AP_1,AP_2,AP_3,AP_4,AP_5,AP_6,AP_7,AP_8,AP_9,AP_10,...,AP_1589,AP_1590,AP_1591,AP_1592,AP_1593,AP_1594,AP_1595,AP_1596,AP_1597,AP_1598
0,-110,-110,-110,-110,-97,-110,-110,-74,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,-82,-110,-110
1,-110,-110,-110,-110,-110,-110,-110,-66,-110,-110,...,-110,-91,-110,-110,-110,-110,-110,-78,-110,-110
2,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110,...,-110,-82,-110,-110,-110,-110,-110,-71,-110,-110
3,-110,-110,-110,-110,-110,-110,-110,-61,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,-71,-110,-110
4,-110,-110,-110,-110,-97,-110,-110,-74,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,-82,-110,-110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,-110,-110,-57,-110,-110,-110,-110,-110,-110,-110,...,-95,-110,-110,-110,-110,-110,-110,-110,-110,-110
283,-110,-110,-98,-110,-110,-110,-110,-110,-110,-110,...,-110,-110,-110,-110,-110,-67,-110,-110,-110,-110
284,-110,-110,-64,-110,-110,-110,-110,-110,-110,-110,...,-87,-110,-110,-110,-110,-110,-110,-110,-110,-110
285,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110,...,-86,-110,-110,-110,-110,-71,-110,-110,-110,-110


In [327]:
Y

0      REGION16
1      REGION16
2      REGION16
3      REGION16
4      REGION16
         ...   
282     REGION9
283     REGION9
284     REGION9
285     REGION9
286     REGION9
Name: target, Length: 287, dtype: object

In [339]:
# now actually make model
model = Sequential()
model.add(Dense(25, input_dim = num_col-1, activation = 'relu')) # Rectified Linear Unit Activation Function
model.add(Dense(25, activation = 'relu'))
model.add(Dense(18, activation = 'softmax')) # Softmax for multi-class classification(18 target classes)
# Compile model here
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [341]:
model.fit(X_scale, Y_en, epochs = 100, batch_size = 10)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x15d84bf0048>

In [359]:
actual_result = np.argmax(model.predict(test_data).squeeze()) # predict result

In [360]:
actual_result

11

In [361]:
# result
encoder.inverse_transform([actual_result])[0]

'REGION3'

In [366]:
# model.save('keras_model1')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: keras_model1\assets
