In [1]:
import pandas as pd
import numpy as np
import os
from sklearn import metrics, preprocessing
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, load_model
from scipy import spatial
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [2]:
def get_model(data, catcols):    
    inputs = []
    outputs = []
    
    for c in catcols:
        num_unique_values = int(data[c].nunique())
        embed_dim = int(min(np.ceil((num_unique_values)/2), 50))
        inp = layers.Input(shape=(1,))
        out = layers.Embedding(num_unique_values + 1, embed_dim, name=c)(inp)
        out = layers.SpatialDropout1D(0.3)(out)
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        inputs.append(inp)
        outputs.append(out)
    
    x = layers.Concatenate()(outputs)
    x = layers.BatchNormalization()(x)
    
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    
    y = layers.Dense(1, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=y)
    return model

In [3]:
data1 = pd.read_csv("C:\\research\\Analysis\\dataset-result-reduced.csv", engine='python', dtype={"src_image_text": "string", "background_image_text": "string"})
data3 = pd.read_csv("C:\\research\\Analysis\\dataset-b-reduced-labeled.csv", engine='python', dtype={"src_image_text": "string", "background_image_text": "string"})
data2 = pd.read_csv("C:\\research\\Analysis\\dataset-m-reduced-labeled.csv", engine='python', dtype={"src_image_text": "string", "background_image_text": "string"})
data = pd.concat([data1, data2], ignore_index=True, sort=False)
dataCombined = pd.concat([data, data2, data3], ignore_index=True, sort=False)

In [4]:
# data2 = pd.read_csv("C:\\research\\Analysis\\dataset-m-labeled.csv", engine='python', dtype={"src_image_text": "string", "background_image_text": "string"})
# data1 = pd.read_csv("C:\\research\\Analysis\\dataset-result.csv", engine='python', dtype={"src_image_text": "string", "background_image_text": "string"})
# data = data1.append(data2, ignore_index=True)

In [5]:
# Features are all column except the target and id
features = [x for x in data.columns if x not in ["id", "target"]]
features_need_encoding = [x for x in data.columns if x in ["elemID","permission","layout_width","layout_height","textColor","text","textSize","src_image_text","background_image_text"]]
features_no_need_encoding = [x for x in data.columns if x not in ["id", "target","elemID","permission","layout_width","layout_height","textColor","text","textSize","src_image_text","background_image_text"]]

In [6]:
len(dataCombined)

2259

In [7]:
le_dict = {}
# encode all the categorical data
# fill in "-1" for missing values
for feat in features:
    lbl_enc = preprocessing.LabelEncoder()
    
    #dataCombined[feat] = lbl_enc.fit_transform(dataCombined[feat].fillna("-1").astype(str).values)
    #le_dict[feat] = dict(zip(lbl_enc.classes_, lbl_enc.transform(lbl_enc.classes_)))
    
    data[feat] = lbl_enc.fit_transform(data[feat].fillna("-1").astype(str).values)
    #data3[feat] = lbl_enc.transform(data3[feat].fillna("-1").astype(str).values)

# for feat in features_no_need_encoding:
#     data[feat] = data[feat].fillna(0.0)

In [8]:
dataCombined

Unnamed: 0,target,elemID,permission,layout_width,layout_height,textColor,text,textSize,src_image_text,background_image_text
0,1,LLNavBuy,ACCESS_COARSE_LOCATION,wrap_content,wrap_content,@color/Gray,@string/navigation_buy,11.0sp,$,
1,0,lv_sch_search_list,INTERNET,fill_parent,wrap_content,,,,,
2,0,map_view,ACCESS_COARSE_LOCATION,fill_parent,fill_parent,,,,,
3,1,btnAddMember,WRITE_EXTERNAL_STORAGE,fill_parent,wrap_content,@color/Black,@string/add_member,,,
4,1,wrap_filter_1,RECORD_AUDIO,0.0dip,wrap_content,@color/white,@string/lightning_filter_c_to_g,16.0dip,,
...,...,...,...,...,...,...,...,...,...,...
2254,0,wv_simple_webview,ACCESS_NETWORK_STATE,fill_parent,0.0dip,,,,,
2255,0,wv_simple_webview,INTERNET,fill_parent,0.0dip,,,,,
2256,0,emergency_call_widget_ok_btn,BROADCAST_STICKY,fill_parent,wrap_content,#ffffffff,Ok,18.0sp,,
2257,0,emergency_call_widget_ok_btn,WAKE_LOCK,fill_parent,wrap_content,#ffffffff,Ok,18.0sp,,


In [11]:
# split data set into train and test
# train, test = train_test_split(data, test_size=0.2)

X = data[features] # Features
y = data.target # Target variable
X1 = data3[features]
y1 = data3.target

X_train = X
X_test = X1
y_train = y
y_test = y1

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#test_data = [data3.loc[:, features].values[:, k] for k in range(data3.loc[:, features].values.shape[1])]

In [12]:
my_model = get_model(data, features)

In [13]:
my_model.compile(loss='binary_crossentropy', optimizer='adam')

In [11]:
#[X_train.loc[:, f].values for f in features]

In [12]:
#X_train.values

In [13]:
#y_train.values

In [14]:
my_model.fit([X_train.loc[:, f].values for f in features], y_train.values, verbose=1, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x21cc3a65c10>

In [15]:
y_pred = my_model.predict([X_test.loc[:, f].values for f in features])
score = metrics.accuracy_score(y_test.values, y_pred)

In [16]:
print(score)

0.0


In [49]:
print((y_pred))

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.

In [51]:
print(np.array(y_test))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

In [14]:
scores = []
recalls = []
precisions = []

folds = StratifiedKFold(n_splits=5)

for train_index, test_index in folds.split(X, y):
    
    
    X_train, X_test, y_train, y_test = X.iloc[train_index], X.iloc[test_index], y.iloc[train_index], y.iloc[test_index]

    my_model.fit([X_train.loc[:, f].values for f in features], y_train.values, verbose=1, epochs=100)
    
    y_pred = my_model.predict([X_test.loc[:, f].values for f in features])
    scores.append(metrics.accuracy_score(y_test.values, y_pred))
    recalls.append(metrics.recall_score(y_test.values, y_pred))
    precisions.append(metrics.recall_score(y_test.values, y_pred))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch

In [15]:
print("Accuracy:",scores)
print("Recall:",recalls)
print("Precision:",precisions)

Accuracy: [0.8940397350993378, 0.9, 0.9, 0.9, 0.8933333333333333]
Recall: [1.0, 1.0, 1.0, 1.0, 1.0]
Precision: [1.0, 1.0, 1.0, 1.0, 1.0]


In [70]:
[X_test.loc[:, f].values for f in features]

[array([  0,   0,   0,   0,   0,   0,   1,   1,   1,   1,   1,   1,   1,
          2,   2,   2,   3,   3,   3,   3,   3,   3,   3,   4,   4,   4,
          4,   4,   4,   4,   5,   5,   5,   5,   5,   5,   5,   6,   6,
          6,   6,   6,   6,   6,   7,   7,   7,   7,   7,   7,   7,   8,
          8,   8,   8,   8,   8,   8,   9,  10,  10,  10,  10,  10,  10,
         10,  10,  10,  10,  10,  10,  10,  10,  11,  11,  11,  12,  12,
         12,  13,  13,  13,  14,  14,  14,  14,  15,  16,  16,  18,  18,
         18,  18,  18,  19,  19,  19,  19,  19,  19,  19,  19,  19,  19,
         19,  19,  19,  19,  19,  19,  19,  19,  19,  19,  19,  21,  21,
         21,  21,  22,  22,  22,  22,  22,  22,  22,  22,  22,  22,  22,
         22,  22,  22,  24,  24,  24,  24,  24,  24,  24,  26,  26,  27,
         28,  28,  28,  28,  28,  28,  28,  29,  29,  29,  29,  29,  29,
         30,  30,  30,  30,  30,  30,  30,  30,  31,  31,  31,  31,  31,
         31,  31,  32,  32,  33,  34,  34,  34,  34

In [125]:
#my_model.fit([train.loc[:, f].values for f in features], train.target.values)
my_model.fit([X_train.loc[:, f].values for f in features], y_train.values, verbose=1, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x207c7a4eaf0>

In [126]:
my_model.save("my_new_model")

INFO:tensorflow:Assets written to: my_new_model\assets


In [127]:
my_model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_55 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_56 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_57 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_58 (InputLayer)           [(None, 1)]          0                                            
____________________________________________________________________________________________

In [128]:
y_pred = my_model.predict([X_test.loc[:, f].values for f in features])

In [129]:
print("Accuracy:",metrics.accuracy_score(y_test.values, y_pred))

Accuracy: 0.896


In [17]:
my_model.get_layer("permission").get_weights()

[array([[ 0.0236971 , -0.02439447,  0.04988014, ..., -0.03329818,
          0.01238285, -0.01653262],
        [-0.02649513, -0.0303742 , -0.00501538, ..., -0.03904109,
         -0.00638444, -0.02259495],
        [-0.0424075 ,  0.01156838,  0.04868475, ...,  0.00492169,
         -0.031917  , -0.03535283],
        ...,
        [ 0.02493793,  0.01048891, -0.01990288, ..., -0.01620464,
          0.01831177,  0.02150958],
        [-0.01409584, -0.04994596,  0.04830456, ..., -0.02942899,
         -0.01914295, -0.01040977],
        [ 0.04107216,  0.01693685,  0.02811709, ..., -0.02186021,
          0.03424224,  0.01443794]], dtype=float32)]

In [30]:
permission_embeddings = {idx:my_model.get_layer("permission").get_weights()[0][idx] for w, idx in le_dict["permission"].items()}

In [35]:
permission_embedding_df = pd.DataFrame(permission_embeddings)
#permission_embedding_df = permission_embedding_df.T.reset_index()
permission_embedding_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,0.032629,-0.045375,0.023691,0.037191,-0.011904,0.033297,-0.039782,0.016069,-0.021738,0.043111,...,0.047323,-0.03696,-0.022107,-0.044109,0.027986,0.002047,0.033356,0.036334,-0.021651,0.046466
1,0.031119,-0.010701,-0.013801,-0.039492,-0.01403,-0.038986,0.005771,0.034196,-0.003438,0.01558,...,0.045278,0.037575,-0.007817,-0.030444,-0.041709,-0.048312,0.026893,-0.00478,-0.030721,0.028002
2,0.017952,0.031193,-0.027952,-0.028192,0.008781,0.044052,-0.001028,-0.031131,-0.015659,-0.043965,...,0.028248,0.044628,0.011676,0.034615,-0.032429,-0.00016,-0.026391,-0.008523,0.038459,0.018668
3,-0.030226,-0.031321,0.003618,0.034095,0.047185,0.034276,-0.030898,0.032977,-0.016838,0.049376,...,-0.025781,-0.0413,-0.011116,-0.02052,-0.000356,-0.044129,0.034618,0.017882,-0.025857,0.021383
4,0.037154,-0.004197,0.044984,-0.048356,0.012484,0.034368,-0.035818,-0.012171,-0.015074,0.004183,...,-0.037524,0.0248,0.040639,-0.024927,-0.048248,0.044075,-0.004159,0.015933,-0.045526,-0.004688
5,0.005073,0.049996,-0.015102,-0.047156,0.040626,-0.000618,0.011928,0.004867,-0.046875,0.027154,...,-0.011989,-0.010595,-0.028256,-0.027804,-0.008918,0.001026,0.046485,0.037144,-0.015061,-0.048354
6,0.02207,0.012359,0.020972,-0.032919,-0.037272,0.015368,-0.00012,0.026036,0.007118,0.032916,...,-0.043838,-0.023412,-0.032838,0.028695,-0.044244,-0.042545,0.033719,-0.000427,0.027758,-0.031552
7,0.019597,-0.000331,-0.022365,0.027868,-0.003248,0.048044,0.002511,0.040904,0.034356,-0.026975,...,-0.027607,0.00019,0.030654,0.047115,-0.014275,-0.025273,-0.026795,-0.015634,0.008284,-0.033926
8,-0.013688,-0.016156,0.02919,0.044086,0.047218,-0.02093,-0.009323,-0.020942,-0.019589,0.039026,...,0.047855,-0.009904,-0.048085,-0.00066,0.046111,-0.00606,-0.045492,0.014946,-0.041766,0.048877
9,0.00084,0.00604,-0.049866,0.015247,0.037644,-0.029393,-0.03197,0.008562,0.004544,-0.011436,...,0.029154,0.038628,-0.032455,0.027392,-0.033383,0.026653,-0.020745,-0.043171,-0.026407,0.049922


In [44]:
permission_embedding_df[0]
permission_dict = dict()
index = 0
for row in permission_embedding_df:
    permission_dict[str(index)] = list(permission_embedding_df[index])
    index += 1
print(permission_dict)

{'0': [0.03262854740023613, 0.031118523329496384, 0.01795211061835289, -0.03022627905011177, 0.0371541865170002, 0.005072973668575287, 0.022069964557886124, 0.019596923142671585, -0.013687945902347565, 0.0008403174579143524, 0.007809747010469437, 0.040121737867593765], '1': [-0.045374657958745956, -0.010701429098844528, 0.031192611902952194, -0.03132051229476929, -0.004197381436824799, 0.04999622330069542, 0.012358643114566803, -0.00033086538314819336, -0.016155730932950974, 0.006040416657924652, -0.048423826694488525, 0.007924556732177734], '2': [0.023691419512033463, -0.013801049441099167, -0.02795201539993286, 0.003617800772190094, 0.044984448701143265, -0.015102170407772064, 0.020971599966287613, -0.022365165874361992, 0.02918977662920952, -0.04986613988876343, -0.019242633134126663, 0.019024956971406937], '3': [0.03719137981534004, -0.039491795003414154, -0.02819218672811985, 0.03409517928957939, -0.0483560673892498, -0.04715639352798462, -0.03291868045926094, 0.02786809578537941,

In [37]:
result = 1 - spatial.distance.cosine(permission_embedding_df[0], permission_embedding_df[1])

In [45]:
permission_embedding_df[0]

0     0.032629
1     0.031119
2     0.017952
3    -0.030226
4     0.037154
5     0.005073
6     0.022070
7     0.019597
8    -0.013688
9     0.000840
10    0.007810
11    0.040122
Name: 0, dtype: float32

In [38]:
result

0.02659454755485058