In [1]:
import os
import csv
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [81]:
# all attributes and class
attr_list = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class_values"]
to_train = ["maint", "doors", "lug_boot", "safety", "class_values"]
attr_index = {
    "buying": {0:"vhigh", 1:"high", 2:"med", 3:"low"},
    "maint": {0:"vhigh", 1:"high", 2:"med", 3:"low"},
    "doors": {0:"2", 1:"3", 2:"4", 3:"5more"},
    "persons": {0:"2", 1:"4", 2:"more"},
    "lug_boot": {0:"small", 1:"med", 2:"big"},
    "safety": {0:"low", 1:"med", 2:"high"},
    "class_values": {0:"unacc", 1:"acc", 2:"good", 3:"vgood"}
}

#  index list
reverse_index = {k:{attr_index[k][index]: index for index in attr_index[k]} for k in attr_list}

def convert_to_index(row):
    retval = []
    for i, attr in enumerate(row):
        retval.append((reverse_index[attr_list[i]][attr]+1)/4)
    return retval
        

In [92]:
data_path = os.path.join('.', 'data', 'car.data')
df_data = []
# load data
with open(data_path, "r") as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    for row in csvreader:
        df_data.append(convert_to_index(row))
        
df = pd.DataFrame(df_data, columns=attr_list)
df = df.drop(['persons'], axis=1)
df = df.sample(frac = 1)
df = df.reset_index(drop=True)

df['train_data'] = df[to_train].values.tolist()

label_data = df['buying'].to_list()
train_data = df['train_data'].to_list()
           
# len(train_data), len(label_data)
# label_data[1000], train_data[1000]

In [93]:
# Select training set/validation set/test set
x_train = np.asarray(train_data)
y_train = np.asarray(label_data)


In [94]:
%%time
tf.keras.backend.clear_session()
max_features = 6000
embedding_dim = 25
#create model
model = tf.keras.Sequential([
  keras.layers.Embedding(max_features + 1, embedding_dim),
  keras.layers.Dropout(0.2),
  keras.layers.GlobalAveragePooling1D(),
  keras.layers.Dropout(0.2),
  keras.layers.Dense(128, activation='relu'),
  keras.layers.Dense(4, activation='sigmoid')])

model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 25)          150025    
_________________________________________________________________
dropout (Dropout)            (None, None, 25)          0         
_________________________________________________________________
global_average_pooling1d (Gl (None, 25)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 25)                0         
_________________________________________________________________
dense (Dense)                (None, 128)               3328      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 516       
Total params: 153,869
Trainable params: 153,869
Non-trainable params: 0
__________________________________________________

In [95]:
%%time

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
fitModel = model.fit(x=x_train, y=y_train, epochs=10, batch_size=500, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Wall time: 416 ms


In [96]:
test_loss, test_acc = model.evaluate(x_train,  y_train, verbose=2)
print('\nTest accuracy:', test_acc)

54/54 - 0s - loss: 0.7680 - accuracy: 0.0000e+00

Test accuracy: 0.0


In [97]:
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
predictions = probability_model.predict([x_train[1000]])
print(predictions[0], label_data[1000], train_data[1000])

[0.32433265 0.25857854 0.20868695 0.20840186] 0.5 [0.75, 0.5, 0.25, 0.75, 0.25]


In [98]:
def get_list(attr):
    retval = []
    for key in attr_list[1:]:
        if key in attr:
            retval.append((reverse_index[key][attr[key]]+1)/4)
        else:
            retval.append(0)
    return retval
        

check_attr = {
    "maint": "high",
    "doors": "4",
    "lug_boot": "big",
    "safety": "high",
    "class_values": "good"
    }
check_attr_index = get_list(check_attr)
predictions = probability_model.predict([check_attr_index])
print(predictions[0])

[0.32433265 0.25857854 0.20868695 0.20840186]
