In [None]:
%matplotlib notebook

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)

import json
from collections import OrderedDict
import copy
import numpy as np
import matplotlib.pyplot as plt

def _xy_columns(station, stem, count):
    """Return the X/Y column names of one station, interleaved per index.

    For ``count == 2`` the result is
    ``[station/stemX1, station/stemY1, station/stemX2, station/stemY2]``.
    """
    names = []
    for index in range(1, count + 1):
        for axis in ("X", "Y"):
            names.append("{}/{}{}{}".format(station, stem, axis, index))
    return names


# Row template: the loading cell copies this mapping for every record, so the
# key order below is the column order of the resulting data matrix.
# Layout: two string columns (Id, Type), the numeric measurements grouped by
# station (default 0), and the boolean Label as the last column.
Features = OrderedDict()
Features["Id"] = ""
Features["Type"] = ""
Features.update(
    (name, 0)
    for name in (
        ["ScreenPrinter/PositionX", "ScreenPrinter/PositionY"]
        + _xy_columns("PasteInspection", "Pos", 6)
        + _xy_columns("PickAndPlace", "Marker", 2)
        + _xy_columns("AOI1", "Pos", 6)
        + ["Owen1/Temp1", "Owen2/Temp2", "Owen3/Temp3"]
        + _xy_columns("AOI2", "Pos", 6)
        + [
            "Housing/HScrew",
            # parallel stations share a single column
            "ConAssembly1or2/Con1or2Screw",
            "PtAssembly1/Pt1Screw1",
            "PtAssembly1/Pt1Screw2",
            "PtAssembly2or3/Pt2or3",
            "Welding/WeldFrequency",
        ]
    )
)
Features["Label"] = False

In [None]:
# Measurement names reported by parallel stations, mapped onto the single
# shared column that the Features template provides for them.
PARALLEL_STATION_COLUMNS = {
    "ConAssembly1/Con1Screw": "ConAssembly1or2/Con1or2Screw",
    "ConAssembly2/Con2Screw": "ConAssembly1or2/Con1or2Screw",
    "PtAssembly2/Pt2": "PtAssembly2or3/Pt2or3",
    "PtAssembly3/Pt2": "PtAssembly2or3/Pt2or3",
}

# Read one JSON record per line and flatten each into a row whose columns
# follow the key order of Features (Id, Type, measurements..., Label).
data = []
with open("/data/ABU1.txt") as f:
    for line_number, line in enumerate(f, start=1):
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline
        result = json.loads(line)['ResultValue']

        features = copy.deepcopy(Features)
        features["Id"] = result['label']['bn']
        features["Type"] = result['type']['e'][0]['sv']
        features["Label"] = result['label']['e'][0]['bv']

        for entry in result['measurements']['e']:
            feature_name = PARALLEL_STATION_COLUMNS.get(entry['n'], entry['n'])
            # An unknown name would append a new key: the row would become
            # longer than the others and Label would no longer be its last
            # value. Fail here with a precise message instead.
            if feature_name not in features:
                raise ValueError(
                    "line {}: unexpected measurement name {!r}".format(
                        line_number, entry['n']))
            features[feature_name] = entry['v']

        data.append(list(features.values()))
print(len(data))

In [None]:
# Rows mix strings, numbers and booleans; the label column is compared below
# against the strings 'True' (good) and 'False' (faulty).
data = np.asarray(data)
numerics = data[:,2:-1]  # measurement columns only (drops Id, Type and Label)

# Random split: each row goes to the training set with probability 0.7.
# NOTE(review): no random seed is set in the visible cells, so the split
# differs between runs.
mask = np.random.rand(len(data)) < 0.7
train = data[mask]
test = data[~mask]


def _print_class_balance(name, rows):
    """Print total, good and faulty row counts of ``rows`` and its faulty ratio.

    The ratio is the faulty count divided by the size of ``rows`` itself
    (``nan`` when ``rows`` is empty).
    """
    good_count = len(rows[rows[:,-1]=='True'])
    faulty_count = len(rows[rows[:,-1]=='False'])
    ratio = float(faulty_count)/len(rows) if len(rows) else float('nan')
    print("{} Total: {} Good: {} Faulty: {} Ratio: {}".format(
        name, len(rows), good_count, faulty_count, ratio))


_print_class_balance("Train", train)
# Fix: the test ratio used to be divided by len(train) instead of len(test).
_print_class_balance("Test ", test)

In [None]:
# Balance the training set by undersampling: keep every faulty row and only
# as many good rows as there are faulty ones.
# Alternative kept for reference (oversampling, repeating each faulty row):
#   train = np.concatenate((not_faulty, np.repeat(faulty, 10, axis=0)))
train_label_column = train[:, -1]
faulty = train[train_label_column == 'False']
not_faulty = train[train_label_column == 'True']
train = np.concatenate((not_faulty[:faulty.shape[0]], faulty))

# Report the class balance of the rebuilt training set.
balanced_good = len(train[train[:, -1] == 'True'])
balanced_faulty = len(train[train[:, -1] == 'False'])
print("Train Total: {} Good: {} Faulty: {} Ratio: {}".format(
    len(train),
    balanced_good,
    balanced_faulty,
    float(balanced_faulty) / len(train)))

# Shuffle the rows so the two classes are interleaved.
train = train[np.random.permutation(train.shape[0])]

In [None]:
# Model inputs: every column except the first two (Id, Type) and the last
# (Label), converted to float32.
train_data, test_data = (
    rows[:, 2:-1].astype(np.float32) for rows in (train, test)
)
# Model targets: 1 where the label text is 'False' (faulty), 0 otherwise.
train_labels, test_labels = (
    np.array(rows[:, -1] == 'False').astype(np.int32) for rows in (train, test)
)

In [None]:
""" train """
model_path = "./tmp"
# Wipe the model directory before building the classifier
# (presumably so training does not resume from an earlier run -- confirm).
from subprocess import call
call(["rm", "-fR", model_path])

# A single real-valued column carrying all measurements of a row. The width is
# taken from the training matrix instead of being hard-coded.
feature_columns = tf.contrib.layers.real_valued_column("", dimension=train_data.shape[1])

print(feature_columns)
# DNN with three hidden layers of 25, 50 and 25 units.
# The labels are binary (0 = good, 1 = faulty), hence two classes.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=[feature_columns],
                                            hidden_units=[25, 50, 25],
                                            n_classes=2,
                                            model_dir=model_path)

# Fit model.
f = classifier.fit(x=train_data, y=train_labels, steps=30000)
print(f)

In [None]:
""" evaluate """

# Each entry: (caption printed before the metrics, inputs, labels).
# The first run scores the training set; the last two score the test rows of
# a single class each (label 0, then label 1).
evaluation_runs = [
    ('Accuracy:', train_data, train_labels),
    ('Testing:', test_data, test_labels),
    ('Testing 0s:', test_data[test_labels==0], test_labels[test_labels==0]),
    ('Testing 1s:', test_data[test_labels==1], test_labels[test_labels==1]),
]
for caption, inputs, targets in evaluation_runs:
    score = classifier.evaluate(x=inputs, y=targets)
    print(caption, score)

In [None]:
# IPython automagic (equivalent to `%pwd`): shows the kernel's current working
# directory, against which relative paths such as "./tmp" are resolved.
pwd