In [62]:
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.optimizers import Adam

# Load the dataset
upc_data = pd.read_csv(r"D:\2024_09_MSIB Bangkit\scaneat_\upc_type.csv")

# Encode the 'type' column (food or beverage) into numeric values
label_encoder = LabelEncoder()
upc_data['encoded_type'] = label_encoder.fit_transform(upc_data['type'])

# Define the model
model = Sequential()

# Use an embedding layer to handle the UPC codes as categorical data
model.add(Embedding(input_dim=10000, output_dim=64, input_length=1))  # Assuming max UPC code is <10000
model.add(Flatten())

# Add a dense layer to output the class prediction (food or beverage)
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output: 0 for food, 1 for beverage

# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Prepare the UPC codes (convert them to numeric values)
X = upc_data['upc'].astype('category').cat.codes.values
y = upc_data['encoded_type'].values

# Train the model
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

# Save the model
model.save("upc_type_predictor.h5")
print("Model saved successfully!")


Epoch 1/10




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 160ms/step - accuracy: 0.5243 - loss: 0.6925 - val_accuracy: 0.5000 - val_loss: 0.6914
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.6389 - loss: 0.6867 - val_accuracy: 0.5000 - val_loss: 0.6887
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.6354 - loss: 0.6806 - val_accuracy: 0.5000 - val_loss: 0.6851
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.6007 - loss: 0.6733 - val_accuracy: 0.5000 - val_loss: 0.6817
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.6736 - loss: 0.6635 - val_accuracy: 0.5000 - val_loss: 0.6783
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.7639 - loss: 0.6520 - val_accuracy: 0.5000 - val_loss: 0.6754
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37



Model saved successfully!


In [63]:
from tensorflow.keras.layers import Dropout
model.add(Dropout(0.5))  # Add after Dense layers


from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Adding Dropout and EarlyStopping
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=64, input_length=1))  
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))  # Adding dropout
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # Adding dropout
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  

model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Model training with validation and early stopping
model.fit(X, y, epochs=30, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Save the model
model.save("upc_type_predictor_optimized.h5")
print("Optimized model saved successfully!")


Epoch 1/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 189ms/step - accuracy: 0.3993 - loss: 0.6944 - val_accuracy: 0.5000 - val_loss: 0.6906
Epoch 2/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.5243 - loss: 0.6925 - val_accuracy: 0.5000 - val_loss: 0.6905
Epoch 3/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.5868 - loss: 0.6910 - val_accuracy: 0.5000 - val_loss: 0.6896
Epoch 4/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.6771 - loss: 0.6871 - val_accuracy: 0.5000 - val_loss: 0.6892
Epoch 5/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.6007 - loss: 0.6897 - val_accuracy: 0.5000 - val_loss: 0.6883
Epoch 6/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6250 - loss: 0.6841 - val_accuracy: 0.5000 - val_loss: 0.6880
Epoch 7/30
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━



Optimized model saved successfully!


In [66]:
from sklearn.metrics import confusion_matrix
import numpy as np

y_pred = (model.predict(X) > 0.5).astype("int32")
cm = confusion_matrix(y, y_pred)
print(cm)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[[24  4  0]
 [ 1 30  0]
 [ 0  1  0]]


In [64]:
import pandas as pd
from pyzbar.pyzbar import decode
import tensorflow as tf
import numpy as np

# Function to load the trained model
def load_model(model_path="upc_type_predictor.h5"):
    return tf.keras.models.load_model(model_path)

# Load the UPC type CSV
upc_data = pd.read_csv("D:/2024_09_MSIB Bangkit/scaneat_/upc_type.csv")

# Ensure the UPC codes in the CSV are strings and remove any extra whitespace
upc_data['upc'] = upc_data['upc'].astype(str).str.strip()

# Encode the 'type' column (food or beverage) into numeric values
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
upc_data['encoded_type'] = label_encoder.fit_transform(upc_data['type'])

# Prepare the UPC dictionary for quick lookup
upc_dict = dict(zip(upc_data['upc'], upc_data['type']))

# Function to preprocess and predict the product type based on UPC
def predict_product_type(upc_code, model):
    # Encode the UPC code (convert to numeric representation)
    upc_code_numeric = pd.Series(upc_code).astype('category').cat.codes[0]

    # Predict the product type using the trained model
    prediction = model.predict(np.array([upc_code_numeric]))
    predicted_class = np.round(prediction[0][0])

    # Map prediction to type
    if predicted_class == 0:
        return "Food"
    else:
        return "Beverage"

# Function to process the uploaded image and make predictions
def process_image(image_path, model):
    # Load the image
    import cv2
    frame = cv2.imread(image_path)

    # Decode the UPC barcode from the image
    barcodes = decode(frame)
    for barcode in barcodes:
        # Get the barcode data (UPC code) as a string
        upc_code = barcode.data.decode('utf-8').strip()

        # Debug: print the UPC code being processed
        print(f"Detected UPC code: {upc_code}")

        # Check if the UPC code is in the dictionary
        if upc_code in upc_dict:
            product_type = upc_dict[upc_code]
            print(f"UPC: {upc_code}, Type: {product_type}")

            # Predict the product type (Food or Beverage) based on UPC code
            predicted_type = predict_product_type(upc_code, model)
            print(f"Predicted Type for UPC {upc_code}: {predicted_type}")

        else:
            print(f"UPC {upc_code} not found in the database")

# Load the trained model
model = load_model("upc_type_predictor_optimized.h5")



In [65]:
# Example image path (replace with the image path you want to use)
image_path = r"D:\2024_09_MSIB Bangkit\scaneat_\dataset\8996001354063\27.png"  # Update this path

# Process the uploaded image and print predictions
process_image(image_path, model)


Detected UPC code: 8996001354063
UPC: 8996001354063, Type: food
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Predicted Type for UPC 8996001354063: Beverage
