## Imports 

In [1]:
import pandas as pd
import tensorflow as tf
import pickle
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import nltk

## NLTK imports

In [2]:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pipki\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\pipki\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\pipki\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


## load in the model + vectorizer 

In [3]:
model = tf.keras.models.load_model("../models/grocery_classifier_model.keras")

with open("../models/vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

  saveable.load_own_variables(weights_store.get(inner_path))


## load the catagory mapping 

In [4]:
df = pd.read_csv("../data/processed/groceries_processed.csv")
categories = df[['category', 'category_id']].drop_duplicates().sort_values('category_id')
id_to_category = dict(zip(categories['category_id'], categories['category']))

## defining the preprocessing model

In [5]:
def clean_item(text):
    text = str(text).lower()
    text = re.sub(r"[^a-z\s]", "", text)
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return " ".join(tokens)

## Prediction funct

In [6]:
def predict_category(item_name):
    item_clean = clean_item(item_name)
    X_vec = vectorizer.transform([item_clean]).toarray()
    pred_prob = model.predict(X_vec)
    pred_class = pred_prob.argmax(axis=1)[0]
    category_name = id_to_category[pred_class]
    return category_name

## Example user uses it... try it for yourself! 

In [None]:
while True:
    user_input = input("Enter a grocery item (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    category = predict_category(user_input)
    print(f"Predicted category: {category}")

Enter a grocery item (or 'exit' to quit):  cheese


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Predicted category: Pantry / Dry Goods


Enter a grocery item (or 'exit' to quit):  eggs


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted category: Household


Enter a grocery item (or 'exit' to quit):  chicken 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predicted category: Frozen Foods


KeyboardInterrupt: Interrupted by user

## There still needs to be some improvements... the predictions are a little off, for example try "fish" (my favourite fruit and veg!). 