## Imports 

In [None]:
import pandas as pd
import tensorflow as tf
import pickle
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import nltk

## NLTK data downloads and lemmatizer setup

In [None]:
# Fetch the NLTK data packages this notebook relies on.
nltk.download('punkt')
# Newer NLTK releases (3.9+) make word_tokenize load the pickle-free
# 'punkt_tab' tables instead of 'punkt'; without this package the
# tokenizer call in clean_item raises LookupError on those versions.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Single shared lemmatizer instance, reused for every item that is cleaned.
lemmatizer = WordNetLemmatizer()

## load in the model + vectorizer 

In [None]:
# Restore the saved Keras classifier from the models directory.
model = tf.keras.models.load_model("../models/grocery_classifier_model.keras")

# The vectorizer is stored as a pickle next to the model.
# NOTE(review): pickle.load executes arbitrary code from the file, so only
# load artifacts produced by this project.
with open("../models/vectorizer.pkl", mode="rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

## Load the category mapping

In [None]:
# Read the processed grocery data; only its two category columns are used here.
df = pd.read_csv("../data/processed/groceries_processed.csv")

# One row per distinct (category, category_id) pair, ordered by category_id.
categories = (
    df.loc[:, ['category', 'category_id']]
    .drop_duplicates()
    .sort_values(by='category_id')
)

# Lookup table from category_id to the category name.
id_to_category = dict(zip(categories['category_id'], categories['category']))

## Define the preprocessing function

In [None]:
def clean_item(text):
    """Normalize a raw item name into a space-separated string of lemmas.

    The input is converted to a string and lower-cased, every character
    that is not a-z or whitespace is removed, the remainder is split with
    NLTK's word_tokenize, and each token is passed through the module-level
    lemmatizer. The lemmas are joined back together with single spaces.
    """
    lowered = str(text).lower()
    # Removed characters are dropped outright, not replaced by a space.
    letters_only = re.sub(r"[^a-z\s]", "", lowered)
    lemmas = map(lemmatizer.lemmatize, word_tokenize(letters_only))
    return " ".join(lemmas)

## Prediction function

In [None]:
def predict_category(item_name):
    """Return the category name the model predicts for ``item_name``.

    The name is cleaned with clean_item, vectorized as a one-element batch,
    and passed to the model. The index of the highest score in the single
    output row is looked up in id_to_category.

    Raises:
        KeyError: if the winning index is not a key of id_to_category.
    """
    # The vectorizer takes a list of documents, so wrap the single item.
    features = vectorizer.transform([clean_item(item_name)]).toarray()
    scores = model.predict(features)
    # Only one row was submitted, so take the argmax of row 0.
    best_index = scores.argmax(axis=1)[0]
    return id_to_category[best_index]

## Example usage: try it for yourself!

In [None]:
# Interactive prompt: keep classifying typed item names until the user
# types 'exit' (any letter case, surrounding whitespace ignored).
while True:
    try:
        user_input = input("Enter a grocery item (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted while waiting for
        # input; leave the loop instead of surfacing a traceback.
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # A blank line has no tokens to classify, so ask again.
        continue
    category = predict_category(user_input)
    print(f"Predicted category: {category}")

## Some improvements are still needed: the predictions are a little off. For example, try "fish" (my favourite fruit and veg!).