In [1]:
from config import *
import cv2
import pytesseract
import re
from nltk.corpus import stopwords
import requests
import pandas as pd


"""
Steps:
1. user uploads photo of ingredients ✓        
2. convert photo to text ✓
3. convert text to list ✓
4. call Cosmily API for EWG analysis ✓
5. return ingredients report to user

Error Handling:
- user input is a jpg, jpeg, or png file.
- Blurry Image
- Ingredients are not found in Cosmily database
- Image is not of ingredients or does not have any text in it

If extra time:
- scrape a website for a product's ingredients
- add dictionary instead of hardcoded phrasing
"""

"\nSteps:\n1. user uploads photo of ingredients ✓        \n2. convert photo to text ✓\n3. convert text to list ✓\n4. call Cosmily API for EWG analysis ✓\n5. return ingredients report to user\n\nError Handling:\n- user input is a jpg, jpeg, or png file.\n- Blurry Image\n- Ingredients are not found in Cosmily database\n- Image is not of ingredients or does not have any text in it\n\nIf extra time:\n- scrape a website for a product's ingredients\n- add dictionary instead of hardcoded phrasing\n"

In [7]:
# Path of the image to analyze, relative to the notebook's working directory.
image_path = './images/sensodyne.png'

In [8]:
def convert_image_to_text(path):
    """Run OCR on an image file and split the recognized text into fragments.

    Args:
        path: location of the image file, passed straight to ``cv2.imread``.

    Returns:
        list[str]: every maximal run of characters that contains none of
        ``.``, ``,``, ``:``, ``/`` or a newline. Fragments keep their
        surrounding spaces; the list is empty when no text is recognized.

    Raises:
        ValueError: if OpenCV cannot load an image from ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread does not raise on a missing or undecodable file; it
        # returns None, which would otherwise fail later with an opaque error.
        raise ValueError(
            f"Could not read an image from {path!r}: "
            "the file is missing or is not a supported image format"
        )

    # OpenCV loads pixels in BGR order; convert to RGB before handing to Tesseract.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # If the words come out too blurry, a config such as
    # r'--oem 3 --psm 6' can be passed to image_to_string.
    raw_text = pytesseract.image_to_string(img_rgb)
    return re.findall(r'[^.,:/\n]+', raw_text)

# OCR the image; `text` is the list of raw fragments returned by convert_image_to_text.
text = convert_image_to_text(image_path)

In [9]:
def preprocess(text):
    """Clean OCR fragments and join them into one comma-separated string.

    A fragment is dropped when, after trimming whitespace, it is empty,
    contains the word "ingredients" (any case), or is an English stop word
    (any case). Surviving fragments keep their original casing.

    Args:
        text: iterable of string fragments, e.g. the list returned by
            convert_image_to_text.

    Returns:
        str: the surviving fragments joined with ', '; an empty string when
        nothing survives.
    """
    stop_words = set(stopwords.words('english'))
    ingredients_list = []

    for item in text:
        # Trim both ends: fragments split on punctuation carry trailing as
        # well as leading spaces, and a trailing space defeats the stop-word lookup.
        item = item.strip()
        if not item:
            # Whitespace-only fragments would otherwise show up as empty entries.
            continue

        lowered = item.lower()
        # The stop-word list is lowercase, so compare the lowercased form.
        if "ingredients" in lowered or lowered in stop_words:
            continue

        ingredients_list.append(item)

    return ', '.join(ingredients_list)

# Clean the OCR fragments into a single comma-separated ingredient string.
ingredients = preprocess(text)

In [None]:
def analyze(ingredients, timeout=30):
    """Request an ingredient-list analysis from the Cosmily API.

    Args:
        ingredients: the ingredient list as a single string (the notebook
            passes the comma-separated output of preprocess).
        timeout: seconds to wait for the server before requests gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The value stored under the 'analysis' key of the JSON response.

    Raises:
        requests.HTTPError: the API answered with a 4xx/5xx status.
        KeyError: the response JSON has no 'analysis' key.
    """
    api_url = 'https://api.cosmily.com/api/v1/analyze/ingredient_list'
    headers = {"Content-Type": "application/json", "Authorization": AUTH_TOKEN}

    # NOTE(review): the ingredients are sent as URL query parameters even though
    # the Content-Type header announces JSON — confirm against the Cosmily API
    # docs whether this should be a JSON body (json=...) instead.
    params = {"ingredients": ingredients}

    response = requests.post(api_url, params=params, headers=headers, timeout=timeout)

    # Fail loudly on auth/quota/server errors instead of hitting a KeyError below.
    response.raise_for_status()
    return response.json()['analysis']

# Network call: send the ingredient string to the Cosmily API and keep the 'analysis' part of the reply.
analysis = analyze(ingredients)

In [None]:
def report(analysis):
    """Print a one-sentence summary of an ingredient analysis.

    Args:
        analysis: mapping that provides a "total_ingredients" entry.

    Returns:
        None. The summary is written to standard output.
    """
    total = analysis["total_ingredients"]
    print(f'This product contains {total} ingredients.')

    # TODO: sections still to be written (keys taken from the earlier draft;
    # verify them against a real API response):
    #   - EWG stats from analysis["ewg"]
    #   - "DETRIMENTS": loop through and list analysis["negatives"] and
    #     analysis["harmful"]
    #   - "BENEFITS": loop through and list analysis["positives"] and
    #     analysis["notable"]
    #   - ask "Would you like to view the full ingredients data breakdown? (Y/N)"
    #     and, on 'y', show pd.DataFrame(analysis["ingredients_table"])
    #   - end of report

# Print the summary for the analysis held in `analysis`.
report(analysis)