# Importing Modules

In [None]:
import os
import io
import json
from PIL import Image, ImageDraw
import cv2
import re
import sys
import pandas as pd
import numpy as np
import copy
import csv
import pytesseract
from pytesseract import Output
import pyttsx3
import requests

# Pytesseract executable + custom configuration

In [None]:
# Point pytesseract at the Tesseract executable (hard-coded Windows install path;
# adjust it when running on another machine).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Command-line flags passed to Tesseract on every OCR call in this notebook:
# OCR engine mode 3 and page segmentation mode 6.
custom_config = r'--oem 3 --psm 6'

# Preprocessing functions

In [None]:
# get grayscale image
def get_grayscale(image):
    """Convert a BGR image to a single-channel grayscale image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

# noise removal
def remove_noise(image):
    """Return ``image`` smoothed by a median blur with an aperture size of 5."""
    aperture_size = 5
    return cv2.medianBlur(image, aperture_size)
 
#thresholding
def thresholding(image):
    """Binarise ``image`` using a binary threshold combined with Otsu's flag."""
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns (threshold_value, thresholded_image); only the image is needed.
    _, binary = cv2.threshold(image, 0, 255, mode)
    return binary

#dilation
def dilate(image):
    """Dilate ``image`` once with a 5x5 all-ones structuring element."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
    
#erosion
def erode(image):
    """Erode ``image`` once with a 5x5 all-ones structuring element."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

#opening - erosion followed by dilation
def opening(image):
    """Apply a morphological opening (erosion then dilation) with a 5x5 kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened

#canny edge detection
def canny(image):
    """Run Canny edge detection on ``image`` with thresholds 100 and 200."""
    lower_threshold, upper_threshold = 100, 200
    return cv2.Canny(image, lower_threshold, upper_threshold)

#skew correction
def deskew(image):
    """Rotate ``image`` so that its non-zero (foreground) pixels are axis-aligned.

    The skew angle is taken from the minimum-area rectangle that encloses all
    pixels greater than zero; the image is then rotated about its centre by
    the opposite angle.

    Parameters:
        image: array whose first two dimensions are (height, width).

    Returns:
        The rotated image, same size as the input. If the image has no pixel
        greater than zero, an unmodified copy is returned because no angle can
        be estimated.
    """
    # np.where yields platform-sized integer indices (often int64), which
    # cv2.minAreaRect may reject; float32 points are always accepted.
    coords = np.column_stack(np.where(image > 0)).astype(np.float32)
    if coords.shape[0] == 0:
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # NOTE(review): this normalisation assumes minAreaRect reports angles in
    # [-90, 0). Newer OpenCV releases reportedly use a different range --
    # confirm against the installed version.
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated


# Model

In [None]:
def is_all_ascii(s):
    """Return True when every character of ``s`` has a code point below 128."""
    for ch in s:
        if ord(ch) >= 128:
            return False
    return True

def is_ascii(s):
    """Return ``s`` with every character whose code point is 128 or above removed.

    Despite the name this is a filter, not a predicate: the result is a string.
    """
    return "".join(ch for ch in s if ord(ch) < 128)

In [None]:
# Month names in calendar order; the three-letter abbreviation is the prefix.
_MODEL_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
)

# Price such as "45.00", "120-50" or "99 00": 2-3 digits, optional separators, 2 digits.
_MODEL_PRICE_RE = re.compile(r"(\d{3}|\d{2})(\.|-|\/|\s)*(\d{2})")

# "<month name or 1-2 digit month><optional separators><2 or 4 digit year>".
_MODEL_DATE_RE = re.compile(
    r"(((jan(uary)?|feb(ruary)?|mar(ch)?|apr(il)?|may|jun(e)?|jul(y)?|aug(ust)?|sep(tember)?|oct(ober)?|nov(ember)?|dec(ember)?)|(\d{2}|\d{1}))(\.|-|\/|\s)*(\d{4}|\d{2}))"
)

# Splits a matched date into its month token and year digits; at least one
# separator ('.', '-', '/' or whitespace) is required between them.
_MODEL_MONTH_YEAR_RE = re.compile(r"(\w+)[.\-\s/]+(\d+)")


def _model_unique_words(texts):
    """Join OCR snippets into one string, dropping non-ASCII characters and repeats."""
    words = " ".join(is_ascii(text) for text in texts).split()
    kept = ""
    for word in words:
        # Substring test kept from the original logic: a word that already occurs
        # inside the accumulated name (even within a longer word) is skipped.
        if word not in kept:
            kept = (kept + " " + word).strip()
    return kept


def _model_month_number(token):
    """Return 1-12 for a month name, abbreviation or numeric token; None otherwise."""
    for number, name in enumerate(_MODEL_MONTH_NAMES, start=1):
        if token == name or token == name[:3]:
            return number
    try:
        number = int(token)
    except ValueError:
        return None
    return number if 1 <= number <= 12 else None


def _model_parse_dates(texts):
    """Collect [month, year] pairs found in every text of ``texts``, in order."""
    dates = []
    for text in texts:
        for match in _MODEL_DATE_RE.finditer(text.lower()):
            parts = _MODEL_MONTH_YEAR_RE.search(match.group(0))
            if parts is None:
                # e.g. "122021": no separator between month and year.
                continue
            month = _model_month_number(parts.group(1))
            year = parts.group(2)
            if month is not None and len(year) in (2, 4):
                dates.append([month, int(year)])
    return dates


def _model_date_label(date):
    """Format a [month, year] pair as e.g. 'march 2021'."""
    return _MODEL_MONTH_NAMES[date[0] - 1] + " " + str(date[1])


def model_extraction(pic, data):
    """Fill ``data`` from the Nanonets object-detection model's predictions.

    The image at ``pic`` is uploaded to the Nanonets LabelFile endpoint. The
    OCR text of predictions labelled ``name``, ``mrp``, ``manufacturing_date``
    and ``expiry_date`` is then parsed and written to the ``"Name"``,
    ``"M R P"``, ``"Manufacturing Date"`` and ``"Expiry Date"`` keys of
    ``data``. Keys for which nothing usable was found are left untouched.

    Parameters:
        pic: path of the image file to upload.
        data: dict to update in place.

    Returns:
        The same ``data`` dict.
    """
    # NOTE(review): the model id and API key are committed in source; consider
    # loading them from configuration and rotating the key.
    model_id = '9150c16c-2874-4779-aa7d-8c6caaa70d70'
    api_key = 'MhqqrwSw5FmZe10HlwZMVIQ55_lL9HPs'
    url = 'https://app.nanonets.com/api/v2/ObjectDetection/Model/' + model_id + '/LabelFile/'

    # The context manager guarantees the image handle is closed after the upload.
    with open(pic, 'rb') as image_file:
        payload = {'file': image_file, 'modelId': ('', model_id)}
        response = requests.post(url, auth=requests.auth.HTTPBasicAuth(api_key, ''), files=payload)

    predictions = response.json()["result"][0]["prediction"]

    texts_by_label = {"name": [], "mrp": [], "manufacturing_date": [], "expiry_date": []}
    for prediction in predictions:
        text = prediction["ocr_text"].replace("\n", " ")
        bucket = texts_by_label.get(prediction["label"])
        if bucket is not None:
            bucket.append(text)

    # ---- Name
    name = _model_unique_words(texts_by_label["name"])
    if name != "":
        data["Name"] = name

    # ---- M R P: the first price-looking match across the mrp snippets wins.
    for text in texts_by_label["mrp"]:
        price_match = _MODEL_PRICE_RE.search(text.lower())
        if price_match is not None:
            data["M R P"] = float(price_match.group(1) + "." + price_match.group(3))
            break

    # ---- Manufacturing / expiry dates (matches from every snippet are kept).
    mdates = _model_parse_dates(texts_by_label["manufacturing_date"])
    edates = _model_parse_dates(texts_by_label["expiry_date"])

    def by_year_then_month(date):
        return (date[1], date[0])

    if len(mdates) > 1 and not edates:
        # Both dates were labelled as manufacturing: the earlier one is the
        # manufacturing date, the later one the expiry date.
        earlier, later = sorted(mdates[:2], key=by_year_then_month)
        data["Manufacturing Date"] = _model_date_label(earlier)
        data["Expiry Date"] = _model_date_label(later)
    elif len(edates) > 1 and not mdates:
        # Same disambiguation when both dates were labelled as expiry.
        earlier, later = sorted(edates[:2], key=by_year_then_month)
        data["Manufacturing Date"] = _model_date_label(earlier)
        data["Expiry Date"] = _model_date_label(later)
    else:
        if mdates:
            data["Manufacturing Date"] = _model_date_label(mdates[0])
        if edates:
            data["Expiry Date"] = _model_date_label(edates[0])

    return data


# Name extraction logic

In [None]:
def extract_name(pic, data):
    """Guess the product name as the tallest confidently recognised long word.

    The image at ``pic`` is converted to grayscale, morphologically opened and
    OCR'd with pytesseract. Among words with confidence above 30 and more than
    five characters, the one with the greatest bounding-box height is stored
    under ``data["Name"]`` (the first such word on ties).

    Parameters:
        pic: path of the image file.
        data: dict to update in place.

    Returns:
        The same ``data`` dict. It is left unchanged when no word qualifies,
        rather than storing an arbitrary OCR token.
    """
    image = cv2.imread(pic)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    threshold_img = opening(gray_image)

    details = pytesseract.image_to_data(threshold_img, output_type=Output.DICT, config=custom_config, lang="eng")

    best_height = 0
    best_word = None
    for text, conf, height in zip(details['text'], details['conf'], details['height']):
        # Confidence may arrive as an int, a float or a numeric string
        # depending on the pytesseract/Tesseract versions in use.
        if int(float(conf)) <= 30:
            continue
        if len(text) > 5 and height > best_height:
            best_height = height
            best_word = text

    if best_word is not None:
        data.update({"Name": best_word})    # setting name
    return data


# Purple extraction logic

In [None]:
def purple(pic, data):
    """Extract price and dates by OCR of a rotated HSV rendering of the image.

    The image at ``pic`` is converted to HSV, rotated 90 degrees clockwise,
    collapsed to one channel, morphologically opened and OCR'd. The recognised
    text is searched for a price preceded by "rs" and for month/year dates.
    Only keys of ``data`` that are missing or ``None`` are filled:

    * ``"M R P"``: the first price found;
    * ``"Manufacturing Date"``: the first valid date;
    * ``"Expiry Date"``: the second valid date, when there is one.

    Parameters:
        pic: path of the image file.
        data: dict to update in place.

    Returns:
        The same ``data`` dict.
    """
    image = cv2.imread(pic)

    # NOTE(review): the original also built a purple cv2.inRange mask here but
    # never applied it, so it has been dropped. The HSV channels are fed to a
    # BGR->gray conversion exactly as before -- confirm this is intended.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv = cv2.rotate(hsv, cv2.ROTATE_90_CLOCKWISE)
    gray_image = cv2.cvtColor(hsv, cv2.COLOR_BGR2GRAY)
    threshold_img = opening(gray_image)

    details = pytesseract.image_to_data(threshold_img, output_type=Output.DICT, config=custom_config, lang="eng")

    # All non-blank OCR tokens, in reading order, separated by single spaces.
    full = " ".join(word.strip() for word in details['text'] if word.strip())
    lowered = full.lower()

    date_pattern = r"(((jan(uary)?|feb(ruary)?|mar(ch)?|apr(il)?|may|jun(e)?|jul(y)?|aug(ust)?|sep(tember)?|oct(ober)?|nov(ember)?|dec(ember)?)|(\d{2}))(\.|\/|\s)+(\d{4}|\d{2}))"
    mrp_pattern = r"(rs.*(\d{3}|\d{2}).(\d{2}))"

    date_tuples = re.findall(date_pattern, lowered)
    mrp_tuples = re.findall(mrp_pattern, lowered)

    # ---- price
    if mrp_tuples:
        # Element 0 of each tuple is the whole "rs ... dd?dd" match; the price
        # is the first "<digits><any char><digits>" inside it.
        p = re.findall(r"(\d+).(\d+)", mrp_tuples[0][0])
        if p and data.get("M R P") is None:
            data.update({"M R P": float(p[0][0] + "." + p[0][1])})    # setting price

    # ---- dates
    month_names = ["january", "february", "march", "april", "may", "june",
                   "july", "august", "september", "october", "november", "december"]
    month_to_number = {}
    for number, name in enumerate(month_names, start=1):
        month_to_number[name] = number
        month_to_number[name[:3]] = number

    dates_in_string = []
    for dtuple in date_tuples:
        parts = re.findall(r"(\w+)(\.|\s|\/)+(\d+)", dtuple[0])
        if not parts:
            continue
        month_token, year = parts[0][0], parts[0][2]
        month = month_to_number.get(month_token)
        if month is None:
            month = int(month_token)
        # Months 1..12 inclusive (December used to be rejected); the year
        # must have two or four digits and be non-zero.
        if 1 <= month <= 12 and len(year) in (2, 4) and int(year) != 0:
            dates_in_string.append(month_names[month - 1] + " " + str(int(year)))

    if dates_in_string:
        if data.get("Manufacturing Date") is None:
            data.update({"Manufacturing Date": dates_in_string[0]})    # setting mfg date
        if len(dates_in_string) > 1 and data.get("Expiry Date") is None:
            data.update({"Expiry Date": dates_in_string[1]})    # setting exp date

    return data


# Closing extraction logic

In [None]:
def closing(image):
    """Apply a morphological closing with a 3x3 all-ones kernel to ``image``."""
    kernel = np.ones((3,3), np.uint8)
    # Operate on the argument; the original read an undefined name ``mask``.
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)

def extract_closing(pic, data):
    """Extract price and dates by OCR of a closed, rotated grayscale image.

    The image at ``pic`` is converted to grayscale, morphologically closed,
    rotated 90 degrees clockwise and OCR'd. The recognised text is searched
    for a price preceded by "rs" and for month/year dates. Only keys of
    ``data`` that are missing or ``None`` are filled:

    * ``"M R P"``: the first price found;
    * two or more dates: first -> ``"Manufacturing Date"``, second ->
      ``"Expiry Date"``;
    * exactly one date: stored as ``"Expiry Date"`` when its year is after
      2021, and as ``"Manufacturing Date"``.

    Parameters:
        pic: path of the image file.
        data: dict to update in place.

    Returns:
        The same ``data`` dict.
    """
    # The image must be loaded before anything touches it. The original
    # computed an unused blue mask from ``image`` ahead of this line, which
    # raised UnboundLocalError.
    image = cv2.imread(pic)
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    closed_img = closing(gray_image)
    rotated = cv2.rotate(closed_img, cv2.ROTATE_90_CLOCKWISE)
    details = pytesseract.image_to_data(rotated, output_type=Output.DICT, config=custom_config, lang="eng")

    # All non-blank OCR tokens, in reading order, separated by single spaces.
    full = " ".join(word.strip() for word in details['text'] if word.strip())
    lowered = full.lower()

    date_pattern = r"(((jan(uary)?|feb(ruary)?|mar(ch)?|apr(il)?|may|jun(e)?|jul(y)?|aug(ust)?|sep(tember)?|oct(ober)?|nov(ember)?|dec(ember)?)|(\d{2}))(\.|\/|\s)+(\d{4}|\d{2}))"
    mrp_pattern = r"(rs.*(\d{3}|\d{2}).(\d{2}))"

    date_tuples = re.findall(date_pattern, lowered)
    mrp_tuples = re.findall(mrp_pattern, lowered)

    # ---- price
    if mrp_tuples:
        # Element 0 of each tuple is the whole "rs ... dd?dd" match; the price
        # is the first "<digits><any char><digits>" inside it.
        p = re.findall(r"(\d+).(\d+)", mrp_tuples[0][0])
        if p and data.get("M R P") is None:
            data.update({"M R P": float(p[0][0] + "." + p[0][1])})    # setting price

    # ---- dates
    month_names = ["january", "february", "march", "april", "may", "june",
                   "july", "august", "september", "october", "november", "december"]
    month_to_number = {}
    for number, name in enumerate(month_names, start=1):
        month_to_number[name] = number
        month_to_number[name[:3]] = number

    dates = []
    for dtuple in date_tuples:
        parts = re.findall(r"(\w+)(\.|\s|\/)+(\d+)", dtuple[0])
        if not parts:
            continue
        month_token, year = parts[0][0], parts[0][2]
        month = month_to_number.get(month_token)
        if month is None:
            month = int(month_token)
        # Months 1..12 inclusive (December used to be rejected); the year
        # must have two or four digits and be non-zero.
        if 1 <= month <= 12 and len(year) in (2, 4) and int(year) != 0:
            dates.append([month, int(year)])

    dates_in_string = [month_names[month - 1] + " " + str(year) for month, year in dates]

    if len(dates_in_string) > 1:
        if data.get("Manufacturing Date") is None:
            data.update({"Manufacturing Date": dates_in_string[0]})    # setting mfg date
        if data.get("Expiry Date") is None:
            data.update({"Expiry Date": dates_in_string[1]})    # setting exp date
    elif dates_in_string:
        # Single date: compare its numeric year (the original called int() on
        # a list slice and then indexed a second, non-existent date).
        # NOTE(review): 2021 is a hard-coded cut-off and two-digit years never
        # exceed it -- confirm the intended rule. The manufacturing date is
        # still filled from the same date, as in the original control flow.
        if dates[0][1] > 2021:
            if data.get("Expiry Date") is None:
                data.update({"Expiry Date": dates_in_string[0]})    # setting exp date
        if data.get("Manufacturing Date") is None:
            data.update({"Manufacturing Date": dates_in_string[0]})    # setting mfg date

    return data

In [None]:
def final_audio(data):
    """Read every entry of ``data`` aloud through the pyttsx3 "sapi5" driver.

    For each key, one sentence is queued: "<key> has not been identified" when
    the value is ``None``, "<key> is Rupees <value>" for the ``"M R P"`` key,
    and "<key> is <value>" otherwise. All sentences are spoken once queued.

    Parameters:
        data: mapping of field name to extracted value (or ``None``).
    """
    engine = pyttsx3.init("sapi5")
    engine.setProperty('rate', 160)
    for key, value in data.items():
        if value is None:
            sent = str(key) + " has not been identified"
        elif key == "M R P":
            sent = str(key) + " is Rupees " + str(value)
        else:
            sent = str(key) + " is " + str(value)
        engine.say(sent)
    engine.runAndWait()
    engine.stop()

In [None]:
if __name__ == '__main__':
    # Image to analyse (path relative to the working directory).
    image = "for_testing (8).jpg"

    # One slot per field; None marks "not extracted yet".
    data = {"Name": None, "M R P": None, "Manufacturing Date": None, "Expiry Date": None}

    # Optional first pass through the hosted model (disabled):
    #data = model_extraction(image, data)

    print(data)

    # Fall back to the tallest-word heuristic when no name is known yet.
    if data["Name"] is None:
        data = extract_name(image, data)

    # Fall back to the "purple" OCR pass when the price or any date is missing.
    if any(data[field] is None for field in ("M R P", "Manufacturing Date", "Expiry Date")):
        data = purple(image, data)

    print(data)
    final_audio(data)    # speak the result
    print("done")
    
    

In [None]:
# Notebook cell expression: displays the final ``data`` dict as the cell output.
data