#Flipkart Grid 6.0

In [None]:
!pip install easyocr
!apt-get update && apt-get install -y libzbar0
!pip install pyzbar

###Mounting Drive

In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#@title Imports and function definitions

# For running inference on the TF-Hub module.
import tensorflow as tf

import tensorflow_hub as hub

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

import easyocr
import cv2
import re
from datetime import datetime
from dateutil.relativedelta import relativedelta

import re
from datetime import datetime
from pyzbar.pyzbar import decode

# Report which TensorFlow build this notebook is running on.
print(tf.__version__)

# Report the GPU device name as seen by TensorFlow.
gpu_device = tf.test.gpu_device_name()
print("The following GPU devices are available: %s" % gpu_device)

###Function for Displaying Image

In [None]:
def display_image(image):
  """Show `image` with matplotlib on a new 20x15 figure with the grid off."""
  plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)

###Function for downloading image from url

In [None]:
def download_and_resize_image(url, new_width=256, new_height=256, display=False):
  """Download an image, fit it to the requested size and save it as a JPEG.

  Args:
    url: Location of the image to fetch with `urlopen`.
    new_width: Width of the saved image in pixels.
    new_height: Height of the saved image in pixels.
    display: When true, also show the resized image via `display_image`.

  Returns:
    Path of the temporary ".jpg" file that was written. The file is not
    deleted automatically; the caller owns it.
  """
  # Context manager so the HTTP response is closed even if reading fails.
  with urlopen(url) as response:
    image_data = BytesIO(response.read())

  pil_image = Image.open(image_data)
  pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)
  pil_image_rgb = pil_image.convert("RGB")

  # NamedTemporaryFile(delete=False) replaces mkstemp(): mkstemp returns an
  # already-open OS file descriptor which the previous code never closed.
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
    pil_image_rgb.save(tmp_file, format="JPEG", quality=90)
    filename = tmp_file.name

  print("Image downloaded to %s." % filename)
  if display:
    display_image(pil_image)
  return filename

###Function for drawing bounding boxes

In [None]:
def draw_bounding_box_on_image(image,ymin,xmin,ymax,xmax,color,font,thickness=4,display_str_list=()):
  """Adds a bounding box, with optional stacked text labels, to an image.

  The image is modified in place.

  Args:
    image: PIL image to draw on.
    ymin, xmin, ymax, xmax: Box corners as fractions of the image height /
      width (they are multiplied by the image size below).
    color: Fill color for the box outline and the label background.
    font: PIL font used to measure and render the labels.
    thickness: Outline width in pixels.
    display_str_list: Label strings; drawn stacked, last entry lowest.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),(left, top)], width=thickness, fill=color)

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]

  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height

  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    bbox = font.getbbox(display_str)
    text_width, text_height = bbox[2], bbox[3]
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    # Move up by the full height of the rectangle just drawn (text plus both
    # margins). Subtracting the margins here instead made consecutive labels
    # overlap by 4 * margin.
    text_bottom -= text_height + 2 * margin

###Function for drawing labeled boxes and returning their coordinates

In [None]:
def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):
  """Overlay labeled boxes on an image with formatted scores and label names.

  Args:
    image: Numpy image array; it is overwritten in place with the drawing.
    boxes: Array whose rows are (ymin, xmin, ymax, xmax).
    class_names: Per-box label as bytes (decoded as ASCII for display).
    scores: Per-box confidence score.
    max_boxes: Only the first `max_boxes` rows are considered.
    min_score: Rows scoring below this value are skipped.

  Returns:
    Tuple `(image, coordinates)` where `coordinates` holds one
    `(xmin, ymin, xmax, ymax)` tuple for every box that was drawn.
  """
  colors = list(ImageColor.colormap.values())

  try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf", 25)
  except IOError:
    print("Font not found, using default font.")
    font = ImageFont.load_default()

  coordinates = []
  # The PIL copy is created once, on the first accepted box, and written back
  # once after the loop instead of converting array -> PIL -> array per box.
  image_pil = None
  for i in range(min(boxes.shape[0], max_boxes)):
    if scores[i] < min_score:
      continue
    ymin, xmin, ymax, xmax = tuple(boxes[i])
    coordinates.append((xmin, ymin, xmax, ymax))
    display_str = "{}: {}%".format(class_names[i].decode("ascii"), int(100 * scores[i]))
    color = colors[hash(class_names[i]) % len(colors)]
    if image_pil is None:
      image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
    draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, font, display_str_list=[display_str])

  # Leave the input untouched when nothing qualified for drawing.
  if image_pil is not None:
    np.copyto(image, np.array(image_pil))

  return image, coordinates

In [None]:
# TF-Hub handle of the object-detection module to use. The Colab form on this
# line offers the SSD/MobileNet v2 and Faster R-CNN/Inception-ResNet v2
# Open Images v4 handles.
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1" #@param ["https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1", "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"]

# Load the module and keep its 'default' signature as the callable detector
# used by the functions below.
detector = hub.load(module_handle).signatures['default']

###Function for loading an image as a tf.Tensor

In [None]:
def load_img(path):
  """Read the file at `path` and decode it as a 3-channel JPEG tensor."""
  raw_bytes = tf.io.read_file(path)
  return tf.image.decode_jpeg(raw_bytes, channels=3)

###Cropping Image

In [None]:
# Function to crop an image file to a rectangle given by its corner coordinates
def crop_image(image_path, xmin, ymin, xmax, ymax):
    """Load an image with OpenCV, crop it, display the crop and return it.

    Args:
        image_path: Path of the image file to read.
        xmin, ymin, xmax, ymax: Pixel coordinates of the crop rectangle
            (used directly as array slice bounds).

    Returns:
        The cropped image as loaded by `cv2.imread` (BGR channel order).

    Raises:
        OSError: If OpenCV could not read the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise OSError(f"Could not read image file: {image_path}")

    cropped_image = image[ymin:ymax, xmin:xmax]
    # OpenCV stores channels as BGR while matplotlib expects RGB, so reverse
    # the channel axis for display only; the returned crop stays BGR.
    display_image(cropped_image[..., ::-1])

    return cropped_image

###Parsing date into standard format

In [None]:
import re
from datetime import datetime

# Month names and three-letter abbreviations (lower case) mapped to
# zero-padded month numbers.
months = {
    "jan": "01", "feb": "02", "mar": "03", "apr": "04", "may": "05", "jun": "06",
    "jul": "07", "aug": "08", "sep": "09", "oct": "10", "nov": "11", "dec": "12",
    "january": "01", "february": "02", "march": "03", "april": "04",
    "june": "06", "july": "07", "august": "08", "september": "09", "october": "10",
    "november": "11", "december": "12",
}

# Purely numeric layouts: (regex guarding the overall shape, strptime format).
_NUMERIC_DATE_FORMATS = (
    (re.compile(r"^\d{1,2}/\d{1,2}/\d{4}$"), "%d/%m/%Y"),  # dd/mm/yyyy
    (re.compile(r"^\d{1,2}/\d{4}$"), "%m/%Y"),             # mm/yyyy
    (re.compile(r"^\d{1,2}/\d{2}$"), "%m/%y"),             # mm/yy
    (re.compile(r"^\d{1,2}-\d{1,2}-\d{4}$"), "%d-%m-%Y"),  # dd-mm-yyyy
    (re.compile(r"^\d{1,2}-\d{4}$"), "%m-%Y"),             # mm-yyyy
    (re.compile(r"^\d{1,2}-\d{2}$"), "%m-%y"),             # mm-yy
    (re.compile(r"^\d{2}/\d{2}/\d{2}$"), "%d/%m/%y"),      # dd/mm/yy
    (re.compile(r"^\d{2}\d{2}\d{2}$"), "%d%m%y"),          # ddmmyy
    (re.compile(r"^\d{2}\d{2}\d{4}$"), "%d%m%Y"),          # ddmmyyyy
)

# "dd Month yyyy" with a full or abbreviated month name.
_DAY_MONTH_YEAR_RE = re.compile(r"^(\d{1,2})\s+([A-Za-z]+)\s+(\d{4})$")

# "Month yyyy" / "Month yy", optionally separated by "." or a space and an
# apostrophe. The year must be exactly two or four digits.
_MONTH_YEAR_RE = re.compile(
    r"^(?P<month>[a-zA-Z]{3,9})[\. ]?'?(?P<year>\d{4}|\d{2})$"
)


def _build_date(day, month_num, year, original):
    """Return "dd/mm/yyyy" for the parts, or raise ValueError if impossible."""
    try:
        # Constructing a datetime rejects impossible dates such as day 45.
        datetime(int(year), int(month_num), int(day))
    except ValueError:
        raise ValueError(f"Invalid date format: {original}") from None
    return f"{int(day):02d}/{month_num}/{year}"


def parse_date(date_str):
    """Normalise a date written in one of several layouts to "dd/mm/yyyy".

    Supported layouts are the numeric ones listed in `_NUMERIC_DATE_FORMATS`,
    "dd Month yyyy", and "Month yy(yy)". Surrounding whitespace is ignored.
    Numeric layouts without a day resolve to day 01 (strptime's default);
    month-name layouts without a day resolve to day 15. Two-digit years in
    the month-name layout are read as 20yy.

    Args:
        date_str: The candidate date text.

    Returns:
        The date formatted as "dd/mm/yyyy" (zero-padded day and month).

    Raises:
        ValueError: If the text matches no supported layout or does not
            describe a real calendar date.
    """
    text = date_str.strip()

    for shape, fmt in _NUMERIC_DATE_FORMATS:
        if not shape.match(text):
            continue
        try:
            return datetime.strptime(text, fmt).strftime("%d/%m/%Y")
        except ValueError:
            # Right shape but not a real date (e.g. month 13); keep looking
            # and fall through to the uniform error below.
            continue

    day_match = _DAY_MONTH_YEAR_RE.match(text)
    if day_match:
        day, month_name, year = day_match.groups()
        month_num = months.get(month_name.lower())
        if month_num:
            return _build_date(day, month_num, year, date_str)

    month_match = _MONTH_YEAR_RE.match(text)
    if month_match:
        year = month_match.group("year")
        if len(year) == 2:
            year = f"20{year}"  # Convert 'yy' to '20yy'
        # Only the first three letters are used to identify the month.
        month_num = months.get(month_match.group("month").lower()[:3])
        if month_num:
            # Default day to 15 if not specified
            return _build_date("15", month_num, year, date_str)

    raise ValueError(f"Invalid date format: {date_str}")


In [None]:
def parse_date1(date_str):
    """Parse a numeric day/month/year string into a `datetime`.

    Day-first order is tried before month-first, and four-digit years before
    two-digit years. Both "/" and "-" are accepted as separators, because the
    date regex used by the extraction code matches both as well as two-digit
    years.

    Args:
        date_str: Text such as "15/01/2024", "15-01-2024" or "15/01/24".

    Returns:
        The parsed `datetime`.

    Raises:
        ValueError: If no supported layout fits.
    """
    normalized = date_str.strip().replace("-", "/")
    for fmt in ("%d/%m/%Y", "%m/%d/%Y", "%d/%m/%y", "%m/%d/%y"):
        try:
            return datetime.strptime(normalized, fmt)
        except ValueError:
            continue
    raise ValueError(f"Invalid date format: {date_str}")

###Pre-Process Image

In [None]:
def preprocess_image(image):
    """Return a grayscale, noise-reduced version of a BGR image array."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Denoise the single-channel image before it is handed to OCR / decoding.
    return cv2.fastNlMeansDenoising(grayscale, None, 30, 7, 21)

###Extracting text using OCR

In [None]:
# Shared English EasyOCR reader, created on first use. Building a Reader
# loads the recognition models, so it is done once rather than per call.
_EASYOCR_READER = None


def extract_text_easyocr(image_path):
    """Run EasyOCR on an image and return the recognised text fragments.

    Args:
        image_path: The image to read; passed straight to `readtext`.
            NOTE(review): callers in this file pass an image array rather
            than a path - confirm against the EasyOCR documentation.

    Returns:
        All recognised fragments joined with the literal delimiter "[]".
    """
    global _EASYOCR_READER
    if _EASYOCR_READER is None:
        _EASYOCR_READER = easyocr.Reader(['en'])
    fragments = _EASYOCR_READER.readtext(image_path, detail=0)
    return "[]".join(fragments)

###Extracting dates and mrp using regex patterns

In [None]:
def _to_datetime(date_str):
    """Best-effort conversion of a matched date string; None when unparseable."""
    candidates = [date_str]
    try:
        # parse_date understands layouts (e.g. two-digit years) and returns
        # them as dd/mm/yyyy, which parse_date1 can then read.
        candidates.append(parse_date(date_str))
    except ValueError:
        pass
    for candidate in candidates:
        try:
            return parse_date1(candidate)
        except ValueError:
            continue
    return None


def _normalise_date(date_str):
    """Return the date as dd/mm/yyyy, or the text unchanged if unparseable."""
    parsed = _to_datetime(date_str)
    return parsed.strftime('%d/%m/%Y') if parsed is not None else date_str


def extract_dates_and_mrp(text):
    """Find expiry date, manufacturing date and MRP in OCR output.

    Args:
        text: OCR fragments joined with the literal delimiter "[]".

    Returns:
        Dict with keys "expiry_date", "manufactured_date" and "mrp". Each
        value is a string or None when nothing was found. Dates are returned
        as dd/mm/yyyy whenever they can be parsed, otherwise as matched.
    """
    date_pattern = r'\b(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})\b'
    # The word boundary applies only to the word keywords: "\b" directly in
    # front of the non-word character "₹" can never match after whitespace.
    # An optional "." after the keyword accepts "Rs." and "M.R.P.", and the
    # amount must start with a digit so a lone "," is never captured.
    mrp_pattern = (r'(?:\b(?:MRP|M\.R\.P|MAX\.? RETAIL PRICE|price|Rs)|₹)\.?'
                   r'\s*[:\-]?\s*([₹$£]?\d[\d,]*(?:\.\d{1,2})?)\b')
    duration_pattern = (r'\b(?:best before|valid until|expires in)'
                        r'\s*(\d+)\s*(days?|months?|years?)\b')

    # Normalise every fragment that is a date on its own; keep the rest as-is.
    fragments = []
    for fragment in text.split('[]'):
        try:
            fragments.append(parse_date(fragment))
        except ValueError:
            fragments.append(fragment)
    joined = " ".join(fragments)

    # Find all date and MRP matches in the text
    dates = re.findall(date_pattern, joined)
    mrp_matches = re.findall(mrp_pattern, joined, re.IGNORECASE)

    expiry_date = None
    manufactured_date = None

    # Identify context for dates. Fragments were joined with spaces, so this
    # yields more than one line only if the OCR text itself contains newlines.
    for line in joined.split('\n'):
        lowered = line.lower()
        line_dates = re.findall(date_pattern, line)
        expiry_from_duration = False

        if any(keyword in lowered for keyword in ('valid until', 'best before', 'expires in')):
            duration_match = re.search(duration_pattern, line, re.IGNORECASE)
            if duration_match:
                # A duration is counted from the first date found anywhere in
                # the text; without a parseable date it cannot be resolved.
                base = _to_datetime(dates[0]) if dates else None
                if base is not None:
                    amount = int(duration_match.group(1))
                    # "day"/"days" -> "days" etc., the relativedelta keyword.
                    unit = duration_match.group(2).lower().rstrip('s') + 's'
                    manufactured_date = base.strftime('%d/%m/%Y')
                    expiry_date = (base + relativedelta(**{unit: amount})).strftime('%d/%m/%Y')
                    expiry_from_duration = True
            elif line_dates:
                expiry_date = _normalise_date(line_dates[0])

        # Keywords are lower case because they are compared with lowered text.
        # A duration-derived expiry is kept: "expires in" itself contains
        # "exp" and would otherwise replace it with the base date.
        if (not expiry_from_duration and line_dates
                and any(keyword in lowered for keyword in ('exp', 'use by'))):
            expiry_date = _normalise_date(line_dates[0])
        if line_dates and any(keyword in lowered for keyword in ('mfd', 'mfg date', 'mfg.date')):
            manufactured_date = _normalise_date(line_dates[0])

    if len(dates) == 1 and not expiry_date:
        expiry_date = _normalise_date(dates[0])
    if len(dates) == 2:
        # With exactly two dates the later one is taken as the expiry date.
        date1 = _to_datetime(dates[0])
        date2 = _to_datetime(dates[1])
        if date1 is not None and date2 is not None and date1 != date2:
            earlier, later = sorted((date1, date2))
            expiry_date = later.strftime('%d/%m/%Y')
            manufactured_date = earlier.strftime('%d/%m/%Y')

    return {
        "expiry_date": expiry_date,
        "manufactured_date": manufactured_date,
        "mrp": mrp_matches[0] if mrp_matches else None}

###Function for extracting information from QR code or bar code

In [None]:
#Reading QR Code
from pyzbar.pyzbar import decode

def extract_qr_barcode(image):
    """Extract QR and barcode data from the image.

    Args:
        image: Image array; it is run through `preprocess_image` before
            being passed to pyzbar's `decode`.

    Returns:
        List with one dict per decoded symbol, holding its 'type', its
        payload as text under 'data', and its 'bounding_box'.
    """
    preprocessed_image = preprocess_image(image)
    return [
        {
            'type': symbol.type,
            # The payload is raw bytes and not guaranteed to be valid UTF-8;
            # undecodable bytes are replaced instead of raising.
            'data': symbol.data.decode('utf-8', errors='replace'),
            'bounding_box': symbol.rect,
        }
        for symbol in decode(preprocessed_image)
    ]



In [None]:
def process_image(image_path, use_easyocr=False):
    """Denoise an image, OCR it and extract dates and MRP from the text.

    `image_path` is handed to `preprocess_image` unchanged. `use_easyocr` is
    accepted but not consulted; EasyOCR is always used.

    Returns the dict produced by `extract_dates_and_mrp`.
    """
    denoised = preprocess_image(image_path)
    ocr_text = extract_text_easyocr(denoised)
    return extract_dates_and_mrp(ocr_text)

###Running Model for object detection

In [None]:
def run_detector(detector, path):
  """Detect objects in an image file and OCR every detected region.

  The image is loaded, passed through `detector`, and each box returned by
  `draw_boxes` is cropped from the file and sent to `process_image`. The
  extracted information is printed per region.

  Args:
    detector: Callable taking a float32 batch of one image and returning a
      dict of tensors that includes "detection_boxes",
      "detection_class_entities" and "detection_scores".
    path: Path of the JPEG image to analyse.
  """
  img = load_img(path)

  converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]

  start_time = time.time()
  detections = detector(converted_img)
  end_time = time.time()

  detections = {key: value.numpy() for key, value in detections.items()}

  print("Inference time: ", end_time-start_time)

  # One numpy copy provides both the pixel size and the drawing target; no
  # extra PIL conversion is needed just to read the dimensions.
  image_np = img.numpy()
  img_height, img_width = image_np.shape[:2]
  _, coords = draw_boxes(
      image_np, detections["detection_boxes"],
      detections["detection_class_entities"], detections["detection_scores"])

  # No OCR reader is built here: extract_text_easyocr manages its own, and
  # the one previously constructed in this function was never used.
  for i, (box_xmin, box_ymin, box_xmax, box_ymax) in enumerate(coords):
    # Calculate pixel coordinates
    xmin = int(box_xmin * img_width)
    ymin = int(box_ymin * img_height)
    xmax = int(box_xmax * img_width)
    ymax = int(box_ymax * img_height)

    # A zero-area crop would make the downstream OpenCV calls fail.
    if xmax <= xmin or ymax <= ymin:
      print(f"Skipping empty cropped area {i}")
      continue

    # Crop the image using the pixel coordinates
    cropped_image = crop_image(path, xmin, ymin, xmax, ymax)
    extracted = process_image(cropped_image)
    print(f"Detected text in cropped area {i}: {extracted}")

In [None]:
def detect_img(image_url):
  """Run detection and OCR on one image using the module-level `detector`.

  Despite the parameter name, `image_url` is forwarded unchanged as the
  `path` argument of `run_detector`; nothing is downloaded here.
  """
  run_detector(detector=detector, path=image_url)

###Image Input

In [None]:
image_path = '/content/0.jpg' #Enter image path here
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# stop here with a clear message rather than failing deep inside the pipeline.
if image is None:
  raise OSError(f"Could not read image file: {image_path}")

# Object detection followed by OCR on every detected region.
detect_img(image_path)

# QR / barcode decoding on the full image; only the first result is printed.
qr_result = extract_qr_barcode(image)
if qr_result:
  print("QR/Bar Code Data : ",qr_result[0]['data'])