In [5]:
%%writefile app.py
from flask import Flask, Response, render_template, make_response, jsonify, request as req
from flask_cors import CORS
from boto3.dynamodb.conditions import Key, Attr
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import psycopg2
import boto3
import json
import re
import os

# Pull AWS credentials (and optional overrides) from the local .env file.
load_dotenv('.env')
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# DynamoDB handle used to read the per-date ingredient price series.
dynamodb = boto3.resource(
    'dynamodb',
    region_name='ap-southeast-2',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

table_prices = dynamodb.Table('IngredientPrices')

# PostgreSQL connection holding the `ingredients` table queried by search().
# The DSN can now be supplied through DATABASE_DSN (e.g. in .env) so the
# password does not have to live in source control; the historical value is
# kept as the fallback so existing setups keep working unchanged.
conn = psycopg2.connect(
    os.getenv("DATABASE_DSN", "dbname=eltrial user=top password=1234")
)

app = Flask(__name__)
CORS(app)

includes = re.compile("‡∏´‡∏°‡∏π|‡πÑ‡∏Å‡πà|‡∏ß‡∏±‡∏ß|^‡πÑ‡∏Ç‡πà$|‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà|^‡∏ô‡∏°")
excludes = re.compile("‡∏ô‡∏°‡∏Ç‡πâ‡∏ô‡∏´‡∏ß‡∏≤‡∏ô|‡∏ú‡∏á|‡∏Ñ‡∏ô‡∏≠‡∏£‡πå|‡∏°‡∏≤‡∏°‡πà‡∏≤|‡∏Ñ‡∏≤‡∏£‡πå‡πÄ‡∏ô‡∏ä‡∏±‡πà‡∏ô|‡∏´‡∏°‡∏π‡∏Å‡∏£‡∏≠‡∏ö|‡∏Å‡∏£‡∏∞‡∏î‡∏π‡∏Å|‡∏õ‡∏π|^‡∏ã‡∏≠‡∏™|^‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÄ‡∏ó‡∏®|^‡πÄ‡∏ï‡πâ‡∏≤‡∏´‡∏π‡πâ|‡∏≠‡∏¥‡∏ô‡∏ó‡∏ú‡∏≤‡∏•‡∏±‡∏°|‡∏•‡∏π‡∏Å‡∏ä‡∏¥‡πâ‡∏ô|‡∏´‡∏°‡∏π‡πÅ‡∏Æ‡∏°|‡πÑ‡∏Ç‡πà‡πÅ‡∏î‡∏á‡πÄ‡∏Ñ‡πá‡∏°|‡∏ô‡πâ‡∏≥‡∏°‡∏±‡∏ô|‡∏Ñ‡∏≠‡∏£‡πå|‡πÑ‡∏™‡πâ‡∏Å‡∏£‡∏≠‡∏Å|‡∏´‡∏±‡∏ß‡πÉ‡∏à|‡∏Å‡∏£‡∏∞‡πÄ‡∏û‡∏≤‡∏∞|‡∏õ‡∏≠‡∏î|‡∏£‡∏™‡∏î‡∏µ|‡∏Ñ‡∏ô‡∏≠|‡∏ô‡πâ‡∏≥‡∏ã‡∏∏‡∏õ|‡∏´‡∏°‡∏π‡∏¢‡∏≠|‡∏ô‡πâ‡∏≥|‡∏ã‡∏µ‡∏≠‡∏¥‡πä‡∏ß|‡∏ä‡∏∏‡∏î|^‡∏û‡∏£‡∏¥‡∏Å‡πÑ‡∏ó‡∏¢|‡∏ã‡∏µ‡∏≠‡∏¥‡πâ‡∏ß|‡∏™‡∏ï‡πä‡∏≠‡∏Å|‡πÅ‡∏õ‡πâ‡∏á|^‡∏û‡∏£‡∏¥‡∏Å|‡πÄ‡∏•‡∏∑‡∏≠‡∏î|‡∏ä‡∏∏‡∏õ|‡∏£‡∏≤‡∏Å‡∏ú‡∏±‡∏Å‡∏ä‡∏µ|‡∏ö‡∏∞‡∏´‡∏°‡∏µ‡πà‡∏Å‡∏∂‡πà‡∏á‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡∏£‡∏π‡∏õ|‡∏Ç‡πâ‡∏ô‡∏´‡∏ß‡∏≤‡∏ô|‡∏Å‡∏≤‡∏Å‡∏´‡∏°‡∏π|‡∏ô‡∏°‡∏Ç‡πâ‡∏ô|‡∏´‡∏°‡∏π‡πÅ‡∏î‡∏á|‡πÅ‡∏Ñ‡∏õ‡∏´‡∏°‡∏π|‡∏°‡∏±‡∏ô‡∏´‡∏°‡∏π|^‡∏ã‡∏≠‡∏™|‡∏ô‡∏°‡∏ñ‡∏±‡πà‡∏ß‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏á|\*\*|^‡πÄ‡∏´‡∏•‡πâ‡∏≤‡∏à‡∏µ‡∏ô|‡∏≠‡∏±‡∏•‡∏°‡∏≠‡∏ô‡∏î‡πå|‡∏´‡∏°‡∏π‡∏´‡∏¢‡∏≠‡∏á|‡∏™‡∏π‡∏ï‡∏£|‡∏ã‡∏∏‡∏õ‡∏Å‡πâ‡∏≠‡∏ô|^‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á|‡∏™‡πà‡∏ß‡∏ô‡∏ú‡∏™‡∏°|‡πÅ‡∏Ñ‡∏ö‡∏´‡∏°‡∏π")
excludes_units = re.compile("‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö|‡∏Ñ‡∏£‡∏∂‡πà1/2|‡∏õ‡∏£‡∏¥‡∏°‡∏≤‡∏ô‡∏ï‡∏≤‡∏°‡πÉ‡∏à‡∏ä‡∏≠‡∏ö")

ingredient_kws = re.compile('pork_‡∏Ç‡∏≤‡∏´‡∏°‡∏π|pork_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π|pork_‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π|pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô|\
pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÅ‡∏î‡∏á|pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏´‡∏•‡πà|pork_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡∏´‡∏°‡∏π|pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å|pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô|\
pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô|pork_‡πÑ‡∏™‡πâ‡∏ï‡∏±‡∏ô‡∏´‡∏°‡∏π|pork_‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π|pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π|pork_‡∏´‡∏°‡∏π‡∏ö‡∏î|\
beef_‡∏Ç‡∏≠‡∏ö‡∏Å‡∏£‡∏∞‡∏î‡πâ‡∏á|beef_‡∏Ç‡∏µ‡πâ‡∏£‡∏¥‡πâ‡∏ß|beef_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô|beef_‡∏î‡∏≠‡∏Å‡∏à‡∏≠‡∏Å|beef_‡∏ô‡πà‡∏≠‡∏á|beef_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á|\
beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÅ‡∏î‡∏á|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏õ‡∏•‡∏µ‡∏Å|beef_‡πÄ‡∏®‡∏©‡πÄ‡∏ô‡∏∑‡πâ‡∏≠|beef_‡∏™‡∏±‡∏ô‡∏Ñ‡∏≠|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏ß‡∏±‡∏ß‡∏ö‡∏î|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô|\
beef_‡∏™‡∏±‡∏ô‡πÉ‡∏ô|beef_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å|chicken_‡πÑ‡∏Å‡πà‡∏ö‡πâ‡∏≤‡∏ô|chicken_‡πÑ‡∏Å‡πà‡∏ó‡∏±‡πâ‡∏á‡∏ï‡∏±‡∏ß|chicken_‡πÑ‡∏Å‡πà‡∏ú‡πà‡∏≤‡∏ã‡∏µ‡∏Å|\
chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà|chicken_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà|chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà|chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà|\
chicken_‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡πÄ‡∏ï‡πá‡∏°|chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà|chicken_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà|chicken_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô‡πÑ‡∏Å‡πà|\
chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà|chicken_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà‡∏ö‡∏î|chicken_‡πÑ‡∏Å‡πà‡∏°‡∏µ‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏´‡∏ô‡πâ‡∏≤‡∏ü‡∏≤‡∏£‡πå‡∏°|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 0|\
eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 1|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 3|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 4|\
eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 5|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡∏ï‡πâ‡∏°|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡∏Ñ‡∏•‡∏∞‡∏´‡∏ô‡πâ‡∏≤‡∏ü‡∏≤‡∏£‡πå‡∏°|‡∏ô‡∏°')

ingredient_translate = {
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ö": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏°‡∏π‡∏ö‡∏î": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡∏´‡∏°‡∏π‡∏ö‡∏î": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡πÑ‡∏Ç‡πÑ‡∏Å‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡∏´‡∏°‡∏π‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡∏ö‡∏ô": "chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà", 
    "‡∏ô‡∏°‡∏™‡∏î": "‡∏ô‡∏°",
    "‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏≠‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏°‡∏±‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏™‡∏î": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡∏Ñ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏Ñ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π": "pork_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡∏Å‡∏•‡∏≤‡∏á": "chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏≠‡∏Å": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å": "pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å",
    "‡∏ï‡∏±‡∏ö‡πÑ‡∏Å‡πà": "chicken_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô‡πÑ‡∏Å‡πà",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡πÄ‡∏ï‡πá‡∏°",
    "‡∏´‡∏°‡∏π‡∏™‡πà‡∏ß‡∏ô‡∏™‡∏∞‡πÇ‡∏û‡∏Å": "pork_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡∏´‡∏°‡∏π",
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ô‡πÉ‡∏ô": "pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πà‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡∏ß‡∏±‡∏ß": "beef_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏ß‡∏±‡∏ß": "beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å‡∏´‡∏°‡∏π": "pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å",
    "‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏ö‡∏î": "chicken_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà‡∏ö‡∏î",
    "‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏´‡∏°‡∏π‡∏ö‡∏Å": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡∏´‡∏°‡∏π‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
    "‡πÉ‡∏™‡πâ‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π",
    "‡πÑ‡∏™‡πâ‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π",
    "‡∏´‡∏ô‡∏±‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π",
    "‡πÄ‡∏≠‡πá‡∏ô‡∏Ç‡πâ‡∏≠‡πÑ‡∏Å‡πà": "chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡∏´‡∏°‡∏π": "pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π": "pork_‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π",
    "‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏™‡∏±‡∏ö": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡∏´‡∏ô‡∏±‡∏á‡∏´‡∏°‡∏π": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
}

default_ingredient = {
    "‡∏ô‡∏°": "‡∏ô‡∏°",
    "‡πÑ‡∏Ç‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡∏ß‡∏±‡∏ß": "beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
}

# Quantity parsing: a fraction such as "1/2", or the first run of digits.
# Raw strings avoid the invalid "\d" escape sequence of a plain literal.
frac_pattern = re.compile(r"\d+/\d+")
num_pattern = re.compile(r"\d+")

# Alternation over every specific ingredient name. At a given position the
# regex engine tries alternatives in dict insertion order, so a longer key
# must be listed before any key that is a prefix of it.
ingd_patterns = list(ingredient_translate)
ingd_pattern = re.compile("|".join(ingd_patterns))

# Fallback alternation over the generic ingredient keywords.
ingd_default_patterns = list(default_ingredient)
ingd_default_pattern = re.compile("|".join(ingd_default_patterns))

units = {
    "‡∏Å‡∏£‡∏±‡∏°": (0.001, "‡∏Å‡∏Å."),
    "g": (0.001, "‡∏Å‡∏Å."),
    "grams": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏£‡∏∞‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏¥‡πÇ‡∏•": (1., "‡∏Å‡∏Å."),
    "‡∏ü‡∏≠‡∏á": "‡∏ü‡∏≠‡∏á",
    "‡∏ä‡∏¥‡πâ‡∏ô‡πÉ‡∏´‡∏ç‡πà": "unknown",
    "‡∏°‡∏¥‡∏•‡∏•‡∏¥‡∏•‡∏¥‡∏ï‡∏£": (0.001, "‡∏•‡∏¥‡∏ï‡∏£"),
    "‡∏Å‡πâ‡∏≠‡∏ô": "unknown",
    "‡∏ñ‡πâ‡∏ß‡∏¢": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞": (0.015, "‡∏•‡∏¥‡∏ï‡∏£"),
    "‡∏ä‡∏¥‡πâ‡∏ô": "unknown",
    "‡∏Å‡∏£‡πâ‡∏°": (0.001, "‡∏Å‡∏Å."),
    "ml": (0.001, "‡∏•‡∏¥‡∏ï‡∏£"),
    "kg": (1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏Å": (1, "‡∏Å‡∏Å."),
    "‡∏Ç‡∏µ‡∏î": (0.1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏¥‡πÇ‡∏•‡∏Å‡∏£‡∏±‡∏°": (1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏£‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏ó‡πà‡∏≠‡∏ô": "unknown",
    "‡πÇ‡∏Ñ‡∏£‡∏á": "unknown",
    "‡πÉ‡∏ö": "‡∏ü‡∏≠‡∏á",
    "‡πÅ‡∏û‡πá‡∏Ñ": "unknown",
    "mg": (0.000001, "‡∏Å‡∏Å."),
    "‡∏Å$": (0.001, "‡∏Å‡∏Å."),
    "‡πÄ‡∏™‡πâ‡∏ô": "unknown",
    "‡∏ô‡πà‡∏≠‡∏á": "unknown",
    "‡∏õ‡∏µ‡∏Å": "unknown",
    "‡∏Ç‡∏≤": "unknown",
    "‡∏ß‡∏á": "unknown",
    "‡∏ï‡∏±‡∏ß": "unknown",
    "‡∏Ç‡∏∂‡∏î": "unknown",
    "‡∏ã‡∏≠‡∏á": "unknown",
    "‡∏≠‡∏Å": "unknown",
    "‡∏à‡∏≤‡∏ô": "unknown",
    "‡πÇ‡∏•": (1, "‡∏Å‡∏Å."),
    "‡πÅ‡∏ó‡πà‡∏á": "unknown",
    "‡∏ö‡∏≤‡∏ó": "baht",
    "‡πÑ‡∏°‡πâ": "unknown",
    "‡∏î‡∏∏‡πâ‡∏ô‡∏™‡∏±‡πâ‡∏ô": "unknown",
    "‡∏ï‡∏µ‡∏ô": "unknown",
    "Kg": (1, "‡∏Å‡∏Å."),
    "‡∏ó‡∏±‡∏û‡∏û‡∏µ": "unknown",
    "‡∏ü‡∏≠‡∏ß": "unit",
    "‡∏ñ‡∏≤‡∏î": "unknown",
    "‡∏ä‡πâ‡∏¥‡∏ô‡πÇ‡∏ï‡πä‡∏∞": (0.015, "‡∏Å‡∏Å."),
    "‡∏´‡πà‡∏≠": "unknown",
    "‡∏•‡∏π‡∏Å": "unit",
    "KG": (1., "‡∏Å‡∏Å."),
    "‡πÅ‡∏ú‡πà‡∏ô": "unknown",
    "G": (0.001, "‡∏Å‡∏Å."),
    "‡∏ï‡∏≤‡∏°‡∏™‡∏∞‡∏î‡∏ß‡∏Å": "unknown",
    "‡∏ä‡∏ï": (0.015, "‡∏Å‡∏Å."),
    "pack": "unknown",
    "‡∏û‡∏ß‡∏á": "unknown",
    "‡∏ñ‡∏∏‡∏á": "unknown",
    "pcs": "‡∏ü‡∏≠‡∏á",
    "‡∏™‡πÑ‡∏•‡∏î‡πå": "unknown",
    "‡πÇ‡∏Ñ‡∏£": "unknown",
    "‡∏ü$": "‡∏ü‡∏≠‡∏á",
    "‡∏ä‡∏≤‡∏°": "unknown",
    "‡πÅ‡∏û‡∏Ñ": "unknown",
    "‡∏Ç‡∏µ‡πÄ": (0.1, "‡∏Å‡∏Å."),
    "‡∏ä‡πâ‡∏≠‡∏ô$": "unknown",
    "‡∏≠‡∏±‡∏ô": "unknown",
    "‡πÅ‡∏û‡πä‡∏Ñ": "unknown",
    "‡∏Ñ‡∏£‡∏∂‡πà‡∏á‡πÇ‡∏•": (0.5, "‡∏Å‡∏Å."),
    "‡∏´‡∏°‡πâ‡∏≠": "unknown",
    "‡∏Å‡∏•‡πà‡∏≠‡∏á": "unknown",
    "‡∏Å‡∏£‡∏µ‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏ô‡∏±‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏™‡∏∞‡πÇ‡∏û‡∏Å": "unknown",
    "‡∏ã‡∏µ‡∏Å": "unknown",
    "‡∏Ç‡∏£‡∏î": "unknown",
    "‡∏ä‡∏µ‡∏î": "unknown",
    "‡∏´‡∏¢‡∏¥‡∏ö‡∏°‡∏∑‡∏≠": "unknown",
    "‡πÇ‡∏Ñ‡∏•‡∏á": "unknown",
    "‡∏°‡∏∑‡∏≠": "unknown",
    "‡∏ñ‡∏ï": (0.14, "‡∏Å‡∏Å."),
    "‡∏Å‡∏£‡∏±‡∏ö": (0.001, "‡∏Å‡∏Å."),
    "‡πÅ$": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏ä‡∏∏‡∏î": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡∏ì‡∏∞": (0.015, "‡∏Å‡∏Å."),
    "‡∏´‡∏≠‡∏á": "‡∏ü‡∏≠‡∏á",
    "‡∏´‡∏≤‡∏á": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πç‡∏∞": "unknown",
    "‡∏ó‡∏µ‡πà": "unknown",
}

# Templates for "<number><unit>" quantities; "{}" is replaced by a key of
# `units` (keys may themselves carry regex syntax such as a trailing "$").
# Raw strings avoid the invalid "\d" / "\ " escape sequences of plain literals.
patterns = [
    r"\d+ - \d+\ *{}",   # range with spaced dash, e.g. "2 - 3 <unit>"
    r"\d+-\d+\ *{}",     # range with tight dash, e.g. "2-3 <unit>"
    r"\d+\ *{}",         # number, optional spaces, unit
    r"\d+\t*{}",         # number, optional tabs, unit
]

# Every template instantiated for every unit, unit-major order.
unit_patterns = [p.format(u) for u in units for p in patterns]

# Matches a quantity that has both a number and a known unit.
unit_pattern = re.compile("|".join(unit_patterns))
# Matches just the unit keyword, used to look the unit up in `units`.
unit_type_pattern = re.compile("|".join(units))

# Inclusive date window of the price series fetched from DynamoDB.
_PRICE_DATE_FROM = "2020-07-23"
_PRICE_DATE_TO = "2020-08-22"

# Response body returned when no menu matches the keyword.
_NOT_FOUND = '{"msg": "not found."}'


def _fetch_menus(keyword):
    """Return (menu_name, ingredient_json) rows whose name contains *keyword*."""
    # `with conn` ends the transaction (commit/rollback) so a failed query
    # cannot leave the shared connection in an aborted state; the cursor
    # context manager closes the cursor even when execute() raises.
    with conn, conn.cursor() as cursor:
        # Bound parameter instead of string interpolation: the keyword comes
        # straight from the request and must never be spliced into the SQL.
        cursor.execute(
            "SELECT menu_name, ingredient_json FROM ingredients "
            "WHERE menu_name ILIKE %s;",
            (f"%{keyword}%",),
        )
        return cursor.fetchall()


def _match_ingredient(name, quantity):
    """Return the price-table type for an ingredient, or None if not priced."""
    if (
        not includes.search(name)
        or excludes.search(name)
        or excludes_units.search(quantity)
    ):
        return None
    specific = ingd_pattern.search(name)
    if specific:
        return ingredient_translate[specific.group()]
    generic = ingd_default_pattern.search(name)
    if generic:
        return default_ingredient[generic.group()]
    return None


def _resolve_unit(quantity):
    """Return the `units` entry for the first unit keyword found in *quantity*."""
    unit_key = unit_type_pattern.search(quantity).group()
    try:
        return units[unit_key]
    except KeyError:
        # End-anchored units are stored under their regex form ("<unit>$"),
        # while the matched text does not include the "$".
        return units[unit_key + "$"]


def _parse_magnitude(quantity):
    """Return the numeric amount in *quantity* (a fraction wins over a plain number)."""
    frac = frac_pattern.search(quantity)
    if frac:
        numerator, denominator = frac.group().split("/")
        return float(numerator) / float(denominator)
    num = num_pattern.search(quantity)
    return float(num.group()) if num else None


def _load_prices(ingredient_type, cache):
    """Return the price DataFrame for *ingredient_type*, querying DynamoDB once."""
    if ingredient_type not in cache:
        response = table_prices.query(
            KeyConditionExpression=Key("type").eq(ingredient_type)
            & Key("date").between(_PRICE_DATE_FROM, _PRICE_DATE_TO)
        )
        prices = pd.DataFrame(response["Items"])
        # DynamoDB returns Decimal values; convert once for numpy arithmetic.
        prices["price"] = prices["price"].astype(float)
        cache[ingredient_type] = prices
    return cache[ingredient_type]


@app.route('/')
def search():
    """Estimate the per-date ingredient cost of every menu matching ``keyword``.

    Reads the ``keyword`` query parameter, looks up menus whose name contains
    it (case-insensitive), and for each recognised ingredient with a
    convertible unit accumulates ``price * amount`` over the price series.

    Returns:
        The string ``'{"msg": "not found."}'`` when the keyword is missing or
        nothing matches; otherwise a dict with
        ``menu_prices`` (menu -> list of costs per date, or 0 if nothing was
        priced), ``queried_prices`` (ingredient type -> list of prices),
        ``date`` (dates of the first queried series) and ``show_str``
        (menu -> display lines; priced ingredients are prefixed with ``*``).
    """
    kw = req.args.get('keyword')
    if kw is None:
        # Previously a missing parameter searched for the literal text "None".
        return _NOT_FOUND

    result = _fetch_menus(kw)
    if not result:
        return _NOT_FOUND

    # Prefix with a running number so menus sharing a name stay distinct.
    menus = {f"{i+1} {k}": json.loads(l) for i, (k, l) in enumerate(result)}

    queried_prices = {}
    menu_prices = {}
    show_str = {}

    for menu_name, ingredients in menus.items():
        date_price = None
        lines = show_str[menu_name] = []

        for raw_name, quantity in ingredients.items():
            used_in_total = False
            # Keep only the first word and drop zero-width spaces / parentheses.
            name = (
                raw_name.split(" ")[0]
                .replace("\u200b", "")
                .replace("(", "")
                .replace(")", "")
            )

            ingredient_type = _match_ingredient(name, quantity)
            if ingredient_type is not None and unit_pattern.search(quantity):
                unit = _resolve_unit(quantity)
                if unit == "unknown":
                    lines.append(f"*{name} {quantity} (unknown unit type)")
                else:
                    amount = _parse_magnitude(quantity)
                    if amount is not None:
                        used_in_total = True
                        prices = _load_prices(ingredient_type, queried_prices)

                        if date_price is None:
                            # Sized from the first priced ingredient's series.
                            date_price = np.zeros([prices.shape[0]])

                        if isinstance(unit, tuple):
                            # (factor, label): convert to the price's base unit.
                            factor, label = unit
                            lines.append(f"*{name} {quantity} ({label})")
                            date_price += (prices["price"] * (amount * factor)).values
                        else:
                            lines.append(f"*{name} {quantity} ({unit})")
                            if unit == "baht":
                                # Amount is already money: add it flat. The old
                                # check compared against the Thai unit word,
                                # which `units` never yields, so baht amounts
                                # were wrongly multiplied by the unit price.
                                date_price += amount
                            else:
                                date_price += (prices["price"] * amount).values

            # NOTE(review): an "unknown unit" ingredient gets this plain line
            # in addition to its starred line above; kept as in the original.
            if not used_in_total:
                lines.append(f"{name} {quantity}")

        if date_price is not None and np.sum(date_price) != 0:
            menu_prices[menu_name] = date_price.tolist()
        else:
            menu_prices[menu_name] = 0

    if queried_prices:
        first_type = next(iter(queried_prices))
        qdate = queried_prices[first_type]["date"].values.tolist()
    else:
        qdate = []

    return {
        "menu_prices": menu_prices,
        "queried_prices": {k: queried_prices[k]["price"].values.tolist() for k in queried_prices},
        "date": qdate,
        "show_str": show_str,
    }

if __name__ == '__main__':
    # Development entry point: starts Flask's built-in server on port 8000.
    # NOTE(review): debug mode enables the interactive Werkzeug debugger, and
    # host 0.0.0.0 accepts connections on every interface; together that lets
    # remote clients reach the debugger. Confirm this is never run on a
    # publicly reachable machine, or turn debug off / bind to 127.0.0.1.
    app.debug = True
    app.run(host='0.0.0.0', port=8000)

Overwriting app.py


In [None]:
!python3 app.py

 * Serving Flask app "app" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
 * Running on http://0.0.0.0:8000/ (Press CTRL+C to quit)
 * Restarting with stat
 * Debugger is active!
 * Debugger PIN: 257-460-940
49.230.13.211 - - [26/Jul/2020 11:56:55] "[37mGET /?keyword=‡∏ß‡∏∏‡πâ‡∏ô‡πÄ‡∏™‡πâ‡∏ô‡∏´‡∏°‡∏π‡∏Å‡∏£‡∏≠‡∏ö‡∏Ñ‡∏±‡πà‡∏ß‡∏û‡∏£‡∏¥‡∏Å‡πÄ‡∏Å‡∏•‡∏∑‡∏≠ HTTP/1.1[0m" 200 -
49.230.13.211 - - [26/Jul/2020 11:57:05] "[37mGET /?keyword=‡∏´‡∏°‡∏µ‡πà‡∏Å‡∏£‡∏≠‡∏ö‡∏ú‡∏±‡∏î‡∏ã‡∏µ‡∏≠‡∏¥‡πä‡∏ß HTTP/1.1[0m" 200 -


In [2]:
# import subprocess as sp

# server = sp.Popen("FLASK_APP=app.py flask run", shell=True)
# server

In [339]:
from flask import Flask, Response, render_template, make_response, jsonify, request as req
from boto3.dynamodb.conditions import Key, Attr
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import psycopg2
import boto3
import json
import re
import os

# Pull AWS credentials (and optional overrides) from the local .env file.
load_dotenv('.env')
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# DynamoDB handle used to read the per-date ingredient price series.
dynamodb = boto3.resource(
    'dynamodb',
    region_name='ap-southeast-2',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

table_prices = dynamodb.Table('IngredientPrices')

# PostgreSQL connection holding the `ingredients` table.
# The DSN can be supplied through DATABASE_DSN (e.g. in .env) so the password
# does not have to live in the notebook; the historical value is the fallback.
conn = psycopg2.connect(
    os.getenv("DATABASE_DSN", "dbname=eltrial user=top password=1234")
)

# Load every menu whose name contains `kw` (empty keyword = all menus).
kw = ""
# The cursor context manager closes the cursor even if the query fails, and
# the keyword is passed as a bound parameter rather than formatted into SQL.
with conn.cursor() as cursor:
    cursor.execute(
        "SELECT menu_name, ingredient_json FROM ingredients "
        "WHERE menu_name ILIKE %s;",
        (f"%{kw}%",),
    )
    result = cursor.fetchall()

In [255]:
includes = re.compile("‡∏´‡∏°‡∏π|‡πÑ‡∏Å‡πà|‡∏ß‡∏±‡∏ß|^‡πÑ‡∏Ç‡πà$|‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà|^‡∏ô‡∏°")
excludes = re.compile("‡∏ô‡∏°‡∏Ç‡πâ‡∏ô‡∏´‡∏ß‡∏≤‡∏ô|‡∏ú‡∏á|‡∏Ñ‡∏ô‡∏≠‡∏£‡πå|‡∏°‡∏≤‡∏°‡πà‡∏≤|‡∏Ñ‡∏≤‡∏£‡πå‡πÄ‡∏ô‡∏ä‡∏±‡πà‡∏ô|‡∏´‡∏°‡∏π‡∏Å‡∏£‡∏≠‡∏ö|‡∏Å‡∏£‡∏∞‡∏î‡∏π‡∏Å|‡∏õ‡∏π|^‡∏ã‡∏≠‡∏™|^‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÄ‡∏ó‡∏®|^‡πÄ‡∏ï‡πâ‡∏≤‡∏´‡∏π‡πâ|‡∏≠‡∏¥‡∏ô‡∏ó‡∏ú‡∏≤‡∏•‡∏±‡∏°|‡∏•‡∏π‡∏Å‡∏ä‡∏¥‡πâ‡∏ô|‡∏´‡∏°‡∏π‡πÅ‡∏Æ‡∏°|‡πÑ‡∏Ç‡πà‡πÅ‡∏î‡∏á‡πÄ‡∏Ñ‡πá‡∏°|‡∏ô‡πâ‡∏≥‡∏°‡∏±‡∏ô|‡∏Ñ‡∏≠‡∏£‡πå|‡πÑ‡∏™‡πâ‡∏Å‡∏£‡∏≠‡∏Å|‡∏´‡∏±‡∏ß‡πÉ‡∏à|‡∏Å‡∏£‡∏∞‡πÄ‡∏û‡∏≤‡∏∞|‡∏õ‡∏≠‡∏î|‡∏£‡∏™‡∏î‡∏µ|‡∏Ñ‡∏ô‡∏≠|‡∏ô‡πâ‡∏≥‡∏ã‡∏∏‡∏õ|‡∏´‡∏°‡∏π‡∏¢‡∏≠|‡∏ô‡πâ‡∏≥|‡∏ã‡∏µ‡∏≠‡∏¥‡πä‡∏ß|‡∏ä‡∏∏‡∏î|^‡∏û‡∏£‡∏¥‡∏Å‡πÑ‡∏ó‡∏¢|‡∏ã‡∏µ‡∏≠‡∏¥‡πâ‡∏ß|‡∏™‡∏ï‡πä‡∏≠‡∏Å|‡πÅ‡∏õ‡πâ‡∏á|^‡∏û‡∏£‡∏¥‡∏Å|‡πÄ‡∏•‡∏∑‡∏≠‡∏î|‡∏ä‡∏∏‡∏õ|‡∏£‡∏≤‡∏Å‡∏ú‡∏±‡∏Å‡∏ä‡∏µ|‡∏ö‡∏∞‡∏´‡∏°‡∏µ‡πà‡∏Å‡∏∂‡πà‡∏á‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡∏£‡∏π‡∏õ|‡∏Ç‡πâ‡∏ô‡∏´‡∏ß‡∏≤‡∏ô|‡∏Å‡∏≤‡∏Å‡∏´‡∏°‡∏π|‡∏ô‡∏°‡∏Ç‡πâ‡∏ô|‡∏´‡∏°‡∏π‡πÅ‡∏î‡∏á|‡πÅ‡∏Ñ‡∏õ‡∏´‡∏°‡∏π|‡∏°‡∏±‡∏ô‡∏´‡∏°‡∏π|^‡∏ã‡∏≠‡∏™|‡∏ô‡∏°‡∏ñ‡∏±‡πà‡∏ß‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏á|\*\*|^‡πÄ‡∏´‡∏•‡πâ‡∏≤‡∏à‡∏µ‡∏ô|‡∏≠‡∏±‡∏•‡∏°‡∏≠‡∏ô‡∏î‡πå|‡∏´‡∏°‡∏π‡∏´‡∏¢‡∏≠‡∏á|‡∏™‡∏π‡∏ï‡∏£|‡∏ã‡∏∏‡∏õ‡∏Å‡πâ‡∏≠‡∏ô|^‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á|‡∏™‡πà‡∏ß‡∏ô‡∏ú‡∏™‡∏°|‡πÅ‡∏Ñ‡∏ö‡∏´‡∏°‡∏π")
excludes_units = re.compile("‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö|‡∏Ñ‡∏£‡∏∂‡πà1/2|‡∏õ‡∏£‡∏¥‡∏°‡∏≤‡∏ô‡∏ï‡∏≤‡∏°‡πÉ‡∏à‡∏ä‡∏≠‡∏ö")



In [256]:
# Decode each row's ingredient JSON, keyed by menu name (a later row with the
# same name replaces an earlier one).
menus = {
    menu_name: json.loads(ingredient_json)
    for menu_name, ingredient_json in result
}

In [337]:
ingredient_kws = re.compile('pork_‡∏Ç‡∏≤‡∏´‡∏°‡∏π|pork_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π|pork_‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π|pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô|\
pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÅ‡∏î‡∏á|pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏´‡∏•‡πà|pork_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡∏´‡∏°‡∏π|pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å|pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô|\
pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô|pork_‡πÑ‡∏™‡πâ‡∏ï‡∏±‡∏ô‡∏´‡∏°‡∏π|pork_‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π|pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π|pork_‡∏´‡∏°‡∏π‡∏ö‡∏î|\
beef_‡∏Ç‡∏≠‡∏ö‡∏Å‡∏£‡∏∞‡∏î‡πâ‡∏á|beef_‡∏Ç‡∏µ‡πâ‡∏£‡∏¥‡πâ‡∏ß|beef_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô|beef_‡∏î‡∏≠‡∏Å‡∏à‡∏≠‡∏Å|beef_‡∏ô‡πà‡∏≠‡∏á|beef_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á|\
beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÅ‡∏î‡∏á|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏õ‡∏•‡∏µ‡∏Å|beef_‡πÄ‡∏®‡∏©‡πÄ‡∏ô‡∏∑‡πâ‡∏≠|beef_‡∏™‡∏±‡∏ô‡∏Ñ‡∏≠|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏ß‡∏±‡∏ß‡∏ö‡∏î|beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô|\
beef_‡∏™‡∏±‡∏ô‡πÉ‡∏ô|beef_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å|chicken_‡πÑ‡∏Å‡πà‡∏ö‡πâ‡∏≤‡∏ô|chicken_‡πÑ‡∏Å‡πà‡∏ó‡∏±‡πâ‡∏á‡∏ï‡∏±‡∏ß|chicken_‡πÑ‡∏Å‡πà‡∏ú‡πà‡∏≤‡∏ã‡∏µ‡∏Å|\
chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà|chicken_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà|chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà|chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà|\
chicken_‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡πÄ‡∏ï‡πá‡∏°|chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà|chicken_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà|chicken_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô‡πÑ‡∏Å‡πà|\
chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà|chicken_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà‡∏ö‡∏î|chicken_‡πÑ‡∏Å‡πà‡∏°‡∏µ‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏´‡∏ô‡πâ‡∏≤‡∏ü‡∏≤‡∏£‡πå‡∏°|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 0|\
eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 1|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 3|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 4|\
eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 5|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡∏ï‡πâ‡∏°|eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡∏Ñ‡∏•‡∏∞‡∏´‡∏ô‡πâ‡∏≤‡∏ü‡∏≤‡∏£‡πå‡∏°|‡∏ô‡∏°')

ingredient_translate = {
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ö": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏°‡∏π‡∏ö‡∏î": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡∏´‡∏°‡∏π‡∏ö‡∏î": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡πÑ‡∏Ç‡πÑ‡∏Å‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡∏´‡∏°‡∏π‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡∏ö‡∏ô": "chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà", 
    "‡∏ô‡∏°‡∏™‡∏î": "‡∏ô‡∏°",
    "‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏≠‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏°‡∏±‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏™‡∏î": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡∏Ñ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏Ñ‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π": "pork_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡∏´‡∏°‡∏π",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡∏Å‡∏•‡∏≤‡∏á": "chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏≠‡∏Å": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å": "pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å",
    "‡∏ï‡∏±‡∏ö‡πÑ‡∏Å‡πà": "chicken_‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡πÉ‡∏ô‡πÑ‡∏Å‡πà",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà": "chicken_‡∏≠‡∏Å‡πÑ‡∏Å‡πà",
    "‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡πÑ‡∏Å‡πà‡πÄ‡∏ï‡πá‡∏°",
    "‡∏´‡∏°‡∏π‡∏™‡πà‡∏ß‡∏ô‡∏™‡∏∞‡πÇ‡∏û‡∏Å": "pork_‡∏™‡∏∞‡πÇ‡∏û‡∏Å‡∏´‡∏°‡∏π",
    "‡∏´‡∏°‡∏π‡∏™‡∏±‡∏ô‡πÉ‡∏ô": "pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πà‡∏≠‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡∏ß‡∏±‡∏ß": "beef_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏ß‡∏±‡∏ß": "beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ã‡∏µ‡πà‡πÇ‡∏Ñ‡∏£‡∏á‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å‡∏´‡∏°‡∏π": "pork_‡∏™‡∏±‡∏ô‡∏ô‡∏≠‡∏Å",
    "‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏ö‡∏î": "chicken_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡πÑ‡∏Å‡πà‡∏ö‡∏î",
    "‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡∏ö‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏´‡∏°‡∏π‡∏ö‡∏Å": "pork_‡∏´‡∏°‡∏π‡∏ö‡∏î",
    "‡∏´‡∏°‡∏π‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
    "‡πÉ‡∏™‡πâ‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π",
    "‡πÑ‡∏™‡πâ‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡∏≠‡πà‡∏≠‡∏ô‡∏´‡∏°‡∏π",
    "‡∏´‡∏ô‡∏±‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π": "pork_‡πÑ‡∏™‡πâ‡πÉ‡∏´‡∏ç‡πà‡∏´‡∏°‡∏π",
    "‡πÄ‡∏≠‡πá‡∏ô‡∏Ç‡πâ‡∏≠‡πÑ‡∏Å‡πà": "chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏™‡∏±‡∏ô‡πÉ‡∏ô‡∏´‡∏°‡∏π": "pork_‡∏™‡∏±‡∏ô‡πÉ‡∏ô",
    "‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà": "chicken_‡∏ï‡∏µ‡∏ô‡πÑ‡∏Å‡πà",
    "‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π": "pork_‡∏ï‡∏±‡∏ö‡∏´‡∏°‡∏π",
    "‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà": "chicken_‡∏õ‡∏µ‡∏Å‡∏Å‡∏•‡∏≤‡∏á‡πÑ‡∏Å‡πà",
    "‡πÑ‡∏Å‡πà‡∏™‡∏±‡∏ö": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡∏´‡∏ô‡∏±‡∏á‡∏´‡∏°‡∏π": "pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô",
}

default_ingredient = {
    "‡∏ô‡∏°": "‡∏ô‡∏°",
    "‡πÑ‡∏Ç‡πà": "eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2",
    "‡∏´‡∏°‡∏π": "pork_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
    "‡πÑ‡∏Å‡πà": "chicken_‡∏ô‡πà‡∏≠‡∏á‡πÑ‡∏Å‡πà",
    "‡∏ß‡∏±‡∏ß": "beef_‡πÄ‡∏ô‡∏∑‡πâ‡∏≠‡∏Å‡πâ‡∏≠‡∏ô",
}

# Alternation over every specific ingredient name, in dict insertion order.
ingd_patterns = list(ingredient_translate)
ingd_pattern = re.compile("|".join(ingd_patterns))

# Fallback alternation over the generic ingredient keywords.
ingd_default_patterns = list(default_ingredient)
ingd_default_pattern = re.compile("|".join(ingd_default_patterns))

units = {
    "‡∏Å‡∏£‡∏±‡∏°": (0.001, "‡∏Å‡∏Å."),
    "g": (0.001, "‡∏Å‡∏Å."),
    "grams": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏£‡∏∞‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏¥‡πÇ‡∏•": (1., "‡∏Å‡∏Å."),
    "‡∏ü‡∏≠‡∏á": "‡∏ü‡∏≠‡∏á",
    "‡∏ä‡∏¥‡πâ‡∏ô‡πÉ‡∏´‡∏ç‡πà": "unknown",
    "‡∏°‡∏¥‡∏•‡∏•‡∏¥‡∏•‡∏¥‡∏ï‡∏£": (0.001, "‡∏•‡∏¥‡∏ï‡∏£"),
    "‡∏Å‡πâ‡∏≠‡∏ô": "unknown",
    "‡∏ñ‡πâ‡∏ß‡∏¢": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞": (0.015, "‡∏•‡∏¥‡∏ï‡∏£"),
    "‡∏ä‡∏¥‡πâ‡∏ô": "unknown",
    "‡∏Å‡∏£‡πâ‡∏°": (0.001, "‡∏Å‡∏Å."),
    "ml": (0.001, "‡∏•‡∏¥‡∏ï‡∏£"),
    "kg": (1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏Å": (1, "‡∏Å‡∏Å."),
    "‡∏Ç‡∏µ‡∏î": (0.1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏¥‡πÇ‡∏•‡∏Å‡∏£‡∏±‡∏°": (1, "‡∏Å‡∏Å."),
    "‡∏Å‡∏£‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏ó‡πà‡∏≠‡∏ô": "unknown",
    "‡πÇ‡∏Ñ‡∏£‡∏á": "unknown",
    "‡πÉ‡∏ö": "‡∏ü‡∏≠‡∏á",
    "‡πÅ‡∏û‡πá‡∏Ñ": "unknown",
    "mg": (0.000001, "‡∏Å‡∏Å."),
    "‡∏Å$": (0.001, "‡∏Å‡∏Å."),
    "‡πÄ‡∏™‡πâ‡∏ô": "unknown",
    "‡∏ô‡πà‡∏≠‡∏á": "unknown",
    "‡∏õ‡∏µ‡∏Å": "unknown",
    "‡∏Ç‡∏≤": "unknown",
    "‡∏ß‡∏á": "unknown",
    "‡∏ï‡∏±‡∏ß": "unknown",
    "‡∏Ç‡∏∂‡∏î": "unknown",
    "‡∏ã‡∏≠‡∏á": "unknown",
    "‡∏≠‡∏Å": "unknown",
    "‡∏à‡∏≤‡∏ô": "unknown",
    "‡πÇ‡∏•": (1, "‡∏Å‡∏Å."),
    "‡πÅ‡∏ó‡πà‡∏á": "unknown",
    "‡∏ö‡∏≤‡∏ó": "baht",
    "‡πÑ‡∏°‡πâ": "unknown",
    "‡∏î‡∏∏‡πâ‡∏ô‡∏™‡∏±‡πâ‡∏ô": "unknown",
    "‡∏ï‡∏µ‡∏ô": "unknown",
    "Kg": (1, "‡∏Å‡∏Å."),
    "‡∏ó‡∏±‡∏û‡∏û‡∏µ": "unknown",
    "‡∏ü‡∏≠‡∏ß": "unit",
    "‡∏ñ‡∏≤‡∏î": "unknown",
    "‡∏ä‡πâ‡∏¥‡∏ô‡πÇ‡∏ï‡πä‡∏∞": (0.015, "‡∏Å‡∏Å."),
    "‡∏´‡πà‡∏≠": "unknown",
    "‡∏•‡∏π‡∏Å": "unit",
    "KG": (1., "‡∏Å‡∏Å."),
    "‡πÅ‡∏ú‡πà‡∏ô": "unknown",
    "G": (0.001, "‡∏Å‡∏Å."),
    "‡∏ï‡∏≤‡∏°‡∏™‡∏∞‡∏î‡∏ß‡∏Å": "unknown",
    "‡∏ä‡∏ï": (0.015, "‡∏Å‡∏Å."),
    "pack": "unknown",
    "‡∏û‡∏ß‡∏á": "unknown",
    "‡∏ñ‡∏∏‡∏á": "unknown",
    "pcs": "‡∏ü‡∏≠‡∏á",
    "‡∏™‡πÑ‡∏•‡∏î‡πå": "unknown",
    "‡πÇ‡∏Ñ‡∏£": "unknown",
    "‡∏ü$": "‡∏ü‡∏≠‡∏á",
    "‡∏ä‡∏≤‡∏°": "unknown",
    "‡πÅ‡∏û‡∏Ñ": "unknown",
    "‡∏Ç‡∏µ‡πÄ": (0.1, "‡∏Å‡∏Å."),
    "‡∏ä‡πâ‡∏≠‡∏ô$": "unknown",
    "‡∏≠‡∏±‡∏ô": "unknown",
    "‡πÅ‡∏û‡πä‡∏Ñ": "unknown",
    "‡∏Ñ‡∏£‡∏∂‡πà‡∏á‡πÇ‡∏•": (0.5, "‡∏Å‡∏Å."),
    "‡∏´‡∏°‡πâ‡∏≠": "unknown",
    "‡∏Å‡∏•‡πà‡∏≠‡∏á": "unknown",
    "‡∏Å‡∏£‡∏µ‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏ô‡∏±‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏™‡∏∞‡πÇ‡∏û‡∏Å": "unknown",
    "‡∏ã‡∏µ‡∏Å": "unknown",
    "‡∏Ç‡∏£‡∏î": "unknown",
    "‡∏ä‡∏µ‡∏î": "unknown",
    "‡∏´‡∏¢‡∏¥‡∏ö‡∏°‡∏∑‡∏≠": "unknown",
    "‡πÇ‡∏Ñ‡∏•‡∏á": "unknown",
    "‡∏°‡∏∑‡∏≠": "unknown",
    "‡∏ñ‡∏ï": (140, "g"),
    "‡∏Å‡∏£‡∏±‡∏ö": (0.001, "‡∏Å‡∏Å."),
    "‡πÅ$": (0.001, "‡∏Å‡∏Å."),
    "‡∏Å‡∏°": (0.001, "‡∏Å‡∏Å."),
    "‡∏ä‡∏∏‡∏î": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡∏ì‡∏∞": (0.015, "‡∏Å‡∏Å."),
    "‡∏´‡∏≠‡∏á": "‡∏ü‡∏≠‡∏á",
    "‡∏´‡∏≤‡∏á": "unknown",
    "‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πç‡∏∞": "unknown",
    "‡∏ó‡∏µ‡πà": "unknown",
}

# Templates for "<number><unit>" quantities; "{}" is replaced by a key of
# `units` (keys may themselves carry regex syntax such as a trailing "$").
# Raw strings avoid the invalid "\d" / "\ " escape sequences of plain literals.
patterns = [
    r"\d+ - \d+\ *{}",   # range with spaced dash, e.g. "2 - 3 <unit>"
    r"\d+-\d+\ *{}",     # range with tight dash, e.g. "2-3 <unit>"
    r"\d+\ *{}",         # number, optional spaces, unit
    r"\d+\t*{}",         # number, optional tabs, unit
]

# Every template instantiated for every unit, unit-major order.
unit_patterns = [p.format(u) for u in units for p in patterns]

# Matches a quantity that has both a number and a known unit.
unit_pattern = re.compile("|".join(unit_patterns))
# Matches just the unit keyword, used to look the unit up in `units`.
unit_type_pattern = re.compile("|".join(units))


In [308]:
unit_type_pattern

re.compile(r'‡∏Å‡∏£‡∏±‡∏°|g|grams|‡∏Å‡∏£‡∏∞‡∏°|‡∏Å‡∏¥‡πÇ‡∏•|‡∏ü‡∏≠‡∏á|‡∏ä‡∏¥‡πâ‡∏ô‡πÉ‡∏´‡∏ç‡πà|‡∏°‡∏¥‡∏•‡∏•‡∏¥‡∏•‡∏¥‡∏ï‡∏£|‡∏Å‡πâ‡∏≠‡∏ô|‡∏ñ‡πâ‡∏ß‡∏¢|‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞|‡∏ä‡∏¥‡πâ‡∏ô|‡∏Å‡∏£‡πâ‡∏°|ml|kg|‡∏Å‡∏Å|‡∏Ç‡∏µ‡∏î|‡∏Å‡∏¥‡πÇ‡∏•‡∏Å‡∏£‡∏±‡∏°|‡∏Å‡∏£‡∏°|‡∏ó‡πà‡∏≠‡∏ô|‡πÇ‡∏Ñ‡∏£‡∏á|‡πÉ‡∏ö|‡πÅ‡∏û‡πá‡∏Ñ|mg|‡∏Å$|‡πÄ‡∏™‡πâ‡∏ô|‡∏ô‡πà‡∏≠‡∏á|‡∏õ‡∏µ‡∏Å|‡∏Ç‡∏≤|‡∏ß‡∏á|‡∏ï‡∏±‡∏ß|‡∏Ç‡∏∂‡∏î|‡∏ã‡∏≠‡∏á|‡∏≠‡∏Å|‡∏à‡∏≤‡∏ô|‡πÇ‡∏•|‡πÅ‡∏ó‡πà‡∏á|‡∏ö‡∏≤‡∏ó|‡πÑ‡∏°‡πâ|‡∏î‡∏∏‡πâ‡∏ô‡∏™‡∏±‡πâ‡∏ô|‡∏ï‡∏µ‡∏ô|Kg|‡∏ó‡∏±‡∏û‡∏û‡∏µ|‡∏ü‡∏≠‡∏ß|‡∏ñ‡∏≤‡∏î|‡∏ä‡πâ‡∏¥‡∏ô‡πÇ‡∏ï‡πä‡∏∞|‡∏´‡πà‡∏≠|‡∏•‡∏π‡∏Å|KG|‡πÅ‡∏ú‡πà‡∏ô|G|‡∏ï‡∏≤‡∏°‡∏™‡∏∞‡∏î‡∏ß‡∏Å|‡∏ä‡∏ï|pack|‡∏û‡∏ß‡∏á|‡∏ñ‡∏∏‡∏á|pcs|‡∏™‡πÑ‡∏•‡∏î‡πå|‡πÇ‡∏Ñ‡∏£|‡∏ü$|‡∏ä‡∏≤‡∏°|‡πÅ‡∏û‡∏Ñ|‡∏Ç‡∏µ‡πÄ|‡∏ä‡πâ‡∏≠‡∏ô$|‡∏≠‡∏±‡∏ô|‡πÅ‡∏û‡πä‡∏Ñ|‡∏Ñ‡∏£‡∏∂‡πà‡∏á‡πÇ‡∏•|‡∏´‡∏°‡πâ‡∏≠|‡∏Å‡∏•‡πà‡∏≠‡∏á|‡∏Å‡∏£‡∏µ‡∏°|‡∏Å‡∏ô‡∏±‡∏°|‡∏™‡∏∞‡πÇ‡∏û‡∏Å|‡∏ã‡∏µ‡∏Å|‡∏Ç‡∏£‡∏î|‡∏ä‡∏µ‡∏î|‡∏´‡∏¢‡∏¥‡∏ö‡∏°‡∏∑‡∏≠|‡πÇ‡∏Ñ‡∏•‡∏á|‡∏°‡∏∑‡∏≠|‡∏ñ‡∏ï|‡∏Å‡∏£‡∏±‡∏ö|‡πÅ$|‡∏Å‡∏°|‡∏ä‡∏∏‡∏î|‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡∏ì‡∏∞|‡∏´‡∏≠‡∏á|‡∏´‡∏≤‡∏á|

In [315]:
# Quantity parsing: a fraction such as "1/2", or the first run of digits.
# Raw strings avoid the invalid "\d" escape sequence of a plain literal.
frac_pattern = re.compile(r"\d+/\d+")
num_pattern = re.compile(r"\d+")

In [336]:
dict(prices())

{'Items': [{'price': Decimal('182.01702163319416'),
   'date': '2020-07-23',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('181.8467674206031'),
   'date': '2020-07-24',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('181.994167428968'),
   'date': '2020-07-25',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.24495456567672'),
   'date': '2020-07-26',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.30987309635077'),
   'date': '2020-07-27',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.3756543486512'),
   'date': '2020-07-28',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.4579703953696'),
   'date': '2020-07-29',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.5827306438472'),
   'date': '2020-07-30',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'price': Decimal('182.69303899636168'),
   'date': '2020-07-31',
   'type': 'pork_‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô'},
  {'pri

In [345]:
prices

Unnamed: 0,price,date,type
0,2.835654172231381,2020-07-23,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
1,2.841633149467288,2020-07-24,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
2,2.8475087490903706,2020-07-25,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
3,2.853282758518512,2020-07-26,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
4,2.8589569342648127,2020-07-27,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
5,2.8645330024719384,2020-07-28,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
6,2.870012659437233,2020-07-29,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
7,2.8753975721287466,2020-07-30,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
8,2.8806893786923475,2020-07-31,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2
9,2.8858896889500607,2020-08-01,eggs_‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà‡πÄ‡∏ö‡∏≠‡∏£‡πå 2


In [360]:
# Price every menu in `menus`: for each recognised ingredient with a
# convertible unit, accumulate price * amount over the queried date range.
queried_prices = {}  # ingredient type -> price DataFrame (one query per type)
menu_prices = {}     # menu name -> per-date cost array, or 0 if nothing priced
show_str = {}        # menu name -> display lines ("*" marks matched ingredients)
ingd_prices = {}

for m in menus:
    date_price = None
    show_str[m] = []
    for ig in menus[m]:
        quantity = menus[m][ig]
        _use_to_cal = False
        # Keep only the first word and drop zero-width spaces / parentheses.
        _ig = ig.split(" ")[0].replace("\u200b", "").replace("(", "").replace(")", "")

        if includes.search(_ig) and not excludes.search(_ig) and not excludes_units.search(quantity):
            # Was `regex_pattern`, a name not defined in this notebook; the
            # specific-ingredient alternation is `ingd_pattern`.
            ingd_type = ingd_pattern.search(_ig)
            ingd_default_type = ingd_default_pattern.search(_ig)
            if ingd_type or ingd_default_type:
                if ingd_type:
                    str_ingd_type = ingredient_translate[ingd_type.group()]
                else:
                    str_ingd_type = default_ingredient[ingd_default_type.group()]

                if unit_pattern.search(quantity):
                    unit_type_p = unit_type_pattern.search(quantity).group()
                    try:
                        common_unit_type = units[unit_type_p]
                    except KeyError:
                        # End-anchored units are stored as "<unit>$".
                        common_unit_type = units[unit_type_p + "$"]

                    if common_unit_type == "unknown":
                        show_str[m].append(f"*{_ig} {quantity} (unknown unit type)")
                    else:
                        _frac = frac_pattern.search(quantity)
                        _num = num_pattern.search(quantity)
                        if _frac or _num:
                            _use_to_cal = True

                            if _frac:
                                numerator, denominator = _frac.group().split("/")
                                magn = float(numerator) / float(denominator)
                            else:
                                magn = float(_num.group())

                            if str_ingd_type not in queried_prices:
                                response = table_prices.query(
                                    KeyConditionExpression=Key("type").eq(str_ingd_type)
                                    & Key("date").between("2020-07-23", "2020-08-22")
                                )
                                prices = pd.DataFrame(response["Items"])
                                # DynamoDB returns Decimal; convert for numpy math.
                                prices["price"] = prices["price"].astype(float)
                                queried_prices[str_ingd_type] = prices
                            else:
                                prices = queried_prices[str_ingd_type]

                            if date_price is None:
                                date_price = np.zeros([prices.shape[0]])

                            if isinstance(common_unit_type, tuple):
                                show_str[m].append(f"*{_ig} {quantity} ({common_unit_type[1]})")
                                magn *= common_unit_type[0]
                                # .values keeps the accumulator a plain ndarray.
                                date_price += (prices["price"] * magn).values
                            else:
                                show_str[m].append(f"*{_ig} {quantity} ({common_unit_type})")
                                if common_unit_type == "baht":
                                    # `units` maps the baht key to "baht"; the
                                    # old comparison against the Thai word
                                    # could never be true.
                                    date_price += magn
                                else:
                                    date_price += (prices["price"] * magn).values

        if not _use_to_cal:
            show_str[m].append(f"{_ig} {quantity}")

    # menu_prices is a dict keyed by menu (the old `.append` call would raise
    # AttributeError on a dict).
    if isinstance(date_price, np.ndarray):
        menu_prices[m] = date_price
    else:
        menu_prices[m] = 0

In [361]:
show_str

{'‡πÑ‡∏Ç‡πà‡πÄ‡∏à‡∏µ‡∏¢‡∏ß': ['*‡πÑ‡∏Ç‡πà 2 ‡∏ü‡∏≠‡∏á (‡∏ü‡∏≠‡∏á)', '‡∏ô‡πâ‡∏≥ 1 ‡∏ñ‡πâ‡∏ß‡∏¢‡∏ï‡∏ß‡∏á', '‡∏ô‡πâ‡∏≥‡∏õ‡∏•‡∏≤ ‡∏ô‡∏¥‡∏î‡∏´‡∏ô‡πà‡∏≠‡∏¢'],
 '‡πÑ‡∏Ç‡πà‡πÄ‡∏à‡∏µ‡∏¢‡∏ß‡∏ó‡∏π‡∏ô‡πà‡∏≤‡∏û‡∏£‡∏¥‡∏Å‡∏™‡∏±‡∏ö': ['‡∏õ‡∏•‡∏≤‡∏ó‡∏π‡∏ô‡πà‡∏≤ 1 ‡∏Å‡∏£‡∏∞‡∏õ‡πã‡∏≠‡∏á',
  '‡∏û‡∏£‡∏¥‡∏Å‡∏Ç‡∏µ‡πâ‡∏´‡∏ô‡∏π 6 ‡πÄ‡∏°‡πá‡∏î',
  '*‡πÑ‡∏Ç‡πà‡πÑ‡∏Å‡πà 2 ‡∏ü‡∏≠‡∏á (‡∏ü‡∏≠‡∏á)',
  '‡∏ô‡πâ‡∏≥‡∏õ‡∏•‡∏≤ ‡πÄ‡∏•‡πá‡∏Å‡∏ô‡πâ‡∏≠‡∏¢',
  '‡∏ô‡πâ‡∏≥‡∏°‡∏±‡∏ô‡∏û‡∏∑‡∏ä ‡∏û‡∏≠‡∏õ‡∏£‡∏∞‡∏°‡∏≤‡∏ì'],
 '‡∏ß‡∏∏‡πâ‡∏ô‡πÄ‡∏™‡πâ‡∏ô‡∏´‡∏°‡∏π‡∏Å‡∏£‡∏≠‡∏ö‡∏Ñ‡∏±‡πà‡∏ß‡∏û‡∏£‡∏¥‡∏Å‡πÄ‡∏Å‡∏•‡∏∑‡∏≠': ['‡∏ß‡∏∏‡πâ‡∏ô‡πÄ‡∏™‡πâ‡∏ô 2 ‡∏´‡πà‡∏≠\xa0\xa0',
  '*‡∏´‡∏°‡∏π‡∏™‡∏≤‡∏°‡∏ä‡∏±‡πâ‡∏ô 400 ‡∏Å‡∏£‡∏±‡∏°\xa0 (‡∏Å‡∏Å.)',
  '‡πÄ‡∏Å‡∏•‡∏∑‡∏≠ 1 ‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞ ',
  '‡∏ô‡πâ‡∏≥‡∏™‡πâ‡∏°‡∏™‡∏≤‡∏¢‡∏ä‡∏π 2 ‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞',
  '‡∏û‡∏£‡∏¥‡∏Å‡πÅ‡∏î‡∏á‡∏à‡∏¥‡∏ô‡∏î‡∏≤‡∏™‡∏±‡∏ö 1¬Ω ‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞',
  '‡∏Å‡∏£‡∏∞‡πÄ‡∏ó‡∏µ‡∏¢‡∏°‡∏™‡∏±‡∏ö 1¬Ω ‡∏ä‡πâ‡∏≠‡∏ô‡πÇ‡∏ï‡πä‡∏∞',
  '‡πÄ‡∏Å‡∏•‡∏∑‡∏≠ 1 ‡∏ä‡πâ‡∏≠‡∏ô‡∏ä‡∏≤',
  '‡∏ô‡πâ‡