In [2]:
pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-1.0.2-py3-none-any.whl.metadata (15 kB)
Downloading xmltodict-1.0.2-py3-none-any.whl (13 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-1.0.2


In [89]:
import torch
import xmltodict
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
from pprint import pprint

In [101]:
# Checkpoint identifier; the same value is passed to both from_pretrained
# calls so the processor and the model are loaded from the same checkpoint.
model_name = "naver-clova-ix/donut-base-finetuned-cord-v2"
# `processor` is used later for tokenizing the prompt, preparing pixel values
# and decoding the generated ids; `model` is used for generate().
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)

In [102]:
image_path = "receiptkopi.jpg"
# Open inside a context manager so the underlying file handle is closed as
# soon as the pixels have been read; convert() returns a separate RGB copy
# that stays usable after the file is closed.
with Image.open(image_path) as image_file:
    image = image_file.convert("RGB")

# Task prompt for the decoder. Asking the tokenizer for PyTorch tensors yields
# the (1, prompt_length) batch directly, so the name is never bound to a plain
# Python list first.
decoder_input_ids = processor.tokenizer(
    "<s_cord-v2>", add_special_tokens=False, return_tensors="pt"
).input_ids
# Pre-processed image tensor fed to the encoder by model.generate().
pixel_values = processor(image, return_tensors="pt").pixel_values


In [103]:
# Decoding settings, collected in one place so they can be inspected or tweaked
# without touching the generate() call itself.
generation_kwargs = dict(
    decoder_input_ids=decoder_input_ids,
    # Upper bound on sequence length, taken from the decoder's configuration.
    max_length=model.decoder.config.max_position_embeddings,
    pad_token_id=processor.tokenizer.pad_token_id,
    eos_token_id=processor.tokenizer.eos_token_id,
    use_cache=True,
    num_beams=1,
    # Forbid the unknown token from being generated.
    bad_words_ids=[[processor.tokenizer.unk_token_id]],
    # Return a structured output object (exposes `.sequences`) rather than a bare tensor.
    return_dict_in_generate=True,
)

generation_output = model.generate(pixel_values, **generation_kwargs)

In [104]:
# Inspect the raw generate() result; its `.sequences` field holds the generated
# token ids that are decoded into text in the next step.
generation_output

GenerateEncoderDecoderOutput(sequences=tensor([[57579, 57526, 57528,  9824, 32450, 55243, 36277,  5040, 57527, 57530,
          1314, 57529, 57532, 22081, 34504, 57531, 57522, 57528, 34837, 34067,
         47088, 40723, 11395, 57527, 57530,  1314, 57529, 57532, 44306, 35815,
         56239, 57531, 57522, 57528, 13663, 56992, 50990, 27009, 55148, 17390,
         42990, 52838, 40769, 12162, 57527, 57530,  1314, 57529, 57532, 38873,
         34504, 57531, 57522, 57528, 42205, 40220, 27587,  1921, 44957, 37349,
         22251, 35161, 57527, 57530,  1314, 57529, 57532, 20017, 34504, 57531,
         57522, 57528, 13663,  4778, 36549, 18018, 35887,  4261, 56271, 11257,
         57527, 57530,  1314, 57529, 57532,  9066, 34504, 57531, 57522, 57528,
         34579, 38022,  2253, 36277,  6461, 32450, 57527, 57530,  3822, 57529,
         57532, 27861, 35815, 56239, 57531, 57522, 57528, 34579, 38022,  2253,
         43673, 43324, 57527, 57530,  1314, 57529, 57532, 41361, 35815, 56239,
         5753

In [105]:
# Turn the generated ids back into text (single image, so take batch entry 0).
raw_text = processor.batch_decode(generation_output.sequences)[0]

# Drop the end-of-sequence and padding markers; they are not part of the
# tagged receipt content.
for special_token in (processor.tokenizer.eos_token, processor.tokenizer.pad_token):
    raw_text = raw_text.replace(special_token, "")

# Append the closing task tag so the opening "<s_cord-v2>" is balanced and the
# string can be parsed as a single-rooted document.
decoded_sequence = raw_text + "</s_cord-v2>"
decoded_sequence

'<s_cord-v2><s_menu><s_nm> CHEESE CAKE</s_nm><s_cnt> 1</s_cnt><s_price> 45,000</s_price><sep/><s_nm> MINERAL WATER</s_nm><s_cnt> 1</s_cnt><s_price> 18,000</s_price><sep/><s_nm> SAUSAGE SMOKED BF PZ</s_nm><s_cnt> 1</s_cnt><s_price> 85,000</s_price><sep/><s_nm> SPAGHETTI FANTASIA</s_nm><s_cnt> 1</s_cnt><s_price> 55,000</s_price><sep/><s_nm> SUMMER ORANGE NOIR</s_nm><s_cnt> 1</s_cnt><s_price> 35,000</s_price><sep/><s_nm> TIRAMISU COFFEE</s_nm><s_cnt> 2</s_cnt><s_price> 70,000</s_price><sep/><s_nm> TIRAMISU MATCHA</s_nm><s_cnt> 1</s_cnt><s_price> 38,000</s_price></s_menu><s_sub_total><s_subtotal_price> 346,000</s_subtotal_price><s_tax_price> 34,600</s_tax_price></s_sub_total><s_total><s_total_price> 397,900</s_total_price><s_menutype_cnt> : 7</s_menutype_cnt><s_menuqty_cnt> : 8</s_menuqty_cnt></s_total></s_cord-v2>'

In [106]:
# Parse the tagged output as XML. xmltodict groups repeated sibling tags into
# parallel lists (all item names under 's_nm', all prices under 's_price', ...)
# and turns each empty <sep/> separator into None.
#
# By default a tag that occurs only ONCE is returned as a bare string instead
# of a one-element list, which would break any code that iterates over the
# item fields for a single-item receipt. force_list keeps these fields lists
# regardless of how many items were recognised.
my_dict = xmltodict.parse(
    decoded_sequence,
    force_list=('s_nm', 's_cnt', 's_price'),
)
pprint(my_dict)

{'s_cord-v2': {'s_menu': {'s_cnt': ['1', '1', '1', '1', '1', '2', '1'],
                          's_nm': ['CHEESE CAKE',
                                   'MINERAL WATER',
                                   'SAUSAGE SMOKED BF PZ',
                                   'SPAGHETTI FANTASIA',
                                   'SUMMER ORANGE NOIR',
                                   'TIRAMISU COFFEE',
                                   'TIRAMISU MATCHA'],
                          's_price': ['45,000',
                                      '18,000',
                                      '85,000',
                                      '55,000',
                                      '35,000',
                                      '70,000',
                                      '38,000'],
                          'sep': [None, None, None, None, None, None]},
               's_sub_total': {'s_subtotal_price': '346,000',
                               's_tax_price': '34,600'},
               '

In [107]:
# Item names recognised on the receipt, in the order they appear in the
# decoded sequence.
my_dict['s_cord-v2']['s_menu']['s_nm']

['CHEESE CAKE',
 'MINERAL WATER',
 'SAUSAGE SMOKED BF PZ',
 'SPAGHETTI FANTASIA',
 'SUMMER ORANGE NOIR',
 'TIRAMISU COFFEE',
 'TIRAMISU MATCHA']

In [108]:
# NOTE(review): `nama` lists "Ady", who has no order list below, while Ida and
# Ika (who do) are missing. It is not used by the computation; value left
# unchanged — confirm the intended roster.
nama = ["Feby", "Tona", "Ady"]

# Items each person ordered. A dish may appear in more than one list
# ('TIRAMISU COFFEE' is in two); each person is then charged one unit price.
menu_feby = ['CHEESE CAKE', 'MINERAL WATER']
menu_tona = ['SAUSAGE SMOKED BF PZ', 'TIRAMISU COFFEE']
menu_ida = ['SPAGHETTI FANTASIA', 'TIRAMISU MATCHA']
menu_ika = ['SUMMER ORANGE NOIR', 'TIRAMISU COFFEE']

# Person -> ordered items; drives the accumulation loop below so there is one
# code path instead of one copy-pasted branch per person.
pesanan_per_orang = {
    'Feby': menu_feby,
    'Tona': menu_tona,
    'Ida': menu_ida,
    'Ika': menu_ika,
}

# Running total owed by each person (sum of unit prices of their items).
total_per_orang = {
    'Feby': 0,
    'Tona': 0,
    'Ida': 0,
    'Ika': 0
}


def _as_list(value):
    """Return `value` unchanged if it is a list, otherwise wrap it in one.

    xmltodict yields a bare value (not a list) when a tag occurs only once,
    e.g. for a receipt with a single item.
    """
    return value if isinstance(value, list) else [value]


parsed_menu = my_dict['s_cord-v2']['s_menu']
list_nama_menu = _as_list(parsed_menu['s_nm'])
list_harga_menu = _as_list(parsed_menu['s_price'])
list_qty_menu = _as_list(parsed_menu['s_cnt'])

# The three lists are matched purely by position. If the model dropped a field
# for some item they go out of step and zip() would silently pair the wrong
# values, so fail loudly instead of producing a wrong bill.
if not (len(list_nama_menu) == len(list_harga_menu) == len(list_qty_menu)):
    raise ValueError(
        "Parsed receipt fields are misaligned: "
        f"{len(list_nama_menu)} names, {len(list_harga_menu)} prices, "
        f"{len(list_qty_menu)} quantities"
    )

# `qty_menu` (not `list_qty_menu`) as the loop variable, so the quantity list
# is not overwritten by its own last element.
for nama_menu, harga_menu, qty_menu in zip(list_nama_menu, list_harga_menu, list_qty_menu):
    # Prices use ',' as thousands separator, e.g. '45,000'.
    harga_menu_as_int = int(harga_menu.replace(',', ''))
    # An empty quantity tag parses as None; treat it as a single unit.
    quantity = int(qty_menu) if qty_menu else 1

    # The receipt line shows the price for all units; charge one unit per
    # person. Integer division avoids a round-trip through float.
    harga_per_item = harga_menu_as_int // quantity

    for orang, pesanan in pesanan_per_orang.items():
        if nama_menu in pesanan:
            total_per_orang[orang] += harga_per_item

# Per-person names kept for the cells that display them.
total_feby = total_per_orang['Feby']
total_tona = total_per_orang['Tona']
total_ida = total_per_orang['Ida']
total_ika = total_per_orang['Ika']



In [109]:
# Feby's share: sum of the per-unit prices of the receipt items listed in menu_feby.
total_feby

63000

In [110]:
# Tona's share: sum of the per-unit prices of the receipt items listed in menu_tona.
total_tona

120000

In [111]:
# Ida's share: sum of the per-unit prices of the receipt items listed in menu_ida.
total_ida

93000

In [112]:
# Ika's share: sum of the per-unit prices of the receipt items listed in menu_ika.
total_ika

70000