Moved this into a separate file so that, once OCR is done, I don't need to keep re-running it for tests.

In [2]:
# Install everything the notebook needs in the kernel's environment:
# pandas reads the CSVs, prisma is imported (and its CLI invoked) further down.
%pip install -q \
    pandas \
    prisma

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd

In [4]:
# Load the OCR results saved as CSV so OCR does not have to be re-run
receipts_df = pd.read_csv(filepath_or_buffer='receipts.csv')
expenses_df = pd.read_csv(filepath_or_buffer='expenses.csv')

In [None]:
# # at first I tried with sqlite3 but I might as well use Prisma on both ends

# import sqlite3

# con = sqlite3.connect('../receipts-app/prisma/dev.db')
# df1 = pd.read_sql_query("SELECT * from UnitOfMeasure", con)

# print(df1.head())

# con.close()

# # using sqlite3 directly

# import sqlite3

# con = sqlite3.connect('../receipts-app/prisma/dev.db')

# cur = con.cursor()

# # clear data first
# cur.execute("DELETE FROM UnitOfMeasure")

# # seed the UnitOfMeasure table in SQLite with my data
# for unit in ['g', 'mL', 'Count']:
#     cur.execute("INSERT INTO UnitOfMeasure (name) VALUES (?)", (unit,))

# con.commit()
# con.close()


In [None]:
# rather do it with prisma, so it's prisma on both sides

# `prisma` is a command-line tool, not an IPython magic, so it is run through
# the `!` shell escape (`% prisma ...` fails as an unknown line magic).
# NOTE(review): presumably the schema is resolved relative to the working
# directory — confirm, or pass --schema explicitly.
!prisma generate

import asyncio
from prisma import Prisma

async def main():
    """Seed the database through Prisma with units of measure and sample reference items.

    Connects a Prisma client, creates one unit-of-measure record per entry in
    the static list (storing each generated id back on that entry), then
    creates the sample reference items. The client is disconnected even when
    one of the create calls raises.
    """
    # Static data: a plain list of dicts, because the loop below both reads
    # each entry's name and writes the generated id back onto it.
    unitOfMeasures = [
        {'name': 'g'},
        {'name': 'mL'},
        {'name': 'Count'}
    ]
    # Dynamic data, sample for now
    referenceItems = [
        {
            'name': 'Item 1',
            'quantity': 1,
            'unitOfMeasure': 'Count',
            'price': 1.99,
            'referenceUrl': 'https://www.example.com/item1'
        },
        {
            'name': 'Item 2',
            'quantity': 1,
            'unitOfMeasure': 'Count',
            'price': 2.99,
            'referenceUrl': 'https://www.example.com/item1'
        },
        {
            'name': 'Item A',
            'quantity': 1,
            'unitOfMeasure': 'Count',
            'price': 3.99,
            'referenceUrl': 'https://www.example.com/item1'
        },
        {
            'name': 'Item B',
            'quantity': 1,
            'unitOfMeasure': 'Count',
            'price': 4.99,
            'referenceUrl': 'https://www.example.com/item1'
        }
    ]
    # Sample receipts; only referenced by the commented-out block further down.
    receipts = [
        {
            'filename': 'IMG_4549.jpg',
            'receiptTexts': [
                {'text': 'Item 1', 'boundingBox': '{"x":0,"y":0,"width":100,"height":20}', 'confidenceScore': 0.95},
                {'text': 'Item 2', 'boundingBox': '{"x":0,"y":20,"width":100,"height":20}', 'confidenceScore': 0.90},
            ]
        },
        {
            'filename': 'IMG_4550.jpg',
            'receiptTexts': [
                {'text': 'Item A', 'boundingBox': '{"x":0,"y":0,"width":100,"height":20}', 'confidenceScore': 0.93},
                {'text': 'Item B', 'boundingBox': '{"x":0,"y":20,"width":100,"height":20}', 'confidenceScore': 0.89},
            ]
        }
    ]
    # Sample expenses; not written to the database yet.
    expenses = [
        {
            'filename': 'IMG_4549.jpg',
            'referenceItem': 'Item 1',
            'bestMatch': 'Item 1',
            'price': 1.99
        },
        {
            'filename': 'IMG_4549.jpg',
            'referenceItem': 'Item 2',
            'bestMatch': 'Item 2',
            'price': 2.99
        },
        {
            'filename': 'IMG_4550.jpg',
            'referenceItem': 'Item A',
            'bestMatch': 'Item A',
            'price': 3.99
        },
        {
            'filename': 'IMG_4550.jpg',
            'referenceItem': 'Item B',
            'bestMatch': 'Item B',
            'price': 4.99
        }
    ]

    # Initialize the Prisma client
    db = Prisma()
    await db.connect()

    try:
        # create units of measure
        # NOTE(review): the camelCase accessors (db.unitOfMeasure,
        # db.referenceItem) are kept as written — confirm they match the
        # attribute names on the generated client.
        for unit in unitOfMeasures:
            # Pass the name string itself, not the whole record.
            UoM = await db.unitOfMeasure.create(data={'name': unit['name']})
            # add UoM.id to unitOfMeasures
            unit['id'] = UoM.id
            print(f'Unit of Measure created: {UoM.name}')

        # create all the reference items
        # NOTE(review): 'unitOfMeasure' is sent as the plain string 'Count';
        # if the schema models it as a relation it presumably needs a
        # connect using the ids stored above — confirm against the schema.
        for referenceItem in referenceItems:
            await db.referenceItem.create(data=referenceItem)

        # Iterate through the receipts and create them along with their texts
        # for receipt_data in receipts:
        #     # Create the Receipt
        #     receipt = await db.receipt.create(
        #         data={
        #             'filename': receipt_data['filename'],
        #             'receiptText': {
        #                 'create': receipt_data['receiptTexts']
        #             }
        #         },
        #         include={
        #             'receiptText': True
        #         }
        #     )
        #     print(f'Receipt created: {receipt.id}, with texts: {[text.text for text in receipt.receiptText]}')
    finally:
        # Disconnect the Prisma client, even if a create call above failed
        await db.disconnect()

# Run the main function.
# The Jupyter kernel already has an event loop running, so asyncio.run()
# would raise RuntimeError here; IPython supports awaiting at the top level.
await main()