In [1]:
from google.cloud import aiplatform

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
GDC_location = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

In [3]:
print(GDC_location)

instant-form-426202-b1-a245f3cae055.json


In [15]:
import base64

def encode_image(path):
    with open(path, 'rb') as f:
        image_data = f.read()
    encoded_image = base64.b64encode(image_data)
    return encoded_image


In [55]:
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

prompt = """

The provided is a generic expense day-to-day receipt, please extract the fields listed and respond in JSON format:

Write "" into the unrecognizable field if necessary

Reponse Format:
{
  "date_of_spending": <extraction>,
  "location_of_spending": <extraction>,
  "spending_company": <extraction>,
  "dollar_spent": <extraction>

}

"""

def generate(file_path, prompt, file_type):
  vertexai.init(project="instant-form-426202-b1", location="us-central1")
  model = GenerativeModel(
    "gemini-1.5-flash-001",
  )
  image1 = Part.from_data(
    mime_type= f"image/{file_type}",
    data=base64.b64decode(encode_image(file_path)))

  generation_config = {
      "max_output_tokens": 500,
      "top_p": 0.95,
  }


  responses = model.generate_content(
      [image1, prompt],
      generation_config=generation_config,
      # safety_settings=safety_settings,
      stream=False,
  )

  return responses



In [66]:
output = generate('./sample/sample2.png', prompt, 'png')

In [67]:
output

candidates {
  content {
    role: "model"
    parts {
      text: "```json\n{\n  \"date_of_spending\": \"2024/05/06\",\n  \"location_of_spending\": \"landmark Prince\'s Bul\\n10 Chater Road, central\",\n  \"spending_company\": \"BOOKAZINE\",\n  \"dollar_spent\": \"39.0\"\n}\n```"
    }
  }
  finish_reason: STOP
  safety_ratings {
    category: HARM_CATEGORY_HATE_SPEECH
    probability: NEGLIGIBLE
    probability_score: 0.117572129
    severity: HARM_SEVERITY_NEGLIGIBLE
    severity_score: 0.159872144
  }
  safety_ratings {
    category: HARM_CATEGORY_DANGEROUS_CONTENT
    probability: NEGLIGIBLE
    probability_score: 0.355551094
    severity: HARM_SEVERITY_NEGLIGIBLE
    severity_score: 0.180539683
  }
  safety_ratings {
    category: HARM_CATEGORY_HARASSMENT
    probability: NEGLIGIBLE
    probability_score: 0.175679445
    severity: HARM_SEVERITY_NEGLIGIBLE
    severity_score: 0.131730705
  }
  safety_ratings {
    category: HARM_CATEGORY_SEXUALLY_EXPLICIT
    probability: NEGLIGIB

In [57]:

def parse_markdown_json(output_string):
    output_string = output_string.replace("""```json""","")
    output_string = output_string.replace("""```""","")
    return eval(output_string)


In [58]:
import os

# Specify the folder path
folder_path = 'sample/'

file_path_dict = {
    'path':[],
    'type':[],
}

# Loop through the files in the folder
for filename in os.listdir(folder_path):
    # Check if the file is an image
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tif')):
        file_path = os.path.join(folder_path, filename)
        file_type = filename.split('.')[-1].lower()
        file_path_dict['path'].append(file_path)
        file_path_dict['type'].append(file_type)

In [60]:
import time

output_list = []

for i in range(0, len(file_path_dict['path'])):
    if file_path_dict['type'][i] == 'jpg':
        file_path_dict['type'][i] = 'jpeg'
    print(file_path_dict['path'][i])
    print(file_path_dict['type'][i])
    output_raw = generate(

        file_path= file_path_dict['path'][i],
        prompt=prompt,
        file_type= file_path_dict['type'][i]
    )
    output_string = (output_raw.candidates[0].content.parts[0].text)
    print(output_string)
    time.sleep(60) ##has a limit on calling quota, work out later
    output_list.append(parse_markdown_json(output_string))

    



sample/receipt.jpg
jpeg
{
  "date_of_spending": "19/09/05",
  "location_of_spending": "百味閣餐廳",
  "spending_company": "",
  "dollar_spent": "342.00"
}
sample/3C34AQ7763FEF1FBF7ABC1lv.jpg
jpeg
{
  "date_of_spending": "1 July, 2015",
  "location_of_spending": "新城市廣場店 香港新界沙田正街2-8號 新城市廣場第三期一樓A132號店鋪,",
  "spending_company": "MACCHA HOUSE 游抹茶館",
  "dollar_spent": "454.30"
}
sample/sample2.png
png
```json
{
  "date_of_spending": "2024/05/06",
  "location_of_spending": "landmark Prince's Bul 10 Chater Road, central",
  "spending_company": "BOOKAZINE",
  "dollar_spent": "39.0"
}
```
sample/3BQ8250CF74DCEE80C7565lv.jpg
jpeg
```json
{
  "date_of_spending": "24/12/2014",
  "location_of_spending": "6 PAK SHA ROAD CAUSEWAY BAY HONG KONG",
  "spending_company": "T.P.K RESTAURANT",
  "dollar_spent": "1,951"
}
```


In [61]:
output_list

[{'date_of_spending': '19/09/05',
  'location_of_spending': '百味閣餐廳',
  'spending_company': '',
  'dollar_spent': '342.00'},
 {'date_of_spending': '1 July, 2015',
  'location_of_spending': '新城市廣場店 香港新界沙田正街2-8號 新城市廣場第三期一樓A132號店鋪,',
  'spending_company': 'MACCHA HOUSE 游抹茶館',
  'dollar_spent': '454.30'},
 {'date_of_spending': '2024/05/06',
  'location_of_spending': "landmark Prince's Bul 10 Chater Road, central",
  'spending_company': 'BOOKAZINE',
  'dollar_spent': '39.0'},
 {'date_of_spending': '24/12/2014',
  'location_of_spending': '6 PAK SHA ROAD CAUSEWAY BAY HONG KONG',
  'spending_company': 'T.P.K RESTAURANT',
  'dollar_spent': '1,951'}]

In [65]:
import pandas as pd

consolidated_data = {}

for d in output_list:
    for k, v in d.items():
        if k not in consolidated_data:
            consolidated_data[k] = [v]
        else:
            consolidated_data[k].append(v)

pd.DataFrame(consolidated_data)

Unnamed: 0,date_of_spending,location_of_spending,spending_company,dollar_spent
0,19/09/05,百味閣餐廳,,342.0
1,"1 July, 2015","新城市廣場店 香港新界沙田正街2-8號 新城市廣場第三期一樓A132號店鋪,",MACCHA HOUSE 游抹茶館,454.3
2,2024/05/06,"landmark Prince's Bul 10 Chater Road, central",BOOKAZINE,39.0
3,24/12/2014,6 PAK SHA ROAD CAUSEWAY BAY HONG KONG,T.P.K RESTAURANT,1951.0


In [29]:
from llama_index.multi_modal_llms.gemini import GeminiMultiModal

In [31]:
GOOGLE_API_KEY="AIzaSyDU_-52E8608WwlB0SM8gD8etQhNkjiUDI"

import os

os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

gemini_pro = GeminiMultiModal(model_name="models/gemini-pro-vision")

FailedPrecondition: 400 User location is not supported for the API use.

In [12]:
prompt = """

The provided is a generic expense day-to-day receipt, please extract the fields listed and respond in JSON format:

Write "" into the unrecognizable field if necessary

Reponse Format:
{
  "date_of_spending": <extraction>,
  "location_of_spending": <extraction>,
  "spending_company": <extraction>,
  "dollar_spent": <extraction>,
  "shop_staff": <extraction>

}

"""

In [20]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(
    input_files = ["/Users/Caius.Chun/BOC_Competition/vertex_ai_gemini/sample/3BQ8250CF74DCEE80C7565lv.jpg"]
    # input_dir="/Users/Caius.Chun/BOC_Competition/vertex_ai_gemini/sample/3BQ8250CF74DCEE80C7565lv.jpg"
    )
documents = reader.load_data()

In [22]:
documents

[ImageDocument(id_='c792af11-c7cd-4dc6-9d02-eb679e119d19', embedding=None, metadata={'file_path': '/Users/Caius.Chun/BOC_Competition/vertex_ai_gemini/sample/3BQ8250CF74DCEE80C7565lv.jpg', 'file_name': '3BQ8250CF74DCEE80C7565lv.jpg', 'file_type': 'image/jpeg', 'file_size': 79029, 'creation_date': '2024-06-13', 'last_modified_date': '2024-06-13'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', image=None, image_path='/Users/Caius.Chun/BOC_Competition/vertex_ai_gemini/sample/3BQ8250CF74DCEE80C7565lv.jpg', image_url=None, image_mimetype=None, text_embedding=None)]

In [25]:
complete_response = gemini_pro.complete(

	prompt=prompt,
	
	image_documents=[documents[0]],

)

FailedPrecondition: 400 User location is not supported for the API use.

In [16]:
print(complete_response.text)

 ```json
{
  "date_of_spending": "Wednesday, 1 July, 2015",
  "location_of_spending": "Maccha House New City Plaza",
  "spending_company": "KITTY",
  "dollar_spent": "HK$454.30",
  "shop_staff": ""
}
```


In [36]:
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_openai import OpenAI
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

import os

os.environ["SERPER_API_KEY"] = "f8131ba5315d4c187f9e02aaa00ea22bc01e5673"
os.environ['OPENAI_API_KEY'] = "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"

llm = OpenAI(temperature=0)
search = GoogleSerperAPIWrapper()
tools = [
    Tool(
        name="Search",
        func=search.run,
        description="Always ask with search"
    )
]

self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
result = self_ask_with_search.run("""
                                  
                        The following is the location or the company name of a merchant in Hong Kong:

                        MACCHA HOUSE
                        新城市廣場店
                        香港新界沙田正街2-8號
                        新城市廣場第三期一樓A132號店鋪
                                  
                                                        
                        Please help look up the shop and location to help determine what kind of shop it is in reference to the following category:
                        * food
                        * transport
                        * glossary
                        * shopping
                        * bill
                        * home

                        Output your final answer in JSON format:
                                                        
                        {
                        "category": <selection>,
                        "reasoning": <reasoning>
                                                        
                        }
                                                      
                                  
                                   """)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use search to find information about MACCHA HOUSE in Hong Kong.
Action: Search
Action Input: MACCHA HOUSE Hong Kong[0m
Observation: [36;1m[1;3mMaccha House: Restaurant. Maccha House Address: Tai Hom荷里活廣場1樓134號舖, 3號 Lung Poon St, Diamond Hill, Hong Kong. Maccha House Hours: Closes soon ⋅ 10:30 PM ⋅ Opens 11:30 AM Fri. Maccha House Phone: +852 2799 0932. 抹茶館 (Plaza Hollywood)Maccha House (Plaza Hollywood)'s Address, Telephone Number, Ratings, Reviews, Photos and Menu, located at Shop 134, 1/F, ... Maccha House (Plaza Hollywood), Hong Kong: See 7 unbiased reviews of Maccha House (Plaza Hollywood), rated 3.5 of 5 on Tripadvisor and ranked #8995 of 13320 ... 抹茶館Maccha House's Address, Telephone Number, Ratings, Reviews, Photos and Menu, located at Shop 325, 3/F, Plaza Hollywood, 3 Lung Poon Street Diamond ... The Maacha House chain serves Quality budget matcha tea and desserts, with a tea set starting at $19 HKD. Wort

In [38]:
eval(result)

{'category': 'food',
 'reasoning': 'Based on the search results, Maccha House is a Japanese restaurant that serves matcha tea and desserts. Therefore, it falls under the food category.'}

In [40]:
import re

s = "1,956"
num_as_double = float(re.search(r'\d+', s).group())

In [41]:
num_as_double

1.0

In [46]:
strings = ["194Dollars", "1,946", "$1234.000"]
for s in strings:
    s = s.replace('$', '').replace(',', '')  # remove $ and ,
    match = re.search(r'(\d+)', s)
    if match:
        num = float(match.group(0))
        print(num)

194.0
1946.0
1234.0


In [45]:
import pandas as pd
def standardize_datetime(date_str, input_formats=None):
  """
  Standardizes a string into a datetime object.

  Args:
    date_str: The string representing the date.
    input_formats: A list of possible date formats. If None, a default list of formats is used.

  Returns:
    A datetime object representing the date, or None if the string cannot be parsed.
  """

  # Define default input formats
  if input_formats is None:
    input_formats = [
        '%Y/%m/%d',  # 2024/05/06
        '%d/%m/%Y',  # 25/12/2009
        '%Y年%m月',  # 2011年3月
    ]

  for fmt in input_formats:
    try:
      dt = pd.to_datetime(date_str, format=fmt)
      return dt
    except ValueError:
      pass

  return None

# Example usage
date_str1 = '2024/05/06'
date_str2 = '25/12/2009'
date_str3 = '2011年3月'

dt1 = standardize_datetime(date_str1)
dt2 = standardize_datetime(date_str2)
dt3 = standardize_datetime(date_str3)

print(f"Date 1: {dt1}")
print(f"Date 2: {dt2}")
print(f"Date 3: {dt3}")

Date 1: 2024-05-06 00:00:00
Date 2: 2009-12-25 00:00:00
Date 3: 2011-03-01 00:00:00
