### IMPORTANT : Read Readme section "Run model locally using Ollama" for setting up the model locally

In [14]:
! ollama create expense_analyzer_llama3 -f Modelfile

[?25ltransferring model data 
reading model metadata 
creating system layer 
creating parameters layer 
creating config layer 
using already created layer sha256:00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 
using already created layer sha256:4fa551d4f938f68b8c1e6afa9d28befb70e3f33f75d0753248d530364aeea40f 
using already created layer sha256:8ab4849b038cf0abc5b1c9b8ee1443dca6b93a045c2272180d985126eb40bf6f 
writing layer sha256:aaa285176a6644a02f0f36f1110d8732658803188f36f2f115291fbcf970f9cb 
using already created layer sha256:d02f8eeff855caf58609588ee111469b8b4f8dc81315c278779901e23a25eddb 
writing layer sha256:3d9a7f4ea65e363a90ada992cc6e8bf5a94101cef82fe7b41bb4238ea2d4cb56 
writing manifest 
success [?25h


In [15]:
! ollama list

NAME                          	ID          	SIZE  	MODIFIED       
expense_analyzer_llama3:latest	cbe4d141a6d5	4.7 GB	18 seconds ago	
llama3:latest                 	a6990ed6be41	4.7 GB	10 days ago   	


In [None]:
! pip install langchain_community pandas

In [2]:
! pip freeze > requirements.txt

In [5]:
from langchain_community.llms import Ollama

In [7]:
# Test llm
llm = Ollama(model="expense_analyzer_llama3")
llm.invoke("what category does money spent on a dentist fall under?")

'As a financial assistant, I would categorize money spent on a dentist as "Healthcare" or more specifically, "Medical/Dental Expenses". This category includes any costs related to medical care, including dental procedures, doctor visits, and prescriptions. In QuickBooks, for example, this would likely be classified under the "Healthcare" expense account.'

In [12]:
# Process credit card bill
import pandas as pd

df = pd.read_csv("4988-xxxx-xxxx-6116_Transactions_2024-01-01_2024-05-08.csv")
df

Unnamed: 0,Card,Type,Amount,Details,TransactionDate,ProcessedDate,ForeignCurrencyAmount,ConversionCharge
0,4988-****-****-6568,D,2.90,7 Mart Convenient St Auckland Nz,06/05/2024,07/05/2024,,
1,4988-****-****-6568,D,5.10,Bean Grinding Auckland Nz,05/05/2024,06/05/2024,,
2,4988-****-****-6568,D,3.76,Seed Heritage Milford Nz,05/05/2024,06/05/2024,,
3,4988-****-****-6568,D,192.91,Pak N Save Wairau Road Northshore Ci Nz,04/05/2024,06/05/2024,,
4,4988-****-****-6568,D,7.50,Woolworths Nz/Glenfiel Glenfld Mall Nz,04/05/2024,06/05/2024,,
...,...,...,...,...,...,...,...,...
452,4988-****-****-6568,D,65.28,Regatta Bar & Eatery Takapuna Nz,30/12/2023,03/01/2024,,
453,4988-****-****-6568,D,3.99,Chemist Warehouse Newmarket Nz,29/12/2023,03/01/2024,,
454,4988-****-****-6568,D,20.00,Punjabi Kitchen Wairua Valley Nz,29/12/2023,03/01/2024,,
455,4988-****-****-6568,D,9.30,Gong Cha Westfield New Auckland Nz,29/12/2023,03/01/2024,,


In [20]:
# get unique transactions
unique_transactions = df["Details"].unique()
print(unique_transactions)
f"Unique transactions count : {len(unique_transactions)}"

['7 Mart Convenient St   Auckland      Nz '
 'Bean Grinding          Auckland      Nz '
 'Seed Heritage          Milford       Nz '
 'Pak N Save Wairau Road Northshore Ci Nz '
 'Woolworths Nz/Glenfiel Glenfld Mall  Nz '
 'Namaste Indian Superma Auckland      Nz '
 'Pizza Hutt Glenfield M Auckland      Nz '
 'Pasta Cuore            Mount Eden    Nz '
 'Liquorland Glenfield   Auckland      Nz '
 'Glenfield 7 Day Phar   Auckland      Nz '
 'Noughts Crosses Cake   Mount Eden    Nz '
 'Kara Cafe              Auckland      Nz '
 'Milford Optometrists   Milford       Nz '
 'Unichem Fred Thomas    Ellerslie     Nz '
 'Hell Pizza Glenfield   Auckland      Nz '
 'Mojo Victoria Square   Auckland      Nz '
 'Southern Cross Health  Auckland      Nz '
 'Pak N Save Fuel Wairau Wairau Valley Nz '
 'Chemist Warehouse      Glenfield     Nz '
 'Eb Games 308           Glenfield     Nz '
 'Amazon Music Unlimited Sydney South  Au '
 'Mcdonalds Glenfield    Auckland      Nz '
 'The Grange             Aucklan

'Unique transactions count : 180'

In [21]:
# Index List to batch transactions into 30 each
def hop(start, stop, step):
    for i in range(start, stop, step):
        yield i
    
    yield stop

index_list = list(hop(0, len(unique_transactions),30))
index_list

[0, 30, 60, 90, 120, 150, 180]

In [22]:
def categorize_transactions(transaction_names,llm):
    response = llm.invoke("Can you add an appropriate category to the following expenses. For example: Spotify AB by Adyen - Entertainment, Beta Boulders Ams Amsterdam Nld - Sport, etc.. Categories should be less than 4 words. " + transaction_names)
    response = response.split('\n')

    print(response)

    # Put in dataframe
    categories_df = pd.DataFrame({'Transaction vs category': response})
    categories_df[['Transaction', 'Category']] = categories_df['Transaction vs category'].str.split(' - ', expand=True)
    
    return categories_df

In [23]:
# initialise categories dataframe
categories_df_all = pd.DataFrame()

# Loop through index list
for i in range(0, len(index_list)-1):
    transaction_names = unique_transactions[index_list[i]:index_list[i+1]]
    transaction_names = '.'.join(transaction_names)

    categories_df = categorize_transactions(transaction_names,llm)
    categories_df_all = pd.concat([categories_df_all, categories_df], ignore_index=True)

['Here are the categorized expenses:', '', '* 7 Mart Convenient St - Grocery', '* Bean Grinding - Grocery', '* Seed Heritage - Clothing', '* Pak N Save Wairau Road Northshore Ci - Grocery', '* Woolworths Nz/Glenfiel Glenfld Mall - Grocery', '* Namaste Indian Superma - Food', '* Pizza Hutt Glenfield M - Food', '* Pasta Cuore - Food', '* Liquorland Glenfield - Liquor', '* Glenfield 7 Day Phar - Pharmacy', '* Noughts Crosses Cake - Bakery', '* Kara Cafe - Coffee', '* Milford Optometrists - Health', '* Unichem Fred Thomas - Health', '* Hell Pizza Glenfield - Food', '* Mojo Victoria Square - Entertainment', '* Southern Cross Health - Insurance', '* Pak N Save Fuel Wairau Wairau Valley - Gasoline', '* Chemist Warehouse - Pharmacy', '* Eb Games 308 - Entertainment', '* Amazon Music Unlimited - Entertainment', '* Mcdonalds Glenfield - Fast Food', '* The Grange - Home Services', '* Movenpick Takapuna - Accommodation', '* Clarence Cafe - Coffee', '* Online Payment - Miscellaneous', '* Woolworths

ValueError: Columns must be same length as key