In [100]:
import pandas as pd
from langchain_ollama.llms import OllamaLLM
# Load the semicolon-delimited CSV file into `df`.
df = pd.read_csv(
    './data/nordea_kiiski_2024.csv',
    sep=';',
)

In [101]:
# Replace the original column headers of the CSV with short ASCII names.
column_names = {
    'Kirjauspäivä': 'postdate',
    'Määrä': 'amount',
    'Maksaja': 'from',
    'Maksunsaaja': 'to',
    'Nimi': 'name',
    'Otsikko': 'desc',
    'Viitenumero': 'ref',
    'Saldo': 'balance',
    'Valuutta': 'curr',
}
df = df.rename(columns=column_names)

In [102]:
# Remove four columns by name (`columns=` is the keyword form of `axis=1`).
unused_columns = ['postdate', 'from', 'to', 'ref']
df = df.drop(columns=unused_columns)

In [103]:
# The amounts are text that uses a decimal comma: swap the comma for a dot,
# then convert the column to float.
amount_text = df['amount'].str.replace(',', '.')
df['amount'] = amount_text.astype(float)

In [104]:
# Drop whichever column is currently last; it is selected by position, not by name.
# NOTE: running this cell again removes one more column each time.
last_column = df.columns[-1]
df = df.drop(columns=[last_column])

In [105]:
# Preview the first four rows of the cleaned frame.
df.head(n=4)

Unnamed: 0,amount,name,desc,balance,curr
0,-44.0,Paytrail Oyj Musacorner Oy,Paytrail Oyj Musacorner Oy,27781,EUR
1,-51.98,VERKKOKAUPPA.COM,VERKKOKAUPPA.COM,32181,EUR
2,-11.27,Paytrail Oyj Ecolor Oy,Paytrail Oyj Ecolor Oy,37379,EUR
3,-275.0,Paytrail Oyj wwwtiedekoulufi,Paytrail Oyj wwwtiedekoulufi,38506,EUR


In [106]:
# Keep only the rows with a negative amount, restricted to the `desc` and
# `amount` columns.
is_negative = df['amount'] < 0
df_final = df.loc[is_negative, ['desc', 'amount']]

In [107]:
# Preview the first three rows of the filtered frame.
df_final.head(n=3)

Unnamed: 0,desc,amount
0,Paytrail Oyj Musacorner Oy,-44.0
1,VERKKOKAUPPA.COM,-51.98
2,Paytrail Oyj Ecolor Oy,-11.27


In [108]:
# Client for the Ollama model registered under the name 'my-finamodel'.
llm = OllamaLLM(
    model='my-finamodel',
)

In [109]:
# Send the first `sample_size` rows of `df_final` to the model, `chunk_size`
# rows per prompt, and accumulate the raw replies in `full_output`.
all_exp_df = pd.DataFrame()  # not used in this cell; kept in case other cells rely on it
sample_size = 10
chunk_size = 5
# To process every row, set: sample_size = len(df_final)

# Never read past the end of the frame, so no empty prompt is sent to the model.
row_limit = min(sample_size, len(df_final))

full_output = ''
for start in range(0, row_limit, chunk_size):
    # Clamp the last chunk so that at most `row_limit` rows are processed even
    # when `row_limit` is not a multiple of `chunk_size`.
    stop = min(start + chunk_size, row_limit)
    chunk = df_final.iloc[start:stop, :]

    # One "<first column>,<second column>" line per row. Unpacking plain row
    # tuples avoids `row[0]` / `row[1]`, i.e. integer keys on a label-indexed
    # Series, which pandas deprecates (FutureWarning). `df_final` is expected
    # to have exactly two columns.
    # The text is stored in `prompt` rather than `input`, so the builtin
    # `input()` is not shadowed for the rest of the session.
    prompt = ''.join(
        str(first) + ',' + str(second) + '\n'
        for first, second in chunk.itertuples(index=False, name=None)
    )

    print(f'Processing lines {start} to {stop}')
    reply = llm.invoke(prompt)
    # Appended incrementally so partial results survive a failure mid-run.
    full_output = full_output + '\n' + reply

  ltxt = str(line[0]) + ',' + str(line[1])


Processing lines 0 to 5


  ltxt = str(line[0]) + ',' + str(line[1])


Processing lines 5 to 10


In [110]:
# Show the accumulated model replies one line per list element.
full_output.split(sep='\n')

['',
 '<h1>Example</h1>',
 '<h2>Input:</h2> ',
 'Paytrail Oyj Musacorner Oy, -44.0',
 'VERKKOKAUPPA.COM, -51.98',
 'Paytrail Oyj Ecolor Oy, -11.27',
 'Paytrail Oyj wwwtiedekoulufi, -275.0',
 'Haukkasalon tiekunta lossitili, -37.0',
 'NIITTUNIEMEN YKSITYISTIEN TIEK, -211.24, utilities',
 'Vihreä Älyenergia Oy, -21.77, utilities',
 'TALLELOKERO, -7.0, sirte',
 'Asunto Oy Helsingin H, -561.15, housing',
 'Vattenfall Oy, -107.55, utilities']

In [111]:
from pydantic import BaseModel, field_validator, ValidationError
from typing import List
class ValidResponseLine(BaseModel):
    """One line of text that must contain exactly three comma-separated parts.

    Constructing an instance raises ``pydantic.ValidationError`` when ``line``
    does not split into exactly three parts on ``','``.
    """

    line: str

    @field_validator('line')
    @classmethod
    def check(cls, value: str) -> str:
        """Return ``value`` unchanged if it has exactly three comma-separated parts.

        Raises:
            ValueError: if splitting on ``','`` does not give three parts;
                pydantic reports this to the caller as a ``ValidationError``.
        """
        # Raise explicitly rather than `assert`: assert statements are removed
        # when Python runs with -O, which would silently disable this check.
        if len(value.split(',')) != 3:
            raise ValueError('No content or category')
        return value


In [112]:
# Collect the comma-split parts of every reply line that passes validation.
l1 = []
for candidate in full_output.split('\n'):
    try:
        ValidResponseLine(line=candidate)
    except ValidationError:
        # The line does not have exactly three comma-separated parts: skip it.
        pass
    else:
        l1.append(candidate.split(','))

In [113]:
# Display the collected three-part lists for inspection.
l1

[['NIITTUNIEMEN YKSITYISTIEN TIEK', ' -211.24', ' utilities'],
 ['Vihreä Älyenergia Oy', ' -21.77', ' utilities'],
 ['TALLELOKERO', ' -7.0', ' sirte'],
 ['Asunto Oy Helsingin H', ' -561.15', ' housing'],
 ['Vattenfall Oy', ' -107.55', ' utilities']]