In [4]:
# Cell 1: Imports and Constants
import os.path
import uuid
from config import *

OP_FAMILY_ID = '50ca9746-f13a-4b67-adbb-1fe8f7f28439'
OP_EUR_ID = 'f02f21c3-2686-4c78-a3da-cc4c776fba93'
OP_MONEY_BOX = '052b718d-74f1-4e25-b1fa-5f7b9e7a7ca4'

from op import OPReader
import zenmoney

In [5]:
# Cell 2: load_or_sync function
def load_or_sync(filename, token, out_diff=None):
    conn = zenmoney.ZenConnection(token)
    if os.path.exists(filename):
        print('Syncing...')
        zen = zenmoney.Zenmoney.load(filename)
        conn.sync_timestamp = zen.server_timestamp
        diff = conn.sync(diff=out_diff)
        zen.apply_diff(diff)
        zen.write(filename)
        print('Sync done.')
    else:
        print('Getting initial data...')
        zen = zenmoney.Zenmoney(conn.sync())
        zen.write(filename)
        print('Done.')
    return zen

In [6]:
# Cell 3: get_updates function
def get_updates(zen, _filename, _acc_id):
    diff = {'transaction': []}

    op = OPReader(
        filename=_filename,
        zen_id=_acc_id,
        instrument_id=3)
    for line in op.read():
        def check(_zen, _op):
            _f = ['date']
            if _op['income'] > 0:
                _f += ['income']
                _f += ['incomeAccount']
            else:
                _f += ['outcome']
                _f += ['outcomeAccount']

            for _n in _f:
                if not _zen.get(_n, None) == _op.get(_n, None):
                    return False

            if _zen.get('deleted'):
                print(_op)
            return True

        search = [x for x in zen.transaction if check(x, line)]

        if len(search) == 0:
            line.update({
                'id': str(uuid.uuid4()),
                'created': line['changed'],
                'user': ZEN_USER,
                'deleted': False,
                'tag': [],
                'merchant': None,
                'reminderMarker': None,
                'incomeBankID': None,
                'outcomeBankID': None,
                'opIncome': None,
                'opOutcome': None,
                'opIncomeInstrument': None,
                'opOutcomeInstrument': None,
                'latitude': None,
                'longitude': None,
            })
            diff['transaction'].append(line)
    return diff

In [8]:
# Cell 4: Main logic (run interactively)
token = ZEN_API_TOKEN
filename = 'zenmoney.json'

zen = load_or_sync(filename, token)

Syncing...
Sync done.


In [17]:
import pandas as pd

# Assuming `zen` is your loaded Zenmoney object
df = pd.DataFrame(zen.transaction)
df.head()  # Display the first few rows

Unnamed: 0,id,user,date,income,outcome,changed,incomeInstrument,outcomeInstrument,created,originalPayee,...,opIncome,opOutcome,opIncomeInstrument,opOutcomeInstrument,latitude,longitude,merchant,incomeBankID,outcomeBankID,reminderMarker
0,8c3f4650-4529-4a1b-8181-da03fd3af672,85919,2018-11-30,0.0,53076.6,1565611212,2,2,1543566310,,...,,,,,,,9cff7bac-8f35-4e9c-b15b-80feae8cfe92,,,
1,6a4350d5-5eb8-4f9c-9e1c-9ec8f95e8b9b,85919,2018-12-28,227.03,227.03,1565611212,2,2,1546016182,Леночка,...,0.0,0.0,,,59.959991,30.325079,b6186ce1-3506-41da-90f4-ac402afa1855,,,
2,cb3d93a6-f8bd-468a-b3fb-01a9fbcab85a,85919,2014-11-13,10100.0,10100.0,1684498613,2,2,1415835908,Рэдрик,...,0.0,0.0,,,,,,,,85fd570f-f54c-4297-a33c-7e7da6fe1984
3,eee66817-41b1-442f-b7d3-4b991adcd932,85919,2014-05-26,2000.0,2000.0,1684498613,2,2,1401115647,,...,0.0,0.0,,,59.95881,30.403931,9cff7bac-8f35-4e9c-b15b-80feae8cfe92,,,
4,c87ae2e9-88a1-441f-820d-938a68fb6502,85919,2013-08-30,300.0,300.0,1675113410,2,2,1377976328,,...,0.0,0.0,,,,,8584ab68-9e17-4fe9-aa90-580292f497ac,,,


In [23]:
import pandas as pd
from datetime import datetime, timedelta

# Ensure 'date' is datetime
df['date'] = pd.to_datetime(df['date'])

# Calculate last year range
today = pd.Timestamp.today().normalize()
one_year_ago = today - pd.DateOffset(years=1)

# Filter for last year
df_last_year = df[(df['date'] >= one_year_ago) & (df['date'] < today)]
df_last_year = df_last_year[df_last_year['deleted'] == False]

In [24]:
df_last_year.head()

Unnamed: 0,id,user,date,income,outcome,changed,incomeInstrument,outcomeInstrument,created,originalPayee,...,opIncome,opOutcome,opIncomeInstrument,opOutcomeInstrument,latitude,longitude,merchant,incomeBankID,outcomeBankID,reminderMarker
233,685362b1-d5ca-4129-8b98-525ef35f7fd5,85919,2024-11-26,4.65,4.65,1733925405,3,3,1732598894,Yi Bin Qu Zhu Ge Zhen Yi,...,0.0,0.0,,,,,b6186ce1-3506-41da-90f4-ac402afa1855,,[revolut][reg]-67455c6d-95c4-af78-a728-32a2d23...,
234,9db6170e-c31c-4f3d-ae0a-79ae090cd670,85919,2024-11-26,0.34,0.34,1733925405,3,3,1732598977,Yi Bin Qu Zhu Ge Zhen Yi,...,0.0,0.0,,,,,,,[revolut][reg]-67455cc0-5d7b-a06f-9d8c-03b800e...,
235,b986fba1-c68e-4584-bdd8-8912863cd82b,85919,2024-10-19,40.0,40.0,1730062027,3,3,1729350900,www.piletilevi.ee,...,0.0,0.0,,,,,b6186ce1-3506-41da-90f4-ac402afa1855,,[revolut][reg]-6713ccf4-8d70-ab8d-9af0-cdb2b6e...,
236,ff3de359-0954-47c4-a84b-4bb10ba1be58,85919,2024-10-15,12.0,12.0,1729169537,3,3,1728986077,Ateneum,...,0.0,0.0,,,,,b6186ce1-3506-41da-90f4-ac402afa1855,,[revolut][reg]-670e3bdc-66f8-a466-bd1a-d6ac9ee...,
237,2385bd8d-63cd-4a8d-822b-e003baec8aa6,85919,2024-10-14,3.9,3.9,1728996436,3,3,1728883827,Smartseller Si D.o.,...,0.0,0.0,,,,,b6186ce1-3506-41da-90f4-ac402afa1855,,[revolut][reg]-670cac72-7743-ab86-b72d-f4d47ce...,


In [41]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Prepare target: use first tag if it's a list, else as is
df_last_year = df_last_year.copy()
df_last_year['tag_first'] = df_last_year['tag'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)

# Drop rows with missing target
df_train = df_last_year.dropna(subset=['tag_first'])

# Count occurrences of each class
class_counts = df_train['tag_first'].value_counts()

# Keep only classes with at least 2 samples
valid_classes = class_counts[class_counts >= 2].index
df_train_filtered = df_train[df_train['tag_first'].isin(valid_classes)]

# Select features (drop columns you don't want to use)
# features = [col for col in df_train.columns if col not in ['tag', 'tag_first', 'id', 'created', 'user', 'deleted']]
features = [
    'date', 'income', 'outcome', 'incomeAccount', 'outcomeAccount',
    'merchant']
X = df_train_filtered[features]
y = df_train_filtered['tag_first']

# Identify categorical features (example: 'incomeAccount', 'outcomeAccount', etc.)
cat_features = [col for col in X.columns if X[col].dtype == 'object']

# Fill missing values
X = X.fillna('NA')

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train CatBoost
# model = CatBoostClassifier(verbose=100, cat_features=cat_features, iterations=500)
# model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# 1. Create tag ID to name mapping
tag_map = {tag['id']: tag['title'] for tag in zen.tag}

# 2. Map y_test and y_pred to tag names
# y_test_names = y_test.map(tag_map)
# y_pred_names = pd.Series(y_pred, index=y_test.index).map(tag_map)

# 3. Get unique tag names in test set
# unique_tag_names = sorted(y_test_names.unique())

# 4. Print classification report with tag names

# print(classification_report(y_test_names, y_pred_names, target_names=unique_tag_names))

print(classification_report(y_test, y_pred))

                                      precision    recall  f1-score   support

1fa26058-f686-424d-b640-2952c682365d       1.00      0.40      0.57         5
25cf9bfc-7a49-426e-b391-29161705867b       0.33      0.20      0.25         5
28ceef21-7956-46d4-b175-5b8505e3e626       0.56      0.36      0.43        14
35ebc934-c293-4447-a5e2-89ba921b2355       0.00      0.00      0.00         1
4f378bee-8c4e-477f-aba1-f9cc1758c4e4       0.00      0.00      0.00         1
5c417dd2-ae3f-4a7f-b49b-5518765c4c72       0.20      0.17      0.18         6
5d900da7-55a0-43fc-ad91-13e9adff392e       0.00      0.00      0.00         2
67a6432d-82b9-4033-8a95-4c9b3a89afe5       0.59      0.70      0.64        23
6b4b833e-3faf-4a33-87ea-814d0ed9a4a0       0.42      0.42      0.42        12
735244eb-5d04-41ac-824a-b7169177d37c       0.00      0.00      0.00         1
73c785b9-e859-4a70-9aff-a4de411b30f2       0.56      0.38      0.45        13
746aca4e-699a-4fd4-98e0-5973214e0523       1.00      0.93      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [62]:
import pandas as pd

# 1. Filter for last month's data
today = pd.Timestamp.today().normalize()
one_month_ago = today - pd.DateOffset(months=1)
df_last_month = df[(df['date'] >= one_month_ago) & (df['date'] < today) & (df['deleted'] == False)].copy()

# 2. Prepare features (same as training)
features = [
    'date', 'income', 'outcome', 'incomeAccount', 'outcomeAccount',
    'merchant'
]
X_last_month = df_last_month[features].fillna('NA')

# 3. Predict tag IDs
y_pred_last_month = model.predict(X_last_month)

# # 4. Map predicted tag IDs to tag names
tag_map = {tag['id']: tag['title'] for tag in zen.tag}
df_last_month['predicted_tag'] = y_pred_last_month.ravel()
df_last_month['predicted_tag_name'] = df_last_month['predicted_tag'].map(tag_map)

# 5. Output transaction data with predicted tag name
output_cols = ['originalPayee', 'comment', 'payee'] + ['predicted_tag_name']
df_last_month[output_cols]

Unnamed: 0,originalPayee,comment,payee,predicted_tag_name
296,BKG*HOTEL AT BOOKIN (888)850-3958,Viesti: 492065******2407 OSTOPVM 250516MF NRO ...,Леночка,B Еда
22444,Анастасия Денисовна В.,Клиенту Сбера,Анастасия Денисовна В.,B Спорт/образование
24640,Ya Points Oy,,Ya Points Oy,BR Обеды Х
24641,,To Flexible Cash Funds,,Палладиевый кафель
24642,,To Flexible Cash Funds,,Палладиевый кафель
...,...,...,...,...
29199,NORMAL HELSINKI RED HELSINKI,Viesti: 492065******9088 OSTOPVM 250515MF NRO ...,NORMAL HELSINKI RED HELSINKI,B Карманные П
29200,Lidl Helsinki-Sorna Helsinki,Viesti: 492065******9088 OSTOPVM 250514MF NRO ...,Lidl Helsinki-Sorna Helsinki,B Карманные Х
29203,JUHLAMAAILMA RE Helsinki,Viesti: 492065******9088 OSTOPVM 250515MF NRO ...,JUHLAMAAILMA RE Helsinki,B Карманные П
29204,STARDENT OY Helsinki,Viesti: 492065******9088 OSTOPVM 250515MF NRO ...,STARDENT OY Helsinki,B Карманные Х


array(['67a6432d-82b9-4033-8a95-4c9b3a89afe5',
       '746aca4e-699a-4fd4-98e0-5973214e0523',
       '73c785b9-e859-4a70-9aff-a4de411b30f2',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       'c76a32f2-be8a-4621-b92c-6d215cdd40a3',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       'c76a32f2-be8a-4621-b92c-6d215cdd40a3',
       '73c785b9-e859-4a70-9aff-a4de411b30f2',
       'da7c3f74-3a9b-4a32-96e7-cada052267b6',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       '8fbb76dc-b695-4326-998e-d762ee20a90f',
       '8fbb7