In [1]:
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any

import pandas as pd

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.utils.google_services import set_up_google_connection
from src.utils.data_extracting import get_acc_files_from_gdrive_folder, extract_text_from_pdf, get_balance_of_account, get_all_transactions, check_income_or_expense, get_transaction_value


# Functions

# Setup

In [2]:
# Location of the local JSON credentials file, relative to the notebook's working directory.
# NOTE(review): presumably a Google service-account key — keep this file out of version control.
credentials_path = Path('../credentials/cool-plasma-452619-v4-feb20b70d461.json')
# `client` is used below to open the spreadsheet; `service` is passed to the Drive helper functions.
client, service = set_up_google_connection(credentials_path)

Using local JSON credentials


In [3]:
# Google resource identifiers are taken from the environment; an unset variable yields None.
SPREADSHEET_ID, TEMP_FOLDER_ID, REGULAR_FOLDER_ID = (
    os.getenv(variable_name)
    for variable_name in ('SPREADSHEET_ID', 'TEMP_FOLDER_ID', 'REGULAR_FOLDER_ID')
)

In [4]:
# List the account-statement files found in the Drive folder identified by TEMP_FOLDER_ID.
# The saved output shows a list of dicts with 'id' and 'name' keys.
acc_states = get_acc_files_from_gdrive_folder(TEMP_FOLDER_ID, service)
acc_states

[{'id': '1P7IxxT9vNULZjsNrrH-9awJp_lahNROA',
  'name': '2115089016_2023_Nr.001_Kontoauszug.pdf'}]

In [5]:
# Only the first statement returned for the folder is processed by this notebook.
# Fail with an explicit message (same exception type as the bare index access)
# when the folder listing came back empty.
if not acc_states:
    raise IndexError('No account statement files were found in the TEMP folder')
file_id = acc_states[0]['id']
# Download the PDF via the Drive service and split its text into individual lines;
# later cells address balances and transactions by index into `lines`.
text = extract_text_from_pdf(file_id, service=service)
lines = text.split('\n')
len(lines)

131

In [6]:
# Look up the 'alter Kontostand' (old balance) and 'neuer Kontostand' (new balance) entries.
# Each result is indexed below as [0] = balance value and [1] = index into `lines`.
acc_balance_old = get_balance_of_account(lines, 'alter Kontostand')
acc_balance_new = get_balance_of_account(lines, 'neuer Kontostand')
print(f'Old acc value, Value: {acc_balance_old[0]}€ - Line-Index: {acc_balance_old[1]}')
print(f'New acc value, Value: {acc_balance_new[0]}€ - Line-Index: {acc_balance_new[1]}')

# Collect the transactions using the two balance line indices
# (presumably as the bounds of the transaction section — confirm in the helper).
all_transactions = get_all_transactions(lines, acc_balance_old[1], acc_balance_new[1])
print(f'Count of transactions: {len(all_transactions)}')

Old acc value, Value: 3401.18€ - Line-Index: 11
New acc value, Value: 2972.89€ - Line-Index: 99
Count of transactions: 17


# Open Spreadsheet

In [7]:
# The year is the second '_'-separated part of the statement file name,
# e.g. '2115089016_2023_Nr.001_Kontoauszug.pdf' -> '2023'.
year = acc_states[0]['name'].split('_')[1]
# The year is later concatenated into a '%d.%m.%Y' date string, so reject anything
# that is not a plain 4-digit year right here instead of failing mid-loop.
if not (len(year) == 4 and year.isdigit()):
    raise ValueError(f"Could not read a 4-digit year from file name {acc_states[0]['name']!r}")
print(year)

# SPREADSHEET_ID is already read from the environment in the configuration cell;
# stop early with a clear message instead of handing None to the Sheets client.
if not SPREADSHEET_ID:
    raise RuntimeError("Environment variable 'SPREADSHEET_ID' is not set")
print(f'Using spreadsheet ID: {SPREADSHEET_ID}')

2023
Using spreadsheet ID: 1z_rr8nVFlF3PKIbO4v6xv_tSswop7sJRzRftzWzPdE8


In [8]:
# Open the target spreadsheet by its ID.
spreadsheet = client.open_by_key(SPREADSHEET_ID)

# Load every cell of the 'Transaktionen' worksheet into a DataFrame.
# At this point the sheet's header row is still the first data row of the frame.
sheet_transactions = spreadsheet.worksheet('Transaktionen')
df_sheet_transactions = pd.DataFrame(sheet_transactions.get_all_values())

In [9]:
# Promote the first sheet row to column names.
# A frame built from raw sheet values still carries default integer column labels
# (a RangeIndex); after promotion the labels are the header strings. Checking for
# the RangeIndex makes this cell safe to re-run: a second run is a no-op instead of
# consuming the first data row as another header.
if isinstance(df_sheet_transactions.columns, pd.RangeIndex):
    if len(df_sheet_transactions) == 0:
        raise ValueError('Worksheet returned no rows, so there is no header row to promote')
    # .tolist() keeps the row label (0) from becoming the name of the columns index.
    df_sheet_transactions.columns = df_sheet_transactions.iloc[0].tolist()
    df_sheet_transactions = df_sheet_transactions[1:].reset_index(drop=True)

In [10]:
# Preview the first rows to check that the sheet's header row is now used as column names.
df_sheet_transactions.head()

Unnamed: 0,Wer,Betrag,Art,Datum,Kategorie,Hinweis


# Add transactions to the Google Sheet (TODO: this step still needs to be changed)

In [None]:
# Turn every parsed statement transaction into one spreadsheet row and append all
# rows to the frame in a single concat (instead of growing it row by row).
# NOTE(review): re-running this cell appends the same transactions again; reload
# the sheet frame first if a clean state is needed.
new_rows = []
for transaction in all_transactions:
    print(transaction)

    transaction_value = get_transaction_value(transaction)
    print(transaction_value)
    transaction_type = check_income_or_expense(transaction)
    print(transaction_type)
    # The second element of a transaction is the counterparty name.
    name = transaction[1].strip()
    print(name)
    # The first token of the booking line is 'DD.MM.'; the year comes from the file name.
    # NOTE(review): assumes every booking falls in the statement's year — confirm for
    # statements that span a year boundary.
    date_str = transaction[0].split(' ')[0] + year
    date = datetime.strptime(date_str, '%d.%m.%Y').date()
    print(date)

    # Positional order must match the sheet columns:
    # Wer, Betrag, Art, Datum, Kategorie, Hinweis (the last two are left empty).
    new_rows.append([name, transaction_value, transaction_type, date, None, None])

# dtype=object matches the string-based sheet frame, so concat does not have to
# reconcile dtypes for an empty sheet or for the all-empty trailing columns.
df_sheet_transactions = pd.concat(
    [
        df_sheet_transactions,
        pd.DataFrame(new_rows, columns=df_sheet_transactions.columns, dtype=object),
    ],
    ignore_index=True,
)


['02.01. 02.01. Überweisungsauftrag 507,60 S', 'Elisabeth Heuser', 'Elisabeth HeuserGolf IBAN: DE41500105175409388951 BIC: INGDDEFFXXX']
507.6
Ausgabe
Elisabeth Heuser
2023-01-02
['02.01. 02.01. Dauerauftragsbelast 10,00 S', 'Freundeskreis Elisabeth-Hospiz e.V.', 'Freundeskreis Elisabeth-Hospiz e.V.Spende /*DA-2* IBAN: DE70370695202107385013 BIC: GENODED1RST']
10.0
Ausgabe
Freundeskreis Elisabeth-Hospiz e.V.
2023-01-02
['02.01. 02.01. Kartenzahlung girocard 10,37 S', 'KAUFLAND', 'KAUFLANDKaufland Lohmar/Lohmar/DE31.12.2022 um 09:39:46 Uhr 61347200/159578/ECTL/37069520/2115089016/1/1225']
10.37
Ausgabe
KAUFLAND
2023-01-02
['02.01. 02.01. Kartenzahlung girocard 254,00 S', '11teamsports Koeln GmbH', '11teamsports Koeln GmbH11teamsports Koeln GmbH/Koeln/DE30.12.2022 um 17:02:27 Uhr 60533803/11992/CICC/FPIN37069520/2115089016/1/1225 REF 743710/260047']
254.0
Ausgabe
11teamsports Koeln GmbH
2023-01-02
['03.01. 03.01. Dauerauftragsgutschr 60,00 H', 'Heinz-Dieter Heuser', 'Heinz-Dieter HeuserT

In [12]:
# Show the frame after the append step.
# NOTE(review): the saved output shows a single row labelled -1, which the append cell
# as written (row label = len(df_sheet_transactions)) would not produce — re-run to refresh.
df_sheet_transactions

Unnamed: 0,Wer,Betrag,Art,Datum,Kategorie,Hinweis
-1,Deutsche Post AG NL Renten Service,282.1,Einnahme,2023-01-31,,
