In [58]:
import pandas as pd
import re
import numpy as np

In [63]:
def parse_transactions(text):
    """Parse a plain-text card statement into a DataFrame, one row per transaction.

    The text is split on date headers of the form ``"DD июля, YYYY года"`` or
    ``"DD августа, YYYY года"`` (only these two month names are recognised).
    Within the text following each header, every record is read as
    consecutive lines:

    1. time
    2. transaction type
    3. merchant line, optionally prefixed with ``"MCC <digits> / "``
    4. amount in local currency (absent when line 4 already contains "USD")
    5. amount in USD
    6. card type

    Blank lines before a record are skipped.

    Parameters
    ----------
    text : str
        Raw statement text.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Date', 'Time', 'Transaction Type', 'MCC Code',
        'Merchant Information', 'Amount in Local Currency', 'Amount in USD',
        'Card Type'. All values are the stripped source strings; a field that
        is missing (for example because the record is cut off at the end of a
        date section) is ``None`` in that record's own row.
    """
    columns = [
        'Date',
        'Time',
        'Transaction Type',
        'MCC Code',
        'Merchant Information',
        'Amount in Local Currency',
        'Amount in USD',
        'Card Type',
    ]

    # The capturing group makes re.split keep the date headers: odd indices
    # hold a header, the following even index holds that day's text.
    date_splits = re.split(r'(\d{2} (?:августа|июля), \d{4} года)', text)

    # One dict per transaction. Building whole records (instead of eight
    # parallel lists) guarantees that a truncated record cannot shift the
    # fields of later transactions onto the wrong rows.
    records = []

    for i in range(1, len(date_splits), 2):
        date = date_splits[i]
        lines = date_splits[i + 1].split("\n")
        n_lines = len(lines)

        j = 0
        while j < n_lines:
            if lines[j].strip() == "":
                j += 1
                continue

            # Every field starts as None; only the lines that exist fill it in.
            record = dict.fromkeys(columns)
            record['Date'] = date
            record['Time'] = lines[j].strip()
            records.append(record)

            if j + 1 >= n_lines:
                break  # section ended right after the time line
            record['Transaction Type'] = lines[j + 1].strip()

            if j + 2 >= n_lines:
                break  # section ended before the merchant line
            merchant_line = lines[j + 2]
            mcc_match = re.search(r'MCC (\d+)', merchant_line)
            record['MCC Code'] = mcc_match.group(1) if mcc_match else None
            record['Merchant Information'] = re.sub(r'MCC \d+ / ', '', merchant_line).strip()

            if j + 3 < n_lines and "USD" in lines[j + 3]:
                # USD-only transaction: no separate local-currency line.
                record['Amount in USD'] = lines[j + 3].strip()
                j += 5
            elif j + 4 < n_lines:
                record['Amount in Local Currency'] = lines[j + 3].strip()
                record['Amount in USD'] = lines[j + 4].strip()
                j += 6
            else:
                break  # section ended before the amount lines

            # j now points just past the record, so its last line (the card
            # type) sits at j - 1, if the section is long enough to hold it.
            if j - 1 < n_lines:
                record['Card Type'] = lines[j - 1].strip()

    # Assemble column-wise so an empty input still yields all eight columns.
    return pd.DataFrame({
        column: [record[column] for record in records]
        for column in columns
    })

In [65]:
# Read the raw statement text; the context manager closes the file handle
# as soon as the contents are in memory.
# NOTE(review): no encoding is given, so the platform default is used —
# confirm it matches how trans.txt was saved (the parser expects Cyrillic text).
with open('trans.txt', 'r') as statement_file:
    trans_text = statement_file.read()

In [66]:
# Parse the statement text into a DataFrame with one row per transaction.
df = parse_transactions(trans_text)

In [69]:
# Write the parsed transactions to an Excel workbook; index=False leaves the
# DataFrame's row index out of the sheet.
df.to_excel('results_mtbank.xlsx', index=False)

In [31]:
# Scratch cell: repeats, at module level, the same date-header split that
# parse_transactions performs on its argument.
# NOTE(review): the execution count (In [31]) is lower than that of the cell
# defining trans_text (In [65]), so on a fresh kernel this only works when the
# cells are run top to bottom — confirm whether this cell is still needed.
date_splits = re.split(r'(\d{2} (?:августа|июля), \d{4} года)', trans_text)

In [33]:
# Initialize module-level lists to store transaction data.
# NOTE(review): parse_transactions creates its own local lists with these
# names, and no visible cell reads the module-level ones — this looks like
# leftover scratch work; confirm before relying on it.
dates = []
times = []
transaction_types = []
mcc_codes = []
merchant_infos = []
local_currencies = []
usd_amounts = []
card_types = []

In [17]:
# Inspect how many pieces the split produced. Because the pattern has one
# capturing group, the list alternates text / date header and has 2k + 1
# entries for k matched headers.
len(date_splits)

49