In [None]:
import pandas as pd

In [None]:
from splitwise_sync.config import RAW_DIR, PROCESSED_DIR
from splitwise_sync.core.models import Transaction

# Reading data

In [None]:
from splitwise_sync.ml.cleaning import read_expenses
import json


def get_transaction_details(expense_details: str) -> "Transaction | None":
    """Parse the transaction stored on the last line of an expense's details.

    The last non-blank line of ``expense_details`` is decoded as JSON and, when
    it is a JSON object, its members are passed as keyword arguments to
    ``Transaction``.

    Args:
        expense_details: Text of the expense details. Values that are not
            strings (for example a missing cell) are tolerated and yield
            ``None``.

    Returns:
        The constructed ``Transaction``, or ``None`` when the input is not a
        string, contains no non-blank line, its last non-blank line is not a
        JSON object, or ``Transaction`` rejects the decoded members with a
        ``TypeError``.
    """
    if not isinstance(expense_details, str):
        return None

    # Strip trailing whitespace first so blank lines after the payload do not
    # become the "last line" and hide it.
    lines = expense_details.rstrip().splitlines()
    if not lines:
        return None

    try:
        payload = json.loads(lines[-1])
        # Only a JSON object can be unpacked into keyword arguments; scalars,
        # lists and null are treated as "no transaction".
        if not isinstance(payload, dict):
            return None
        return Transaction(**payload)
    except (TypeError, json.JSONDecodeError):
        # TypeError: the decoded keys do not match what Transaction accepts.
        # JSONDecodeError: the last line is not valid JSON.
        return None


def read_expenses_feedback(path: str) -> pd.DataFrame:
    """Load expenses from ``path`` and expand their embedded transactions.

    Each row's ``expense_details`` is parsed with ``get_transaction_details``;
    rows without a parsed transaction are dropped. The remaining transactions
    are expanded into columns via ``to_series()`` and placed next to the
    original expense columns, and a boolean ``is_shared`` column is added.
    """
    expenses = read_expenses(path)
    expenses["transaction"] = expenses["expense_details"].map(
        get_transaction_details
    )

    # Keep only the rows for which a transaction could be parsed.
    has_transaction = expenses["transaction"].notna()
    expenses = expenses[has_transaction]

    # One column per field of the transaction, aligned on the expense index.
    transaction_columns = expenses["transaction"].apply(
        lambda transaction: transaction.to_series()
    )
    expense_columns = expenses.drop(columns=["transaction"])
    feedback = pd.concat([expense_columns, transaction_columns], axis=1)

    # is_shared is True exactly when expense_deleted_at is set.
    # NOTE(review): confirm that a deleted expense is meant to count as shared
    # rather than the opposite.
    feedback["is_shared"] = feedback["expense_deleted_at"].notna()
    return feedback


# Build the feedback frame from the Splitwise dump stored under RAW_DIR.
# read_expenses_feedback is annotated to take a str path, hence the str() call.
expenses_feedback = read_expenses_feedback(str(RAW_DIR / "dump-splitwise.json"))
# Bare expression as the last line of the cell so the notebook displays it.
expenses_feedback

In [None]:
# Persist the feedback frame under PROCESSED_DIR as a pickle.
expenses_feedback.to_pickle(PROCESSED_DIR / "expenses_feedback.pkl")

In [None]:
# Load the pickled "matched_transactions_locs" object from PROCESSED_DIR.
# NOTE(review): presumably a DataFrame written by another notebook or script;
# the producer is not visible here.
matched_transactions_locs = pd.read_pickle(
    PROCESSED_DIR / "matched_transactions_locs.pkl"
)

In [None]:
# Stack the feedback rows on top of the matched transactions (axis=0).
# ignore_index=True discards both original indexes and renumbers the result.
# NOTE(review): assumes the two frames have compatible columns; concat's
# default outer join fills columns missing on one side with NaN.
matched_transactions_with_feedback = pd.concat(
    [expenses_feedback, matched_transactions_locs], axis=0, ignore_index=True
)
# Bare expression as the last line of the cell so the notebook displays it.
matched_transactions_with_feedback

In [None]:
# Persist the combined frame under PROCESSED_DIR as a pickle.
matched_transactions_with_feedback.to_pickle(
    PROCESSED_DIR / "matched_transactions_locs_with_feedback.pkl"
)