# SimpleFIN Data Exploration

This notebook allows you to interactively pull and explore your financial transaction data using the SimpleFIN Bridge.

In [None]:
import requests
import base64
import datetime
import json
import os
import pandas as pd
import psycopg2
from IPython.display import display, Markdown

ACCESS_FILE_NAME = 'simplefin_access_url.txt'

_cwd = os.getcwd()
print(f"Current Working Directory: {_cwd}")

# Candidate locations for the saved Access URL, checked in order; the first
# one that exists on disk wins.
possible_paths = [
    # Relative to the current working directory.
    ACCESS_FILE_NAME,
    # Inside a `simplefin/` sub-directory, for when the kernel runs from the repo root.
    os.path.join(_cwd, 'simplefin', ACCESS_FILE_NAME),
    # NOTE: '__file__' is a quoted string here, so this resolves against the
    # working directory rather than the notebook's own location.
    os.path.join(os.path.dirname(os.path.abspath('__file__')), ACCESS_FILE_NAME),
    # Machine-specific fallback.
    '/home/danielaroko/applications/data_exploration/simplefin/simplefin_access_url.txt',
]

access_file_path = next(
    (candidate for candidate in possible_paths if os.path.exists(candidate)),
    None,
)

print(f"Resolved Access File Path: {access_file_path}")

## Authentication

We need an Access URL to talk to SimpleFIN. 
1. If you have already run this notebook, the Access URL is loaded from `simplefin_access_url.txt`.
2. Otherwise, you will be prompted for a **Setup Token**, which is exchanged for an Access URL and saved to that file.
   - Get a token here: [https://bridge.simplefin.org/simplefin/create](https://bridge.simplefin.org/simplefin/create)
   - Or use a demo token.

In [None]:
access_url = None

# 1. Reuse a previously saved Access URL when the resolved file has content.
if access_file_path and os.path.exists(access_file_path):
    try:
        with open(access_file_path, 'r') as f:
            content = f.read().strip()
        if content:
            access_url = content
            print(f"Loaded Access URL from {access_file_path}")
        else:
            print("Access file is empty.")
    except Exception as e:
        print(f"Error reading access file: {e}")

# 2. Only prompt if we still don't have an Access URL
if not access_url:
    print("No stored Access URL found.")
    setup_token = input('Enter your Setup Token: ').strip()

    if setup_token:
        try:
            # The Setup Token is the base64 encoding of the claim URL.
            claim_url = base64.b64decode(setup_token).decode('utf-8')
            print(f"Claiming access from: {claim_url}")

            # Bound the wait so a stalled connection cannot hang the kernel.
            response = requests.post(claim_url, timeout=30)
            response.raise_for_status()

            # Strip surrounding whitespace so the claimed value matches what a
            # later load from file (which also strips) would produce.
            claimed_url = response.text.strip()
            if not claimed_url:
                raise ValueError("claim response was empty")
            access_url = claimed_url
        except Exception as e:
            print(f"Error claiming token: {e}")

        if access_url:
            # Saving is reported separately from claiming: when only the write
            # fails, the Access URL is still usable for the rest of this session.
            save_path = access_file_path if access_file_path else ACCESS_FILE_NAME
            try:
                with open(save_path, 'w') as f:
                    f.write(access_url)
                print(f"Access URL claimed and saved to {save_path}")
            except OSError as e:
                print(f"Access URL claimed but could not be saved to {save_path}: {e}")
            else:
                # The Access URL embeds credentials; keep the file owner-only
                # where the platform supports it (best effort).
                try:
                    os.chmod(save_path, 0o600)
                except OSError as e:
                    print(f"Warning: could not restrict permissions on {save_path}: {e}")
    else:
        print("No token provided. Cannot proceed.")

## Fetch Data

Now we use the Access URL to fetch account and transaction data.

In [None]:
# Define `data` up front so later cells can always reference it, including
# when no Access URL is available or the request fails.
data = None

if access_url:
    try:
        # Split "scheme://user:pass@host/path" into the bare endpoint and the
        # basic-auth credentials.
        scheme, rest = access_url.strip().split('//', 1)
        auth, rest = rest.split('@', 1)
        url = scheme + '//' + rest.rstrip('/') + '/accounts'
        username, password = auth.split(':', 1)

        print(f"Fetching data from: {url}")
        # Bound the wait so a stalled connection cannot hang the kernel.
        response = requests.get(url, auth=(username, password), timeout=60)
        response.raise_for_status()
        data = response.json()
        print("Data fetched successfully.")

    except Exception as e:
        print(f"Error fetching data: {e}")
        data = None
else:
    print("No Access URL available.")

## Explore Accounts and Transactions

We'll convert the data into two Pandas DataFrames: one for Accounts and one for Transactions.

In [None]:
def process_data(data):
    """Convert a fetched accounts payload into account and transaction DataFrames.

    For every account this also renders a Markdown heading, a balance line and
    a preview (``head()``) of that account's transactions.

    Args:
        data: Parsed JSON payload; expected to be a dict with an ``'accounts'``
            list. ``None`` or a payload without that key is accepted.

    Returns:
        tuple: ``(df_accounts, df_transactions)``. Both are empty DataFrames
        when ``data`` is falsy or has no ``'accounts'`` key. ``df_accounts``
        has one row per account; ``df_transactions`` concatenates all
        per-account transaction frames, with ``id`` renamed to
        ``transaction_id`` and an ``account_id`` column added.
    """
    if not data or 'accounts' not in data:
        return pd.DataFrame(), pd.DataFrame()

    all_transactions = []
    all_accounts = []

    for account in data['accounts']:
        acct_name = account.get('name', 'Unknown')
        acct_id = account.get('id', '')
        acct_currency = account.get('currency', '')
        balance = account.get('balance', 0)

        # A missing balance-date becomes None instead of raising TypeError
        # inside fromtimestamp().
        # NOTE(review): fromtimestamp() yields local time while `posted` below
        # is converted as UTC - confirm whether that difference is intended.
        raw_balance_date = account.get('balance-date')
        balance_date = (
            datetime.datetime.fromtimestamp(raw_balance_date)
            if raw_balance_date else None
        )

        display(Markdown(f"### {acct_name}"))
        display(Markdown(f"**Balance:** {balance} {acct_currency} (as of {balance_date})"))

        # Store Account Data
        all_accounts.append({
            'account_id': acct_id,
            'name': acct_name,
            'currency': acct_currency,
            'balance': balance,
            'balance_date': balance_date
        })

        transactions = account.get('transactions') or []
        if not transactions:
            print("No transactions found.")
            continue

        df = pd.DataFrame(transactions)
        # Convert epoch seconds to timestamps
        if 'posted' in df.columns:
            df['posted'] = pd.to_datetime(df['posted'], unit='s')

        # Add account info to the dataframe for reference
        df['account_id'] = acct_id

        # Make the transaction id unambiguous next to account_id
        # (a no-op when there is no 'id' column).
        df = df.rename(columns={'id': 'transaction_id'})

        all_transactions.append(df)

        # Preview only the columns that are actually present, so a payload
        # lacking e.g. 'description' does not raise KeyError.
        cols_to_show = [
            col for col in ('posted', 'amount', 'description', 'transaction_id')
            if col in df.columns
        ]
        display(df[cols_to_show].head())

    df_accounts = pd.DataFrame(all_accounts)
    df_transactions = pd.concat(all_transactions, ignore_index=True) if all_transactions else pd.DataFrame()

    return df_accounts, df_transactions

# Build the accounts and transactions DataFrames from the payload held in `data`
# (this also renders the per-account previews).
df_accounts, df_transactions = process_data(data)

## Aggregate Analysis

All transactions combined for analysis.

In [None]:
# Show the ten most recently posted transactions across all accounts.
if df_transactions.empty:
    print("No transactions to display.")
else:
    print(f"Total Transactions: {len(df_transactions)}")
    display(
        df_transactions
        .sort_values('posted', ascending=False)
        .head(10)
    )

## Database Storage

Store the fetched accounts and transactions into the local PostgreSQL database.

In [None]:
def _read_env_file(env_path):
    """Parse ``KEY=VALUE`` lines of a .env file into a dict of strings."""
    env_vars = {}
    with open(env_path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blanks, comments and anything that is not an assignment.
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            value = value.strip()
            # Drop one pair of matching surrounding quotes, as .env files commonly use.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
                value = value[1:-1]
            env_vars[key.strip()] = value
    return env_vars


def _db_value(value):
    """Return None for pandas missing markers (NaN/NaT/None), else the value unchanged."""
    is_missing = pd.isna(value)
    # pd.isna returns an array for list-likes; only a plain True means "missing scalar".
    return None if isinstance(is_missing, bool) and is_missing else value


def save_to_db(accounts_df, transactions_df):
    """Upsert accounts and transactions into PostgreSQL.

    Connection settings (``DB_HOST``, ``DB_NAME``, ``DB_USER``, ``DB_PASS``)
    are read from the first ``.env`` file found. The ``accounts`` and
    ``transactions`` tables are created if missing, then every row is
    inserted or, on primary-key conflict, updated.

    Args:
        accounts_df: DataFrame with ``account_id``, ``name``, ``currency``,
            ``balance`` and ``balance_date`` columns.
        transactions_df: DataFrame with a ``transaction_id`` column and,
            optionally, ``account_id``, ``posted``, ``amount`` and
            ``description``.

    Returns:
        None. Problems are reported with ``print`` rather than raised; the
        connection is always closed, which discards any uncommitted work.
    """
    # Look for .env in common locations
    possible_envs = [
        '.env',
        '/home/danielaroko/applications/data_exploration/simplefin/.env'
    ]
    env_path = next((p for p in possible_envs if os.path.exists(p)), None)
    if env_path is None:
        print("No .env file found.")
        return

    try:
        env_vars = _read_env_file(env_path)
    except Exception as e:
        print(f"Error reading .env: {e}")
        return

    create_accounts_query = """
        CREATE TABLE IF NOT EXISTS accounts (
            account_id VARCHAR(255) PRIMARY KEY,
            name VARCHAR(255),
            currency VARCHAR(10),
            balance NUMERIC(15, 2),
            balance_date TIMESTAMP,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
        """
    create_tx_query = """
        CREATE TABLE IF NOT EXISTS transactions (
            transaction_id VARCHAR(255) PRIMARY KEY,
            account_id VARCHAR(255) REFERENCES accounts(account_id),
            posted TIMESTAMP,
            amount NUMERIC(15, 2),
            description TEXT
        );
        """
    insert_acc = """
                INSERT INTO accounts (account_id, name, currency, balance, balance_date)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (account_id)
                DO UPDATE SET
                    name = EXCLUDED.name,
                    currency = EXCLUDED.currency,
                    balance = EXCLUDED.balance,
                    balance_date = EXCLUDED.balance_date,
                    last_updated = CURRENT_TIMESTAMP;
                """
    insert_tx = """
                INSERT INTO transactions (transaction_id, account_id, posted, amount, description)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (transaction_id) 
                DO UPDATE SET 
                    account_id = EXCLUDED.account_id,
                    posted = EXCLUDED.posted,
                    amount = EXCLUDED.amount,
                    description = EXCLUDED.description;
                """

    conn = None
    try:
        conn = psycopg2.connect(
            host=env_vars.get('DB_HOST', 'localhost'),
            database=env_vars.get('DB_NAME'),
            user=env_vars.get('DB_USER'),
            password=env_vars.get('DB_PASS')
        )
        cur = conn.cursor()
        try:
            # 1. Make sure both tables exist (accounts first: transactions references it).
            cur.execute(create_accounts_query)
            cur.execute(create_tx_query)
            conn.commit()

            # 2. Upsert accounts
            acc_inserted = 0
            if not accounts_df.empty:
                for _, row in accounts_df.iterrows():
                    if 'account_id' not in row or pd.isna(row['account_id']):
                        continue
                    cur.execute(insert_acc, (
                        row['account_id'],
                        _db_value(row.get('name')),
                        _db_value(row.get('currency')),
                        _db_value(row.get('balance')),
                        _db_value(row.get('balance_date')),
                    ))
                    acc_inserted += 1

            conn.commit()
            print(f"Processed {acc_inserted} accounts.")

            # 3. Upsert transactions
            tx_inserted = 0
            if not transactions_df.empty:
                for _, row in transactions_df.iterrows():
                    if 'transaction_id' not in row or pd.isna(row['transaction_id']):
                        continue
                    # Missing cells (NaN/NaT, e.g. from concatenating frames with
                    # different columns) are sent as SQL NULL.
                    cur.execute(insert_tx, (
                        row['transaction_id'],
                        _db_value(row.get('account_id')),
                        _db_value(row.get('posted')),
                        _db_value(row.get('amount')),
                        _db_value(row.get('description')),
                    ))
                    tx_inserted += 1

            conn.commit()
            print(f"Processed {tx_inserted} transactions.")
        finally:
            cur.close()

    except Exception as e:
        print(f"Database Error: {e}")
    finally:
        # Always release the connection, even when a statement failed.
        if conn is not None:
            conn.close()

# Persist the current account and transaction frames to PostgreSQL
# (rows are upserted on their primary keys).
save_to_db(df_accounts, df_transactions)

## Fetch Historical Data

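We use Dropdown widgets to select 60-day windows of older transactions and filter by account. Changing either dropdown immediately fetches the selected window and saves the result to the database.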

In [None]:
import ipywidgets as widgets
from IPython.display import clear_output

def process_data_silent(data):
    """Build account and transaction DataFrames from a payload without rendering anything.

    Args:
        data: Parsed JSON payload; expected to be a dict with an ``'accounts'``
            list. ``None`` or a payload without that key is accepted.

    Returns:
        tuple: ``(df_acc, df_tx)``. Both are empty DataFrames when ``data`` is
        falsy or lacks ``'accounts'``. ``df_tx`` is the concatenation of every
        account's transactions, with ``id`` renamed to ``transaction_id`` and
        an ``account_id`` column added.
    """
    if not data or 'accounts' not in data:
        return pd.DataFrame(), pd.DataFrame()

    account_rows = []
    tx_frames = []

    for acct in data['accounts']:
        acct_id = acct.get('id', '')
        raw_balance_date = acct.get('balance-date')

        account_rows.append({
            'account_id': acct_id,
            'name': acct.get('name', 'Unknown'),
            'currency': acct.get('currency', ''),
            'balance': acct.get('balance', 0),
            # A missing (or falsy) balance-date is stored as None.
            'balance_date': (
                datetime.datetime.fromtimestamp(raw_balance_date)
                if raw_balance_date else None
            ),
        })

        raw_txs = acct.get('transactions', [])
        if not raw_txs:
            continue

        frame = pd.DataFrame(raw_txs)
        if 'posted' in frame.columns:
            # Epoch seconds -> pandas timestamps.
            frame['posted'] = pd.to_datetime(frame['posted'], unit='s')

        # Tag each transaction with its owning account; renaming is a no-op
        # when the frame has no 'id' column.
        frame['account_id'] = acct_id
        frame = frame.rename(columns={'id': 'transaction_id'})

        tx_frames.append(frame)

    df_acc = pd.DataFrame(account_rows)
    if tx_frames:
        df_tx = pd.concat(tx_frames, ignore_index=True)
    else:
        df_tx = pd.DataFrame()
    return df_acc, df_tx

def fetch_history_window(change):
    """Widget observer: fetch the selected history window and store the result.

    Reads the current values of ``history_dropdown`` and ``account_dropdown``
    (regardless of which one fired), requests ``/accounts`` for that date
    window, shows up to 20 of the newest transactions inside ``out`` and
    passes the result to ``save_to_db``.

    Args:
        change: Change notification supplied by the widget; not inspected.

    Returns:
        None. Does nothing until a history range has been selected. Errors are
        printed into the output widget rather than raised.
    """
    # Both dropdowns need to be read, regardless of which one triggered the change
    days_choice = history_dropdown.value
    account_choice = account_dropdown.value  # Account ID, or None for all accounts

    if not days_choice:
        return

    start_days, end_days = days_choice

    with out:
        # Clear previous output
        clear_output()
        print(f"Fetching history for range: {start_days} to {end_days} days ago...")
        if account_choice:
            print(f"Filter: Account ID {account_choice}")
        else:
            print("Filter: All Accounts")

        if not access_url:
            print("No Access URL available.")
            return

        # "start_days ago" is the newer edge of the window, "end_days ago" the older.
        now = datetime.datetime.now()
        end_dt = now - datetime.timedelta(days=start_days)
        start_dt = now - datetime.timedelta(days=end_days)

        start_ts = int(start_dt.timestamp())
        end_ts = int(end_dt.timestamp())

        print(f"Window: {start_dt.date()} -> {end_dt.date()}")

        try:
            # Split "scheme://user:pass@host/path" into endpoint and credentials.
            scheme, rest = access_url.strip().split('//', 1)
            auth, rest = rest.split('@', 1)
            base_url = scheme + '//' + rest.rstrip('/') + '/accounts'
            username, password = auth.split(':', 1)

            # Let requests build the query string so values (notably the
            # account id) are URL-encoded.
            query = {'start-date': start_ts, 'end-date': end_ts}
            if account_choice:
                query['account'] = account_choice

            # Bound the wait so a stalled connection cannot hang the kernel.
            response = requests.get(
                base_url, params=query, auth=(username, password), timeout=60
            )
            response.raise_for_status()

            hist_data = response.json()
            df_acc, df_tx = process_data_silent(hist_data)

            if df_tx.empty:
                print("No transactions found in this range.")
                if not df_acc.empty:
                    print("Updating account balances from history fetch...")
                    save_to_db(df_acc, df_tx)
            else:
                print(f"Found {len(df_tx)} transactions.")
                # Newest first when a 'posted' column is available.
                preview = (
                    df_tx.sort_values('posted', ascending=False)
                    if 'posted' in df_tx.columns else df_tx
                )
                display(preview.head(20))
                # Saved without asking for confirmation.
                print("Saving history to database...")
                save_to_db(df_acc, df_tx)

        except Exception as e:
            print(f"Error: {e}")

# Account choices: an "All Accounts" entry (value None) followed by one entry
# per account id found in the already-fetched `data`, when there is any.
account_options = [('All Accounts', None)]
if 'data' in globals() and data and 'accounts' in data:
    account_options.extend(
        (f"{acct.get('name', 'Unknown')} (...{acct.get('id')[-4:]})", acct.get('id'))
        for acct in data['accounts']
        if acct.get('id')
    )

# History choices: six consecutive 60-day windows; each value is a
# (start_days_ago, end_days_ago) pair.
history_options = [('Last 60 Days', (0, 60))] + [
    (f"{start} - {start + 60} Days Ago", (start, start + 60))
    for start in range(60, 360, 60)
]

history_dropdown = widgets.Dropdown(
    options=history_options, value=None, description='History Range:', disabled=False
)
account_dropdown = widgets.Dropdown(
    options=account_options, value=None, description='Account:', disabled=False
)

out = widgets.Output()

# A change to either dropdown triggers a fetch.
for dropdown in (history_dropdown, account_dropdown):
    dropdown.observe(fetch_history_window, names='value')

display(Markdown("### Select History Range and Account"))
display(widgets.HBox([history_dropdown, account_dropdown]))
display(out)