# Firebase Data Explorer

This notebook allows you to explore data stored in Firebase Firestore.

It supports both:
- Firebase Emulator (for local development)
- Production Firebase (requires credentials)


## Setup

Import required libraries and set up Firebase connection.


In [None]:
# Point the Firestore client at the local emulator.
# NOTE: `!export VAR=...` would run in a short-lived subshell and never reach
# this kernel, so the variables are set on the kernel process itself. They must
# be set before the Firebase connection is created further down.
# Skip (or edit) this cell to talk to production Firebase instead.
import os

os.environ["USE_FIREBASE_EMULATOR"] = "true"
os.environ["FIRESTORE_EMULATOR_HOST"] = "localhost:8080"

In [None]:
import sys
from pathlib import Path
import pandas as pd
import json
from datetime import datetime
from typing import List, Dict, Any, Optional

# Add project root to path
# Path() is the kernel's current working directory, so this puts that directory
# first on sys.path. NOTE(review): this assumes the notebook is run from the
# project root (the directory containing `src/`) — confirm, or adjust the path.
project_root = Path().resolve()
sys.path.insert(0, str(project_root))

# Import Firebase functions
from src.database.firestore import (
    get_db,
    get_all_users,
    get_user,
    get_user_accounts,
    get_user_transactions,
    get_user_features,
    get_persona_assignments,
    get_recommendations,
    get_all_features,
    get_all_personas,
    get_all_recommendations
)


## Initialize Firebase Connection

The connection will automatically detect if you're using the emulator or production Firebase based on environment variables.


In [None]:
# Initialize Firebase and report which backend the client is pointed at.
import os

db = get_db()

if db is not None:
    print("✅ Firebase connection established successfully!")
    # A non-empty FIRESTORE_EMULATOR_HOST is treated as "emulator in use".
    if os.getenv('FIRESTORE_EMULATOR_HOST'):
        print(f"   Using emulator at {os.getenv('FIRESTORE_EMULATOR_HOST')}")
    else:
        print("   Using production Firebase")
else:
    # get_db() returned nothing: print setup hints for both modes.
    for hint in (
        "⚠️  Firebase not initialized. Check your credentials or emulator settings.",
        "\nFor emulator usage:",
        "  export USE_FIREBASE_EMULATOR=true",
        "  export FIRESTORE_EMULATOR_HOST=localhost:8080",
        "\nFor production:",
        "  Ensure firebase-service-account.json exists or FIREBASE_SERVICE_ACCOUNT env var is set",
    ):
        print(hint)


## Helper Functions

Utility functions to convert Firestore data to pandas DataFrames.


In [None]:
def convert_timestamp(obj):
    """Render a timestamp-like value as an ISO-8601 string.

    Any object exposing a ``timestamp()`` method is converted via its epoch
    seconds into a naive local-time ``datetime`` and formatted with
    ``isoformat()``. Every other value is returned unchanged.
    """
    if not hasattr(obj, 'timestamp'):
        return obj
    epoch_seconds = obj.timestamp()
    return datetime.fromtimestamp(epoch_seconds).isoformat()

def _json_fallback(value: Any) -> Any:
    """``json.dumps`` hook for nested values that JSON cannot encode natively.

    Timestamp-like objects (anything with a ``timestamp()`` method) become the
    same ISO-8601 string used for top-level fields; anything else falls back to
    ``str(value)`` so one exotic nested value cannot abort the whole export.
    """
    if hasattr(value, 'timestamp'):
        return datetime.fromtimestamp(value.timestamp()).isoformat()
    return str(value)


def clean_data_for_df(data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Clean Firestore data for DataFrame conversion.

    Args:
        data: Records (dicts) to flatten. ``None`` or an empty list is accepted.

    Returns:
        A new list of new dicts (the input is not modified) in which:
        - timestamp-like values are ISO-8601 strings (naive local time),
        - dict and list values are JSON strings, with any timestamps nested
          inside them converted the same way,
        - all other values are passed through unchanged.
    """
    cleaned = []
    for item in data or []:
        cleaned_item = {}
        for key, value in item.items():
            if hasattr(value, 'timestamp'):
                cleaned_item[key] = datetime.fromtimestamp(value.timestamp()).isoformat()
            elif isinstance(value, (dict, list)):
                # default= handles timestamps nested inside maps/arrays, which
                # would otherwise raise TypeError during serialization.
                cleaned_item[key] = json.dumps(value, default=_json_fallback)
            else:
                cleaned_item[key] = value
        cleaned.append(cleaned_item)
    return cleaned

def get_users_df() -> pd.DataFrame:
    """Load every user record into a DataFrame.

    Returns:
        The records from ``get_all_users()`` after ``clean_data_for_df``, or an
        empty DataFrame when the loader returns nothing.
    """
    user_records = get_all_users()
    if user_records:
        return pd.DataFrame(clean_data_for_df(user_records))
    return pd.DataFrame()

def get_user_accounts_df(user_id: str) -> pd.DataFrame:
    """Load one user's accounts into a DataFrame.

    Args:
        user_id: Identifier passed to ``get_user_accounts``.

    Returns:
        The cleaned account records, or an empty DataFrame when none come back.
    """
    account_records = get_user_accounts(user_id)
    if account_records:
        return pd.DataFrame(clean_data_for_df(account_records))
    return pd.DataFrame()

def get_user_transactions_df(user_id: str) -> pd.DataFrame:
    """Load one user's transactions into a DataFrame.

    Args:
        user_id: Identifier passed to ``get_user_transactions``.

    Returns:
        The cleaned transaction records, or an empty DataFrame when none come back.
    """
    transaction_records = get_user_transactions(user_id)
    if transaction_records:
        return pd.DataFrame(clean_data_for_df(transaction_records))
    return pd.DataFrame()

def get_user_features_df(user_id: str) -> pd.DataFrame:
    """Load one user's computed features into a DataFrame.

    Args:
        user_id: Identifier passed to ``get_user_features``.

    Returns:
        The cleaned feature records, or an empty DataFrame when none come back.
    """
    feature_records = get_user_features(user_id)
    if feature_records:
        return pd.DataFrame(clean_data_for_df(feature_records))
    return pd.DataFrame()

def get_user_personas_df(user_id: str) -> pd.DataFrame:
    """Load one user's persona assignments into a DataFrame.

    Args:
        user_id: Identifier passed to ``get_persona_assignments``.

    Returns:
        The cleaned persona records, or an empty DataFrame when none come back.
    """
    persona_records = get_persona_assignments(user_id)
    if persona_records:
        return pd.DataFrame(clean_data_for_df(persona_records))
    return pd.DataFrame()

def get_user_recommendations_df(user_id: str) -> pd.DataFrame:
    """Load one user's recommendations into a DataFrame.

    Args:
        user_id: Identifier passed to ``get_recommendations``.

    Returns:
        The cleaned recommendation records, or an empty DataFrame when none come back.
    """
    recommendation_records = get_recommendations(user_id)
    if recommendation_records:
        return pd.DataFrame(clean_data_for_df(recommendation_records))
    return pd.DataFrame()

def get_all_transactions_df() -> pd.DataFrame:
    """Get all transactions across all users as a DataFrame.

    Issues one ``get_user_transactions`` call per user returned by
    ``get_all_users`` and tags every transaction with that user's ``user_id``.

    Returns:
        One row per transaction (cleaned with ``clean_data_for_df``), or an
        empty DataFrame when there are no users or no transactions.

    Raises:
        KeyError: If a user record has no ``'user_id'`` key.
    """
    all_transactions = []
    # `or []` mirrors the falsy-result guards used by the per-user helpers, so
    # a loader returning None yields an empty frame instead of a TypeError.
    for user in get_all_users() or []:
        user_id = user['user_id']
        for txn in get_user_transactions(user_id) or []:
            # Copy rather than mutate the dict handed back by the data layer.
            all_transactions.append({**txn, 'user_id': user_id})
    if not all_transactions:
        return pd.DataFrame()
    return pd.DataFrame(clean_data_for_df(all_transactions))

def get_all_accounts_df() -> pd.DataFrame:
    """Get all accounts across all users as a DataFrame.

    Issues one ``get_user_accounts`` call per user returned by
    ``get_all_users`` and tags every account with that user's ``user_id``.

    Returns:
        One row per account (cleaned with ``clean_data_for_df``), or an empty
        DataFrame when there are no users or no accounts.

    Raises:
        KeyError: If a user record has no ``'user_id'`` key.
    """
    all_accounts = []
    # `or []` mirrors the falsy-result guards used by the per-user helpers, so
    # a loader returning None yields an empty frame instead of a TypeError.
    for user in get_all_users() or []:
        user_id = user['user_id']
        for acc in get_user_accounts(user_id) or []:
            # Copy rather than mutate the dict handed back by the data layer.
            all_accounts.append({**acc, 'user_id': user_id})
    if not all_accounts:
        return pd.DataFrame()
    return pd.DataFrame(clean_data_for_df(all_accounts))


## Explore Users

Get an overview of all users in the database.


In [None]:
# Load every user once; later cells reuse `users_df`.
users_df = get_users_df()

if users_df.empty:
    print("No users found in the database.")
else:
    print(f"Total users: {len(users_df)}")
    print("\nUsers DataFrame:")
    display(users_df)
    # include='all' summarises non-numeric columns as well.
    print("\nBasic statistics:")
    print(users_df.describe(include='all'))


## Explore Accounts

View all accounts across all users.


In [None]:
# Load every account once; the visualization cell reuses `accounts_df`.
accounts_df = get_all_accounts_df()

if accounts_df.empty:
    print("No accounts found in the database.")
else:
    print(f"Total accounts: {len(accounts_df)}")
    print("\nAccounts DataFrame:")
    display(accounts_df)

    # Summary statistics (each one only if the column is present)
    if 'balance' in accounts_df.columns:
        print("\nBalance statistics:")
        print(accounts_df['balance'].describe())

    for column, heading in (
        ('type', "\nAccounts by type:"),
        ('subtype', "\nAccounts by subtype:"),
    ):
        if column in accounts_df.columns:
            print(heading)
            print(accounts_df[column].value_counts())


## Explore Transactions

View all transactions across all users.


In [None]:
# Load every transaction once; later cells reuse `transactions_df`.
transactions_df = get_all_transactions_df()

if transactions_df.empty:
    print("No transactions found in the database.")
else:
    print(f"Total transactions: {len(transactions_df)}")
    print("\nTransactions DataFrame (first 100 rows):")
    display(transactions_df.head(100))

    # Summary statistics (each one only if the column is present)
    if 'amount' in transactions_df.columns:
        print("\nTransaction amount statistics:")
        print(transactions_df['amount'].describe())

    if 'date' in transactions_df.columns:
        # Unparseable dates become NaT instead of raising.
        transactions_df['date'] = pd.to_datetime(transactions_df['date'], errors='coerce')
        print("\nTransaction date range:")
        print(f"  From: {transactions_df['date'].min()}")
        print(f"  To: {transactions_df['date'].max()}")

    # Categories might be JSON strings, so they are counted as-is.
    for column, heading in (
        ('merchant_name', "\nTop 10 merchants by transaction count:"),
        ('category', "\nTop 10 categories by transaction count:"),
    ):
        if column in transactions_df.columns:
            print(heading)
            print(transactions_df[column].value_counts().head(10))


## Explore Features

View computed features across all users.


In [None]:
# Load computed features for all users.
# `or []` keeps the cell working if the loader returns None, matching the
# falsy-result guards in the per-user helper functions.
features_df = pd.DataFrame(clean_data_for_df(get_all_features() or []))

if features_df.empty:
    print("No features found in the database.")
else:
    print(f"Total features: {len(features_df)}")
    print("\nFeatures DataFrame:")
    display(features_df.head(50))

    # Summary statistics (each one only if the column is present)
    if 'signal_type' in features_df.columns:
        print("\nFeatures by signal type:")
        print(features_df['signal_type'].value_counts())

    if 'time_window' in features_df.columns:
        print("\nFeatures by time window:")
        print(features_df['time_window'].value_counts())

    if 'user_id' in features_df.columns:
        # Only the 10 users with the most feature rows are listed.
        print("\nFeatures per user:")
        print(features_df['user_id'].value_counts().head(10))


## Explore Personas

View persona assignments across all users.


In [None]:
# Load persona assignments for all users.
# `or []` keeps the cell working if the loader returns None, matching the
# falsy-result guards in the per-user helper functions.
personas_df = pd.DataFrame(clean_data_for_df(get_all_personas() or []))

if personas_df.empty:
    print("No persona assignments found in the database.")
else:
    print(f"Total persona assignments: {len(personas_df)}")
    print("\nPersonas DataFrame:")
    display(personas_df)

    # Summary statistics (each one only if the column is present)
    if 'persona' in personas_df.columns:
        print("\nPersona distribution:")
        print(personas_df['persona'].value_counts())

    if 'time_window' in personas_df.columns:
        print("\nPersonas by time window:")
        print(personas_df['time_window'].value_counts())

    if 'primary_persona' in personas_df.columns:
        print("\nPrimary persona distribution:")
        print(personas_df['primary_persona'].value_counts())


## Explore Recommendations

View recommendations across all users.


In [None]:
# Load recommendations for all users.
# `or []` keeps the cell working if the loader returns None, matching the
# falsy-result guards in the per-user helper functions.
recommendations_df = pd.DataFrame(clean_data_for_df(get_all_recommendations() or []))

if recommendations_df.empty:
    print("No recommendations found in the database.")
else:
    print(f"Total recommendations: {len(recommendations_df)}")
    print("\nRecommendations DataFrame:")
    display(recommendations_df.head(50))

    # Summary statistics (each one only if the column is present)
    if 'type' in recommendations_df.columns:
        print("\nRecommendations by type:")
        print(recommendations_df['type'].value_counts())

    if 'user_id' in recommendations_df.columns:
        # Only the 10 users with the most recommendations are listed.
        print("\nRecommendations per user:")
        print(recommendations_df['user_id'].value_counts().head(10))


## Explore Individual User

Get comprehensive data for a specific user. Replace `USER_ID` with an actual user ID from the users list above.


In [None]:
# Replace with an actual user_id from the users DataFrame
USER_ID = "user_001"  # Change this!


def _print_section_header(title: str) -> None:
    """Print a blank line followed by `title` framed by two 60-character rules."""
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)


# Get user info
user = get_user(USER_ID)
if not user:
    print(f"User {USER_ID} not found. Check the users DataFrame above for valid user IDs.")
else:
    print(f"User: {user.get('name', USER_ID)}")
    print(f"User ID: {USER_ID}")
    print("\nUser details:")
    for key, value in user.items():
        print(f"  {key}: {value}")

    # Accounts
    _print_section_header("ACCOUNTS")
    accounts = get_user_accounts_df(USER_ID)
    if accounts.empty:
        print("No accounts found.")
    else:
        display(accounts)

    # Transactions (first 20 rows, plus the summed amount when available)
    _print_section_header("TRANSACTIONS")
    transactions = get_user_transactions_df(USER_ID)
    if transactions.empty:
        print("No transactions found.")
    else:
        print(f"Total transactions: {len(transactions)}")
        display(transactions.head(20))
        if 'amount' in transactions.columns:
            print(f"\nTotal transaction amount: ${transactions['amount'].sum():,.2f}")

    # Features
    _print_section_header("COMPUTED FEATURES")
    features = get_user_features_df(USER_ID)
    if features.empty:
        print("No features found.")
    else:
        display(features)

    # Personas
    _print_section_header("PERSONA ASSIGNMENTS")
    personas = get_user_personas_df(USER_ID)
    if personas.empty:
        print("No persona assignments found.")
    else:
        display(personas)

    # Recommendations
    _print_section_header("RECOMMENDATIONS")
    recommendations = get_user_recommendations_df(USER_ID)
    if recommendations.empty:
        print("No recommendations found.")
    else:
        display(recommendations)


## Custom Queries

Use this section to write your own custom queries and analysis.


In [None]:
# Example: Find users with high transaction volumes
# Runs only if the transactions cell has been executed and returned rows.
if (
    'transactions_df' in globals()
    and not transactions_df.empty
    and 'amount' in transactions_df.columns
):
    user_spending = (
        transactions_df.groupby('user_id')['amount']
        .agg(['sum', 'count', 'mean'])
        .rename(columns={
            'sum': 'total_spent',
            'count': 'transaction_count',
            'mean': 'avg_transaction',
        })
        .sort_values('total_spent', ascending=False)
    )
    print("Top 10 users by total spending:")
    display(user_spending.head(10))

# Add your custom queries here...


## Visualization Examples

Basic visualizations of the data (requires matplotlib).


In [None]:
# Uncomment to install the plotting library into the kernel's environment if needed
# %pip install matplotlib

# Only the import is guarded, so real plotting errors are not mistaken for a
# missing dependency.
try:
    import matplotlib.pyplot as plt
except ImportError:
    print("Matplotlib not installed. Install with: pip install matplotlib")
else:
    # No backend is selected here: Jupyter already renders figures inline, and
    # matplotlib.use('inline') raises ValueError on matplotlib releases that do
    # not recognise that backend name.

    # Example: Transaction volume over time
    if 'transactions_df' in globals() and not transactions_df.empty:
        if 'date' in transactions_df.columns:
            # Parse into a local series so this cell does not rewrite
            # transactions_df; unparseable dates become NaT and are left out
            # of the grouping.
            txn_dates = pd.to_datetime(transactions_df['date'], errors='coerce')
            daily_transactions = transactions_df.groupby(txn_dates.dt.date).size()

            # Skip the plot if no date could be parsed at all.
            if not daily_transactions.empty:
                fig, ax = plt.subplots(figsize=(12, 6))
                daily_transactions.plot(kind='line', ax=ax)
                ax.set_title('Daily Transaction Volume')
                ax.set_xlabel('Date')
                ax.set_ylabel('Number of Transactions')
                ax.tick_params(axis='x', rotation=45)
                fig.tight_layout()
                plt.show()

    # Example: Account balance distribution
    if 'accounts_df' in globals() and not accounts_df.empty:
        if 'balance' in accounts_df.columns:
            fig, ax = plt.subplots(figsize=(10, 6))
            accounts_df['balance'].hist(bins=50, ax=ax)
            ax.set_title('Account Balance Distribution')
            ax.set_xlabel('Balance')
            ax.set_ylabel('Frequency')
            fig.tight_layout()
            plt.show()
