# Financial Data Analysis with AI

This notebook demonstrates how to analyze financial transaction data using DuckDB and AI. The data is stored in a CSV file within the data folder and we'll explore various analytical insights.

## Import Required Libraries

Import pandas, numpy, and other necessary libraries for data analysis.

In [None]:
import pandas as pd
import numpy as np
import duckdb
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Install a blanket "ignore" filter: with no category or module given, this
# suppresses every warning raised for the rest of the kernel session.
# NOTE(review): presumably done to keep cell output uncluttered, but it also
# hides warnings that could point at real data or API problems - confirm.
warnings.filterwarnings('ignore')

# Confirmation that the import cell ran to completion.
print("Libraries imported successfully!")

## Set Up Data Directory Path

Define the path to the data folder and verify it exists.

In [None]:
# Locate the data directory.
# Candidates are tried in priority order: the Docker container mount first,
# then the relative path used for local development. This replaces manually
# commenting/uncommenting the path when switching environments.
DATA_DIR_CANDIDATES = [Path("/app/data"), Path("../data")]
CSV_NAME = "sample_data.csv"

# Use the first candidate that actually contains the CSV. When none does, keep
# the Docker path so the diagnostics below report the expected location.
data_dir = next(
    (candidate for candidate in DATA_DIR_CANDIDATES if (candidate / CSV_NAME).exists()),
    DATA_DIR_CANDIDATES[0],
)
csv_file = data_dir / CSV_NAME

# Diagnostics: show what was selected and whether it is present on disk.
print(f"Data directory: {data_dir}")
print(f"CSV file path: {csv_file}")
print(f"Data directory exists: {data_dir.exists()}")
print(f"CSV file exists: {csv_file.exists()}")

## Load CSV Data from Data Folder

Use pandas to read the sample_data.csv file from the data directory.

In [None]:
# Load the CSV data.
# The primary location is `csv_file`; a relative path is tried as a fallback
# for local development. `df` ends up as None when neither can be loaded, and
# the cells that follow check `df is not None` before using it.
fallback_csv = "../data/sample_data.csv"

# Reset first so a failed re-run never leaves a stale frame from an earlier
# execution of this cell in the kernel namespace.
df = None
try:
    df = pd.read_csv(csv_file)
    print(f"Successfully loaded {len(df)} transactions from {csv_file}")
    print(f"Data shape: {df.shape}")
except FileNotFoundError:
    print(f"Error: File not found at {csv_file}")
    # Fallback for local development
    try:
        df = pd.read_csv(fallback_csv)
        print("Loaded data using fallback path")
    except Exception as exc:
        # Deliberately best-effort: keep the notebook running, but catch only
        # Exception (not KeyboardInterrupt/SystemExit) and report the cause
        # instead of silently discarding it.
        print("Could not load data from any path")
        print(f"Fallback error ({fallback_csv}): {type(exc).__name__}: {exc}")
        df = None

## Display Data Information

Show the first few rows, data types, and basic information about the dataset.

In [None]:
# Overview of the loaded frame: preview, dtypes, size and null counts.
# Nothing is shown when loading failed and df is None.
if df is not None:
    # Preview of the leading rows
    print("=== FIRST 5 ROWS ===")
    display(df.head(5))

    # Column dtypes as inferred at load time
    print("\n=== DATA TYPES ===")
    display(df.dtypes)

    # Dimensions, column names and deep memory footprint
    print("\n=== DATASET INFO ===")
    print(f"Shape: {df.shape}")
    print(f"Columns: {[*df.columns]}")
    print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024:.2f} KB")

    # Per-column count of missing entries
    print("\n=== MISSING VALUES ===")
    display(df.isna().sum())

## Basic Data Exploration

Perform initial exploration including summary statistics and data shape analysis.

In [None]:
# Summary statistics, category/account breakdowns and headline totals.
# The whole cell is skipped when loading failed and df is None.
if df is not None:
    # Parse the date column into pandas datetimes (stored back on df)
    df['date'] = pd.to_datetime(df['date'])

    print("=== SUMMARY STATISTICS ===")
    display(df.describe())

    # Frequency of each transaction category
    print("\n=== TRANSACTION CATEGORIES ===")
    category_counts = df['category'].value_counts()
    display(category_counts)

    # Frequency of each account type
    print("\n=== ACCOUNT TYPES ===")
    account_counts = df['account_type'].value_counts()
    display(account_counts)

    # Positive amounts are summed as income, negative amounts as expenses;
    # rows with an amount of exactly zero contribute to neither.
    print("\n=== FINANCIAL SUMMARY ===")
    total_income = df.loc[df['amount'] > 0, 'amount'].sum()
    total_expenses = df.loc[df['amount'] < 0, 'amount'].sum()
    net_amount = df['amount'].sum()

    print(f"Total Income: ${total_income:,.2f}")
    # Expenses are stored as negative values; report their magnitude
    print(f"Total Expenses: ${abs(total_expenses):,.2f}")
    print(f"Net Amount: ${net_amount:,.2f}")
    print(f"Date Range: {df['date'].min()} to {df['date'].max()}")

## Monitor AI Model Download Progress

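Check whether the Llama 3.1 model download has finished — the cell looks for any model listed by `docker exec ollama ollama list` — and, once a model is available, send a test question to the AI service at `http://localhost:5000/ask`. Because the check shells out to `docker`, this cell must run somewhere the Docker CLI can reach the `ollama` container.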

In [None]:
import requests
import subprocess
import time
import json

# Check if Ollama model is ready
def check_ollama_status(timeout=15):
    """Report whether the ``ollama`` container lists at least one model.

    Runs ``docker exec ollama ollama list`` and inspects its output, printing
    a status line for each outcome.

    Args:
        timeout: Seconds to wait for the docker command before giving up, so
            a stalled Docker daemon cannot block the kernel indefinitely.

    Returns:
        True when the command exits with status 0 and prints a header line
        plus at least one further non-blank line. False when no model is
        listed, the command exits non-zero, times out, or cannot be run.
    """
    try:
        # Try to list models
        result = subprocess.run(
            ['docker', 'exec', 'ollama', 'ollama', 'list'],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"❌ Error: model check timed out after {timeout} seconds")
        return False
    except Exception as e:
        # e.g. the docker executable is not installed or not on PATH
        print(f"❌ Error: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ Error checking models: {result.stderr}")
        return False

    # Ignore blank lines so stray whitespace is never counted as a model row.
    lines = [line for line in result.stdout.splitlines() if line.strip()]
    if len(lines) > 1:  # Header + at least one model
        print("✅ Ollama models available:")
        print(result.stdout)
        return True

    print("⏳ No models available yet - still downloading...")
    return False

# Test AI functionality
def test_ai_query(question='What is the total spending amount?',
                  url='http://localhost:5000/ask',
                  timeout=(5, 120)):
    """Send one question to the AI service and print its reply.

    POSTs ``{'question': question}`` as JSON to ``url`` and, on HTTP 200,
    prints the ``sql`` and ``answer`` fields of the JSON response ('N/A' when
    a field is absent).

    Args:
        question: Natural-language question to send.
        url: Endpoint of the AI service.
        timeout: Passed to ``requests.post``; a ``(connect, read)`` tuple in
            seconds. Without it the request could block the kernel forever if
            the service never answers.

    Returns:
        True on an HTTP 200 response whose body parses as JSON; False on any
        other status code or on any exception (connection failure, timeout,
        invalid JSON), which is printed rather than raised.
    """
    try:
        response = requests.post(url, json={'question': question}, timeout=timeout)
        if response.status_code == 200:
            result = response.json()
            print("✅ AI Query successful!")
            print(f"Question: {question}")
            print(f"SQL: {result.get('sql', 'N/A')}")
            print(f"Answer: {result.get('answer', 'N/A')}")
            return True
        else:
            print(f"❌ AI Query failed: {response.status_code}")
            print(response.text)
            return False
    except Exception as e:
        # Best-effort probe: report the failure instead of aborting the cell.
        print(f"❌ Error testing AI: {e}")
        return False

# Monitor download progress
def monitor_download():
    """Check model availability and, when a model is ready, probe the AI service.

    Prints a status banner, then calls ``check_ollama_status()``. If that
    reports a model, ``test_ai_query()`` is run; otherwise hints for following
    the download from a terminal are printed. Returns None.
    """
    print("🔍 Checking Ollama model status...")

    if not check_ollama_status():
        # Nothing to query yet: tell the reader how to follow the download.
        hints = (
            "\n📊 To check download progress, run in terminal:",
            "docker logs ollama --tail 10",
            "\n⏰ Download should complete in ~25 seconds at current speed",
            "You can re-run this cell to check again!",
        )
        for hint in hints:
            print(hint)
        return

    print("\n🚀 Testing AI functionality...")
    test_ai_query()

# Run the monitoring: performs a single status check (and an AI test query when
# a model is available). It does not poll; re-run this cell to check again.
monitor_download()

: 