In [None]:
# Import Required Libraries
import sys
sys.path.append('../')  # Add project root to path

import pandas as pd
import logging
from pathlib import Path
from datetime import datetime

# Import our reusable DataLoader
from src.data.loaders import DataLoader, ExcelExporter

ModuleNotFoundError: No module named 'panda'

In [None]:
# Monthly Month-End Data Consolidation

This notebook consolidates all daily trial balance files for a given month and extracts unique account combinations.

## Objectives:
1. Load all CSV files from the target folder (month's trial balance data)
2. Consolidate all data into a single DataFrame
3. Extract unique account combinations
4. Add month metadata columns (`Month`, `Year`, `Month_Year`) for tracking
5. Export results to Excel

## Setup Logging and Parameters

Configure logging and define parameters that can be injected by the orchestrator (papermill).

In [None]:
# Parameters (papermill may override these when the notebook is orchestrated)
year = '2025'
month = 'September'

# Columns whose distinct value combinations are extracted further down
target_columns = ['accountname', 'level1accountname']

# Logging: INFO and above, rendered as "HH:MM:SS - LEVEL - message"
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Announce the run configuration so the executed notebook records it
logger.info(f"üìÖ Processing: {month} {year}")
logger.info(f"üéØ Target columns: {target_columns}")

## Load All Files from Target Folder

Use the DataLoader class to load all CSV files from the specified month's Trial Balance folder.

In [None]:
# Root directory of the raw Trial Balance extracts, wrapped in a DataLoader
base_path = Path('../data/raw/Trial Balance')
loader = DataLoader(base_path=base_path, logger=logger)

# Daily files are read from <base>/<year>/<month>/Trial Balance and are
# expected to be named after their date: 09-01-2025.csv, 09-02-2025.csv, etc.
data_folder = base_path / year / month / 'Trial Balance'

logger.info(f"üìÇ Loading files from: {data_folder}")

# Read every daily trial balance file; date_format describes how the date
# is encoded in each filename
daily_data = loader.load_all_csv_files(folder=data_folder, date_format='%m-%d-%Y')

# Report what was loaded (or that nothing was)
if not daily_data:
    logger.error("‚ùå No data loaded!")
else:
    logger.info(f"‚úÖ Loaded {len(daily_data)} daily files")

    # First and last key in sorted order give the covered date range
    dates = sorted(daily_data)
    logger.info(f"   Date range: {dates[0]} to {dates[-1]}")

    # Peek at the first loaded frame to show the column layout
    sample_df = next(iter(daily_data.values()))
    logger.info(f"   Columns available: {list(sample_df.columns)}")

## Consolidate All Data

Combine all daily DataFrames into a single consolidated DataFrame with date tracking.

In [None]:
# Stack every daily frame into one DataFrame, tagging rows with a 'Date' column
consolidated_df = loader.consolidate_data(
    data_dict=daily_data,
    add_date_column=True,
    date_column_name='Date',
)

# Stamp each row with the period being processed
for column_name, column_value in {
    'Month': month,
    'Year': year,
    'Month_Year': f"{month} {year}",
}.items():
    consolidated_df[column_name] = column_value

# Log the size of the consolidated result
row_count = len(consolidated_df)
column_count = len(consolidated_df.columns)
logger.info(f"üìä Consolidated DataFrame shape: {consolidated_df.shape}")
logger.info(f"   Total records: {row_count:,}")
logger.info(f"   Total columns: {column_count}")

# Preview: the trailing expression renders the first rows as the cell output
print("\nüìã Sample of consolidated data:")
consolidated_df.head()

## Extract Unique Account Combinations

Get unique combinations of the target columns (e.g., account names and classifications).

In [None]:
# Verify target columns exist
available_columns = list(consolidated_df.columns)
logger.info(f"üìù Available columns: {available_columns}")

# Split the requested columns into those present in the data and those missing
valid_target_columns = [col for col in target_columns if col in available_columns]
missing_target_columns = [col for col in target_columns if col not in available_columns]

if not valid_target_columns:
    logger.error(f"‚ùå None of the target columns found: {target_columns}")
    logger.error(f"   Available columns: {available_columns}")
    raise ValueError("Target columns not found in data")

# A partial match still proceeds, but say so: the unique combinations below
# are then built from fewer columns than requested.
if missing_target_columns:
    logger.warning(f"Target columns missing from data (skipped): {missing_target_columns}")

logger.info(f"‚úì Using columns: {valid_target_columns}")

# Extract unique combinations
unique_accounts = loader.get_unique_records(
    df=consolidated_df,
    columns=valid_target_columns,
    sort_by=valid_target_columns  # Sort by same columns
)

# Add monthly metadata to unique records
unique_accounts['Month'] = month
unique_accounts['Year'] = year
unique_accounts['Month_Year'] = f"{month} {year}"

# Count how many consolidated rows carry each combination.
# A single grouped pass replaces a per-row scan of the whole consolidated frame;
# dropna=False keeps combinations containing missing values, which an
# equality comparison can never match (NaN != NaN) and would report as 0.
record_counts = (
    consolidated_df
    .groupby(valid_target_columns, dropna=False, observed=True)
    .size()
    .rename('Record_Count')
    .reset_index()
)

# Left-merge so every unique combination keeps its row and order; the grouped
# keys are unique, so the row count is unchanged and the original index can be
# restored afterwards.
original_index = unique_accounts.index
unique_accounts = unique_accounts.merge(record_counts, on=valid_target_columns, how='left')
unique_accounts.index = original_index

# Combinations with no matching consolidated rows get an explicit 0
unique_accounts['Record_Count'] = unique_accounts['Record_Count'].fillna(0).astype(int)

logger.info(f"üéØ Unique accounts found: {len(unique_accounts):,}")

# Display summary
print("\nüìä Unique Account Combinations:")
unique_accounts.head(10)

## Export Results to Excel

Export the unique accounts, the consolidated data, and a summary sheet of run statistics to Excel with proper formatting.

In [None]:
# Destination folder: ../data/processed/Consolidation/<year>, created on demand
output_dir = Path('../data/processed/Consolidation') / year
output_dir.mkdir(parents=True, exist_ok=True)

# The filename carries the period plus a second-resolution run timestamp
output_filename = f"Monthly_Consolidation_{month}_{year}_{datetime.now():%Y%m%d_%H%M%S}.xlsx"
output_path = output_dir / output_filename

logger.info(f"üíæ Exporting to: {output_path}")

# Workbook writer bound to the destination path
exporter = ExcelExporter(output_path=output_path, logger=logger)

# Data sheets: frozen header row and an autofilter on each
for sheet_df, sheet_title in (
    (unique_accounts, 'Unique Accounts'),
    (consolidated_df, 'Consolidated Data'),
):
    exporter.add_sheet(
        df=sheet_df,
        sheet_name=sheet_title,
        freeze_panes=(1, 0),
        autofilter=True,
    )

# Run statistics as (metric, value) pairs; the date range falls back to 'N/A'
# when no daily files were loaded
summary_rows = [
    ('Processing Date', datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
    ('Month', month),
    ('Year', year),
    ('Number of Daily Files', len(daily_data)),
    ('Date Range Start', min(daily_data) if daily_data else 'N/A'),
    ('Date Range End', max(daily_data) if daily_data else 'N/A'),
    ('Total Records', f"{len(consolidated_df):,}"),
    ('Unique Account Combinations', f"{len(unique_accounts):,}"),
    ('Target Columns', ', '.join(valid_target_columns)),
]
summary_data = {
    'Metric': [metric for metric, _ in summary_rows],
    'Value': [value for _, value in summary_rows],
}
summary_df = pd.DataFrame(summary_data)

# Summary sheet: frozen header row, no autofilter
exporter.add_sheet(
    df=summary_df,
    sheet_name='Summary',
    freeze_panes=(1, 0),
    autofilter=False,
)

# Write the workbook and report where it ended up
saved_path = exporter.save()

logger.info("‚úÖ Export completed successfully!")
logger.info(f"   File location: {saved_path}")
logger.info("   Sheets created: Summary, Unique Accounts, Consolidated Data")

print(f"\n‚úÖ SUCCESS! File saved to:\n{saved_path}")