# ATM Cash Demand Forecasting - Data Generation & EDA

## Week 1: Data Collection & Preprocessing

This notebook:
1. Generates synthetic ATM transaction data (12 months)
2. Performs Exploratory Data Analysis
3. Identifies patterns and seasonality
4. Exports the dataset for ML modeling

In [1]:
# Import required libraries
import sys
from datetime import datetime, timedelta
from pathlib import Path

sys.path.append('..')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Plot appearance for every figure in this notebook.
# The matplotlib style sheet is applied first; the seaborn palette is set
# afterwards so that its colour cycle is the one left in effect.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

# Confirmation that the setup cell ran to completion
print("✓ Libraries imported successfully")

✓ Libraries imported successfully


## 1. Generate Synthetic Data

Generate 12 months of realistic ATM transaction data with:
- 4 different ATM locations
- Seasonal patterns
- Weekend/weekday variations
- Holiday effects
- Payday spikes

In [2]:
# Make the project package importable regardless of the kernel's working directory.
# The relative '..' entry added to sys.path in the imports cell only resolves
# correctly when the kernel starts inside the notebook's own folder; otherwise
# the import below fails with ModuleNotFoundError. Walk upward from the working
# directory to the first folder that contains `ml_models` and register it.
project_root = next(
    (folder for folder in (Path.cwd(), *Path.cwd().parents)
     if (folder / 'ml_models').is_dir()),
    None,
)
if project_root is not None and str(project_root) not in sys.path:
    # Appended (not inserted first) so existing sys.path precedence is unchanged.
    sys.path.append(str(project_root))

from ml_models.data_generator import ATMDataGenerator

# Initialize generator (4 ATMs, 12 months, fixed seed for reproducibility)
generator = ATMDataGenerator(num_atms=4, months=12, seed=42)

# Generate data
# NOTE(review): save_to_csv is assumed to return the generated DataFrame and to
# write the CSV relative to the working directory — confirm against
# ml_models.data_generator.
print("Generating 12 months of ATM transaction data...\n")
df = generator.save_to_csv('atm_demand_data.csv')

# Display first few rows
print("\nFirst 5 records:")
df.head()

ModuleNotFoundError: No module named 'ml_models'

In [None]:
# Structural overview of the dataset: column dtypes / non-null counts first,
# then its shape, the span of the date column and the number of distinct ATMs.
print("Dataset Information:")
df.info()

date_column = df['date']
atm_id_column = df['atm_id']

print("\nDataset Shape:", df.shape)
print("\nDate Range:", date_column.min(), "to", date_column.max())
print("\nUnique ATMs:", atm_id_column.nunique())

## 2. Exploratory Data Analysis

In [None]:
# Descriptive statistics for the demand-related numeric columns.
# The describe() frame is the cell's last expression so it renders as a table.
demand_columns = ['total_demand', 'num_transactions', 'avg_transaction']

print("Summary Statistics for Total Demand:")
df[demand_columns].describe()

In [None]:
# Demand by ATM: one histogram panel of daily demand per ATM.
atm_labels = df[['atm_id', 'atm_name']].drop_duplicates()

# Size the grid from the number of ATMs instead of hard-coding 2x2, so the cell
# keeps working if the generator is configured with more (or fewer) ATMs.
# For 4 ATMs this yields the same 2x2 grid and 15x10 figure as before.
n_atms = len(atm_labels)
n_cols = 2
n_rows = max(1, -(-n_atms // n_cols))  # ceiling division, at least one row

# squeeze=False keeps `axes` two-dimensional even for a single row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(7.5 * n_cols, 5 * n_rows), squeeze=False)
fig.suptitle('Daily Demand Distribution by ATM', fontsize=16, fontweight='bold')

panels = axes.ravel()
for ax, (atm_id, atm_name) in zip(panels, atm_labels.itertuples(index=False, name=None)):
    atm_data = df.loc[df['atm_id'] == atm_id, 'total_demand']

    ax.hist(atm_data, bins=50, edgecolor='black', alpha=0.7)
    ax.set_title(f'{atm_name}\nMean: ${atm_data.mean():,.0f}', fontsize=12)
    ax.set_xlabel('Daily Demand ($)')
    ax.set_ylabel('Frequency')
    ax.grid(True, alpha=0.3)

# Hide any leftover panel when the ATM count does not fill the grid
for unused_ax in panels[n_atms:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Time series plot for all ATMs
# Parse the date column so the x axis is a real time axis. This overwrites
# df['date'] in place, so every later cell sees datetimes as well.
df['date'] = pd.to_datetime(df['date'])

fig, ax = plt.subplots(figsize=(16, 8))

# One line per ATM, drawn in order of first appearance in the frame
for atm_id in df['atm_id'].unique():
    atm_series = df.loc[df['atm_id'] == atm_id].sort_values('date')
    ax.plot(
        atm_series['date'],
        atm_series['total_demand'],
        label=atm_series['atm_name'].iloc[0],  # display name taken from the first row
        linewidth=1.5,
        alpha=0.8,
    )

ax.set_title('ATM Cash Demand Over Time (12 Months)', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Daily Demand ($)', fontsize=12)
ax.legend(fontsize=10, loc='upper left')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 3. Pattern Analysis

In [None]:
# Weekend vs Weekday analysis: mean daily demand per ATM, split by the weekend flag.
weekend_comparison = (
    df.groupby(['atm_name', 'is_weekend'])['total_demand']
    .mean()
    .unstack()
    # Label columns by flag VALUE rather than by position, so a missing flag
    # value cannot mislabel a column or raise a length-mismatch error.
    # The False/True keys also match 0/1 flags, which compare and hash equal.
    .rename(columns={False: 'Weekday', True: 'Weekend'})
)
# Drop the leftover 'is_weekend' header so the printed table has plain columns
weekend_comparison.columns.name = None

ax = weekend_comparison.plot(kind='bar', figsize=(12, 6), width=0.8)
ax.set_title('Average Demand: Weekday vs Weekend', fontsize=14, fontweight='bold')
ax.set_xlabel('ATM Location', fontsize=12)
ax.set_ylabel('Average Daily Demand ($)', fontsize=12)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(title='Day Type')
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

print("\nWeekend vs Weekday Demand:")
print(weekend_comparison)

In [None]:
# Day of week patterns: mean demand per weekday across all ATMs.
# NOTE(review): the labels and the weekend highlight below assume day_of_week
# is coded 0=Monday ... 6=Sunday — confirm against the data generator.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
dow_demand = df.groupby('day_of_week')['total_demand'].mean()

weekday_positions = range(7)
fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(weekday_positions, dow_demand.values, color='skyblue', edgecolor='navy', alpha=0.7)
ax.set_xticks(weekday_positions)
ax.set_xticklabels(day_names, rotation=45)
ax.set_title('Average Demand by Day of Week (All ATMs)', fontsize=14, fontweight='bold')
ax.set_xlabel('Day of Week', fontsize=12)
ax.set_ylabel('Average Demand ($)', fontsize=12)
ax.grid(True, alpha=0.3, axis='y')

# Highlight weekends (positions 5 and 6)
for weekend_position in (5, 6):
    bars[weekend_position].set_color('lightcoral')

fig.tight_layout()
plt.show()

In [None]:
# Holiday impact: mean daily demand per ATM, split by the holiday flag.
holiday_impact = (
    df.groupby(['atm_name', 'is_holiday'])['total_demand']
    .mean()
    .unstack()
    # Label columns by flag VALUE rather than by position; the False/True keys
    # also match 0/1 flags, which compare and hash equal.
    .rename(columns={False: 'Regular Day', True: 'Holiday'})
)
# Drop the leftover 'is_holiday' header so the printed table has plain columns
holiday_impact.columns.name = None

ax = holiday_impact.plot(kind='bar', figsize=(12, 6), width=0.8, color=['steelblue', 'coral'])
ax.set_title('Average Demand: Regular Days vs Holidays', fontsize=14, fontweight='bold')
ax.set_xlabel('ATM Location', fontsize=12)
ax.set_ylabel('Average Daily Demand ($)', fontsize=12)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend(title='Day Type')
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

print("\nHoliday Impact:")
print(holiday_impact)

# Percentage change of holiday demand relative to regular days, per ATM.
# Printed one line per ATM: interpolating the whole Series into a single
# f-string produced a multi-line repr with the '%' stuck after the dtype footer.
holiday_boost_pct = ((holiday_impact['Holiday'] / holiday_impact['Regular Day'] - 1) * 100).round(1)
print("\nHoliday boost:")
for atm_name, boost_pct in holiday_boost_pct.items():
    print(f"  {atm_name}: {boost_pct:.1f}%")

In [None]:
# Monthly seasonality: mean daily demand per calendar month across all ATMs.
# NOTE(review): plotting against positions 1..12 assumes the 'month' column is
# coded 1-12 and that every month is present — confirm against the generator.
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_demand = df.groupby('month')['total_demand'].mean()

month_positions = range(1, 13)
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(
    month_positions,
    monthly_demand.values,
    marker='o',
    linewidth=2,
    markersize=8,
    color='darkblue',
)
ax.set_xticks(month_positions)
ax.set_xticklabels(month_names)
ax.set_title('Average Demand by Month (Seasonal Patterns)', fontsize=14, fontweight='bold')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Average Daily Demand ($)', fontsize=12)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Correlation heatmap: pairwise correlations between demand and the calendar features.
correlation_cols = [
    'total_demand', 'num_transactions', 'day_of_week', 'is_weekend',
    'is_holiday', 'is_month_end', 'is_payday', 'month',
]
correlation_matrix = df[correlation_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,          # write each coefficient inside its cell
    fmt='.2f',
    cmap='coolwarm',
    center=0,            # diverging colour map centred on zero correlation
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title('Feature Correlation Matrix', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

## 4. Data Export for ML Modeling

In [None]:
# Save processed data
output_file = '../ml_models/data/atm_demand_data.csv'
# Create the target folder first: to_csv does not create missing directories
# and would otherwise fail on a fresh checkout.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_file, index=False)
print(f"✓ Data saved to {output_file}")

# Summary report
atm_count = df['atm_id'].nunique()
# Guard against an empty frame, where the ATM count is zero
records_per_atm = len(df) // atm_count if atm_count else 0

print("\n" + "="*60)
print("DATA GENERATION & EDA COMPLETE")
print("="*60)
print(f"Total records: {len(df):,}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
print(f"ATMs: {atm_count}")
print(f"Average daily demand: ${df['total_demand'].mean():,.0f}")
print(f"Total data points per ATM: {records_per_atm}")
# NOTE(review): the findings below are fixed text, not computed from the data —
# re-check them against the plots above whenever the generator settings change.
print("\nKey Findings:")
print("  • Strong weekend patterns detected")
print("  • Holiday effects vary by location type")
print("  • Seasonal trends present (higher in Nov-Dec)")
print("  • Payday spikes clearly visible")
print("\n✓ Ready for ML model training!")
print("="*60)