In [None]:
# 🌍 Geo-Financial Intelligence Platform - Exploratory Analysis

## Decoding the Spatial DNA of Financial Behavior

This comprehensive analysis notebook demonstrates the power of spatial intelligence in financial technology applications. We'll build a complete geospatial data science pipeline that transforms location data into actionable financial insights.

### 🎯 Objectives
1. **Spatial Feature Engineering**: Create 50+ geospatial features per location
2. **Credit Risk Enhancement**: Improve risk assessment with spatial intelligence
3. **Market Opportunity Analysis**: Identify optimal merchant acquisition targets
4. **Interactive Visualization**: Create compelling spatial dashboards


In [None]:
## 📦 Setup and Imports


In [None]:
# Core libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import xgboost as xgb
import shap

# Geospatial
import h3
import osmnx as ox
from shapely.geometry import Point, Polygon

# System
import sys
from pathlib import Path
import json
from datetime import datetime
import time

# Add src to path
# Resolve to an absolute path so the entry keeps pointing at the same directory
# even if the working directory changes later, and skip the append when the
# entry is already present so re-running this cell does not add duplicates.
SRC_DIR = Path('../src').resolve()
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# Local imports
from feature_engineering.hexgrid import HexagonalGrid, create_porto_alegre_grid
from data_pipeline.data_sources import DataPipeline
from feature_engineering.spatial_features import SpatialFeatureEngine, create_comprehensive_features
from models.credit_risk_model import GeoCreditRiskModel, CreditRiskDataGenerator
from models.merchant_acquisition import MarketOpportunityAnalyzer, MerchantAcquisitionOptimizer

# Display settings
# Both pandas display options are set to None (no fixed column cap / no fixed width).
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)
plt.style.use('seaborn-v0_8-darkgrid')

# Confirm the environment is ready and record when this run began.
print("✅ All libraries imported successfully!")
started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"📅 Analysis started at: {started_at}")


In [None]:
## 🗺️ Step 1: Hexagonal Grid Generation

We start by creating a hexagonal grid system using H3 (Uber's hexagonal hierarchical spatial index) for consistent spatial analysis across Porto Alegre's metropolitan area. The code cell below runs the whole demo pipeline end to end; grid generation is its first step.


In [None]:
# Demo: Complete Geo-Financial Intelligence Platform Pipeline
#
# Runs the six demo stages in order: hexagonal grid -> data integration ->
# feature engineering -> credit risk model -> market opportunity analysis ->
# expansion optimization. Each stage result is bound to a module-level name
# (hex_grid, datasets, features_gdf, ...) so it can be inspected afterwards.

# Tunable demo parameters, gathered here instead of being buried inline.
H3_RESOLUTION = 9                  # resolution passed to create_porto_alegre_grid
RANDOM_STATE = 42                  # seed for the synthetic loan generator
LOANS_PER_HEX = 3                  # synthetic loans generated per hexagon
BASELINE_AUC = 0.75                # reference AUC the trained model is compared against
HIGH_OPPORTUNITY_THRESHOLD = 0.7   # minimum opportunity_score counted as "high-opportunity"
EXPANSION_BUDGET = 500000          # budget handed to the optimizer (units not shown here; TODO confirm)
# Columns of features_gdf that are excluded when counting features.
NON_FEATURE_COLUMNS = ('hex_id', 'geometry', 'area_km2')

print("🌍 Running Geo-Financial Intelligence Platform Demo...")

# 1. Create hexagonal grid
print("\n🔧 Step 1: Creating hexagonal grid system...")
hex_grid = create_porto_alegre_grid(resolution=H3_RESOLUTION)
grid_stats = hex_grid.get_grid_stats()
print(f"   ✅ Generated {grid_stats['total_hexagons']:,} hexagons covering {grid_stats['total_area_km2']:.0f} km²")

# 2. Run data pipeline
print("\n🔄 Step 2: Running multi-source data integration...")
data_pipeline = DataPipeline()
datasets = data_pipeline.run_full_pipeline()
# Count only sources that actually delivered rows. The `is not None` guard keeps
# the count from raising if a source is present but unset (the merchants lookup
# below already tolerates a missing dataset).
integrated_count = sum(1 for d in datasets.values() if d is not None and not d.empty)
print(f"   ✅ Integrated {integrated_count} datasets")

# 3. Generate comprehensive features
print("\n⚙️ Step 3: Engineering spatial features...")
features_gdf = create_comprehensive_features(hex_grid, datasets)
feature_count = sum(1 for col in features_gdf.columns if col not in NON_FEATURE_COLUMNS)
print(f"   ✅ Generated {feature_count} spatial intelligence features")

# 4. Credit risk modeling
print("\n🤖 Step 4: Training credit risk model...")
data_generator = CreditRiskDataGenerator(random_state=RANDOM_STATE)
loans_df = data_generator.generate_synthetic_loan_data(features_gdf, n_loans_per_hex=LOANS_PER_HEX)
credit_model = GeoCreditRiskModel()
X, y = credit_model.prepare_training_data(features_gdf, loans_df)
performance = credit_model.train_model(X, y)
# Relative change of the test AUC against the baseline, in percent.
auc_lift_pct = ((performance['test_auc'] - BASELINE_AUC) / BASELINE_AUC) * 100
print(f"   ✅ Model AUC: {performance['test_auc']:.3f} ({auc_lift_pct:+.1f}% vs baseline)")

# 5. Market opportunity analysis
print("\n🏪 Step 5: Analyzing market opportunities...")
market_analyzer = MarketOpportunityAnalyzer()
# .get() passes None through when no merchants dataset was produced.
opportunity_analysis = market_analyzer.analyze_market_opportunities(features_gdf, datasets.get('merchants'))
high_opp_count = (opportunity_analysis['opportunity_score'] >= HIGH_OPPORTUNITY_THRESHOLD).sum()
print(f"   ✅ Identified {high_opp_count:,} high-opportunity locations (avg ROI: {opportunity_analysis['expected_roi'].mean():.2f}x)")

# 6. Expansion optimization
print("\n💼 Step 6: Optimizing expansion strategy...")
optimizer = MerchantAcquisitionOptimizer()
optimization_results = optimizer.optimize_expansion_plan(opportunity_analysis, budget=EXPANSION_BUDGET)
# The optimizer reports failure through an 'error' key rather than by raising.
optimization_ok = 'error' not in optimization_results
if optimization_ok:
    print(f"   ✅ Optimized plan: {optimization_results['total_selected']} acquisitions, {optimization_results['average_roi']:.2f}x ROI")
else:
    print(f"   ⚠️ Optimization: {optimization_results['error']}")

# Only claim success when the final stage actually produced a plan.
if optimization_ok:
    print("\n🎉 Platform Demo Completed Successfully!")
else:
    print("\n⚠️ Platform Demo completed with warnings: expansion optimization did not produce a plan")
print(f"   📊 Processed {len(features_gdf):,} locations with {feature_count} features each")
print("   🚀 Ready for production deployment in financial technology applications")
