# Production Environment Verification

This notebook tests all components of the production dbt analytics environment.

**Server:** 159.203.140.78  
**Date:** October 6, 2025

## 1. Import Required Libraries

In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import psycopg2
from pathlib import Path
import os

# Confirm the import cell ran and record the interpreter / core library
# versions, one line each, so the output documents the environment.
import_report = [
    "✅ All imports successful!",
    f"Python version: {sys.version}",
    f"Pandas version: {pd.__version__}",
    f"NumPy version: {np.__version__}",
]
print("\n".join(import_report))

## 2. Test Database Connection

In [None]:
# Database connection parameters.
# The password is intentionally NOT stored in the notebook: it is read from
# the PGPASSWORD environment variable of the kernel process.
# NOTE(review): the password that used to be committed in this cell should be
# treated as exposed and rotated.
db_params = {
    'host': 'postgres',
    'port': 5432,
    'database': 'dbt_analytics',
    'user': 'andernet',
}
db_password = os.environ.get('PGPASSWORD')
if db_password:
    db_params['password'] = db_password

# Reset the results so a failed re-run never leaves values from an earlier,
# successful execution behind.
db_version = None
schemas = []

conn = None
try:
    # Connect to PostgreSQL
    conn = psycopg2.connect(**db_params)

    # The cursor context manager closes the cursor even if a query raises.
    with conn.cursor() as cursor:
        # Test query
        cursor.execute("SELECT version();")
        db_version = cursor.fetchone()[0]
        print("✅ Database connection successful!")
        print(f"PostgreSQL version: {db_version}")

        # Check which of the schemas used by this project exist
        cursor.execute("""
            SELECT schema_name 
            FROM information_schema.schemata 
            WHERE schema_name IN ('staging', 'marts', 'seeds', 'public')
            ORDER BY schema_name;
        """)
        schemas = cursor.fetchall()

    print("\n📁 Available schemas:")
    for (schema_name,) in schemas:
        print(f"  - {schema_name}")

except Exception as e:
    # Best-effort check: report the failure and let the notebook continue.
    print(f"❌ Database connection failed: {e}")
    if not db_password:
        print("   Hint: PGPASSWORD is not set in the kernel environment.")
finally:
    # Always release the connection, including when a query failed part-way.
    if conn is not None:
        conn.close()

## 3. Check Data Files

In [None]:
# Check for data files: list every CSV in the processed-data directory
# together with its size.
data_dir = Path('/app/data/processed')

# Start from an empty list so a re-run never shows results from an earlier run.
files = []

if data_dir.is_dir():
    files = sorted(data_dir.glob('*.csv'))

    if files:
        print(f"✅ Found {len(files)} CSV files:")
        print()

        # One row per file; size is converted from bytes to MB for display.
        file_info = [
            {
                'Filename': csv_path.name,
                'Size (MB)': f"{csv_path.stat().st_size / (1024 * 1024):.2f}",
            }
            for csv_path in files
        ]

        df_files = pd.DataFrame(file_info)
        print(df_files.to_string(index=False))
    else:
        # An existing but empty directory is not a successful check.
        print(f"⚠️ No CSV files found in: {data_dir}")
else:
    print(f"❌ Data directory not found: {data_dir}")

## 4. Load and Preview NYC Education Data

In [None]:
# Load the main NYC education dataset and show a quick preview.
# Relies on `data_dir` defined by the data-files cell.
nyc_file = data_dir / 'nyc_education_analyzed.csv'

if nyc_file.exists():
    df = pd.read_csv(nyc_file)
    print(f"✅ Loaded NYC Education Data: {len(df):,} rows")
    print("\n📊 Dataset Info:")
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()
    print("\n📈 First few rows:")
    display(df.head())
    print("\n📉 Statistical Summary:")
    display(df.describe())
else:
    print(f"❌ NYC education file not found: {nyc_file}")

## 5. Test Visualization

In [None]:
# Configure matplotlib
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Create a simple visualization
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Sample data visualization
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)

axes[0].plot(x, y1, label='sin(x)', linewidth=2)
axes[0].plot(x, y2, label='cos(x)', linewidth=2)
axes[0].set_title('Test Plot: Trigonometric Functions', fontsize=14, fontweight='bold')
axes[0].set_xlabel('x')
axes[0].set_ylabel('y')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Random data histogram
data = np.random.randn(1000)
axes[1].hist(data, bins=30, edgecolor='black', alpha=0.7)
axes[1].set_title('Test Plot: Normal Distribution', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Value')
axes[1].set_ylabel('Frequency')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("✅ Visualization test successful!")

## 6. Environment Summary

In [None]:
# Build the summary from what the earlier cells actually produced instead of
# printing a fixed "Working" for every component.
#
# The lookups go through globals() so this cell never raises NameError when an
# earlier cell was skipped or failed. Results are reliable after
# Restart Kernel -> Run All; on a long-lived kernel, names such as
# `db_version` may be left over from an earlier successful run.
state = globals()
summary_data_dir = state.get('data_dir')

# (label, passed, text shown when passed)
checks = [
    # This cell is executing, so the interpreter itself works.
    ("Python environment", True, "Working"),
    # `db_version` is assigned by the database cell after its test query.
    ("Database connection", state.get('db_version') is not None, "Working"),
    # Re-checked against the filesystem: directory present with >= 1 CSV.
    (
        "Data files",
        summary_data_dir is not None
        and summary_data_dir.is_dir()
        and any(summary_data_dir.glob('*.csv')),
        "Available",
    ),
    # The import cell binds these aliases.
    ("Pandas/NumPy", 'pd' in state and 'np' in state, "Working"),
    # `fig` is created by the visualization cell.
    ("Matplotlib/Seaborn", state.get('fig') is not None, "Working"),
]
all_ok = all(passed for _, passed, _ in checks)

print("="*50)
if all_ok:
    print("🎉 PRODUCTION ENVIRONMENT VERIFICATION COMPLETE")
else:
    print("⚠️ PRODUCTION ENVIRONMENT VERIFICATION INCOMPLETE")
print("="*50)
print()
for label, passed, ok_text in checks:
    if passed:
        print(f"✅ {label}: {ok_text}")
    else:
        print(f"❌ {label}: Not verified")
print()
if all_ok:
    print("🚀 Ready for data analysis and dbt development!")
    print()
    print("📚 Next steps:")
    print("  1. Create dbt staging models")
    print("  2. Create dbt marts models")
    print("  3. Run dbt transformations")
    print("  4. Build analysis notebooks")
else:
    print("🔧 Resolve the failed checks above, then Restart Kernel and Run All.")