# Tier 1 Analysis - Clean Version

## Setup Instructions
1. **Install dependencies**: `pip install -r requirements.txt`
2. **Create a `.env` file** in the notebook's working directory with your database credentials (see the format below)
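3. **Place `facility type and pc.sql`** in the notebook's working directory (the query cell opens it by relative path)
4. **Run the cells in order**, top to bottom — later cells rely on variables created by earlier ones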

## .env File Format
```env
DB_USER=your_username
DB_PASSWORD=your_password
DB_HOST=wg-data-rds.data.higg.org
DB_PORT=5432
DB_NAME=db_higg
```

In [30]:
# Setup: Import libraries and load environment variables
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

# Load environment variables (python-dotenv reads them from a .env file when it finds one)
load_dotenv()

# Read the five connection settings; os.getenv returns None for any that are unset
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')

# Fail fast on incomplete configuration: without this check an unset value
# would be formatted into the URL below as the literal text "None" and the
# cell would still report success.
_db_settings = {
    'DB_USER': DB_USER,
    'DB_PASSWORD': DB_PASSWORD,
    'DB_HOST': DB_HOST,
    'DB_PORT': DB_PORT,
    'DB_NAME': DB_NAME,
}
_missing_settings = [name for name, value in _db_settings.items() if value is None]
if _missing_settings:
    raise EnvironmentError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}. "
        "Check your .env file."
    )

# Create connection string. The user name and password are percent-encoded
# (safe='' also encodes '/') so characters such as '@', ':' or '/' in the
# credentials cannot be mistaken for URL delimiters.
connection_string = (
    f"postgresql+psycopg2://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# NOTE: no connection has been opened at this point; these lines only echo the
# configured target. The password is deliberately never printed.
print(f"✅ Connected to: {DB_HOST}:{DB_PORT}/{DB_NAME}")
print("✅ Environment variables loaded successfully!")

✅ Connected to: wg-data-rds.data.higg.org:5432/db_higg
✅ Environment variables loaded successfully!


In [31]:
# Create database engine and test connection
print("🔧 Creating database engine...")

try:
    # Create engine with optimized settings:
    #   pool_pre_ping - check a pooled connection is alive before handing it out
    #   pool_recycle  - replace pooled connections older than 300 seconds
    #   echo          - do not log every SQL statement
    engine = create_engine(
        connection_string,
        pool_pre_ping=True,
        pool_recycle=300,
        echo=False
    )

    # Test connection by round-tripping a trivial query; the connection is
    # returned to the pool when the with-block exits.
    with engine.connect() as connection:
        result = connection.execute(text("SELECT 'Connection successful!' as status, current_timestamp as time"))
        row = result.fetchone()

    print("✅ DATABASE CONNECTION SUCCESSFUL!")
    print(f"Status: {row[0]}")
    print(f"Connected at: {row[1]}")

except Exception as e:
    print("❌ DATABASE CONNECTION FAILED!")
    print(f"Error: {e}")
    print("\n🔧 Check your .env file credentials")
    # Re-raise so "Run All" stops here: continuing would run the following
    # cells with `engine` undefined or left over from an earlier execution.
    raise

🔧 Creating database engine...
✅ DATABASE CONNECTION SUCCESSFUL!
Status: Connection successful!
Connected at: 2025-09-09 21:32:41.292961+00:00


In [32]:
# Execute your SQL file
print("📁 Loading and executing SQL file...")

# Reset this cell's outputs before doing any work, so a failure below cannot
# leave a query or result set from an earlier execution of the cell behind.
sql_query = None
df_results = pd.DataFrame()

try:
    # Read SQL file (explicit encoding: the platform default differs between systems)
    with open('facility type and pc.sql', 'r', encoding='utf-8') as file:
        sql_query = file.read()

    print(f"✅ SQL file loaded ({len(sql_query)} characters)")

    # Execute query using manual method: fetch all rows and the column names
    # while the connection is still open.
    with engine.connect() as connection:
        result = connection.execute(text(sql_query))
        rows = result.fetchall()
        columns = result.keys()

    # Create DataFrame
    df_results = pd.DataFrame(rows, columns=columns)

    print("✅ QUERY EXECUTED SUCCESSFULLY!")
    print(f"📊 Results: {df_results.shape[0]} rows, {df_results.shape[1]} columns")
    print(f"📋 Columns: {list(df_results.columns)}")

except Exception as e:
    print(f"❌ QUERY EXECUTION FAILED: {e}")
    # Only dump the query when the file was actually read; if open() itself
    # failed there is nothing to show, and referencing an unset name here
    # would hide the real error behind a NameError.
    if sql_query is not None:
        print("\n🔍 SQL Query content:")
        print("=" * 50)
        print(sql_query)
        print("=" * 50)

📁 Loading and executing SQL file...
✅ SQL file loaded (1461 characters)
✅ QUERY EXECUTED SUCCESSFULLY!
📊 Results: 5434 rows, 3 columns
📋 Columns: ['assessment_id', 'sipfacilityapparelpc', 'apparel_pc_count']


In [33]:
# Display and analyze results
# Summarise the query output: overall size, per-column dtype and non-null
# count, a preview of the first rows, and a hint for exporting to CSV.
results_ready = 'df_results' in locals() and not df_results.empty

if not results_ready:
    # Nothing to show: the result frame is missing or has no data.
    print("❌ No results available. Run the previous cell first.")
else:
    row_total, column_total = df_results.shape
    print("📊 Dataset Overview:")
    print(f"   • Total rows: {row_total}")
    print(f"   • Total columns: {column_total}")

    print("\n📋 Column Information:")
    for col in df_results.columns:
        column_data = df_results[col]
        dtype, non_null = column_data.dtype, column_data.count()
        print(f"   • {col}: {dtype} ({non_null} non-null)")

    print("\n📄 First 10 rows:")
    print(df_results.head(10))

    # The export command is only printed as a hint; nothing is written to disk.
    print("\n💾 To save results:")
    print("df_results.to_csv('tier1_results.csv', index=False)")

📊 Dataset Overview:
   • Total rows: 5434
   • Total columns: 3

📋 Column Information:
   • assessment_id: object (5434 non-null)
   • sipfacilityapparelpc: object (5434 non-null)
   • apparel_pc_count: int64 (5434 non-null)

📄 First 10 rows:
                                    assessment_id  \
0  femsurvey:fffff92a-914f-446f-812f-8141dbe416a6   
1  femsurvey:ffff536a-d061-4fbd-84af-bfcaf59ac297   
2  femsurvey:fff81b83-f145-404b-aeae-d0bb63b0fa1a   
3  femsurvey:fff4757f-0b72-4f89-82cb-771a86980e0f   
4  femsurvey:ffe917d7-0bf2-4469-af77-cba73a34e513   
5  femsurvey:ffe53b8f-d79c-4480-a52f-92f026b318d2   
6  femsurvey:ffba25e8-eaea-486e-9abf-6e362be0f88d   
7  femsurvey:ffa679a1-848a-465e-92f3-9d518fbe7633   
8  femsurvey:ffa0e74d-2f96-4117-b7ba-46680f7741f4   
9  femsurvey:ff8205f1-281f-4d6f-8d69-72e447f694ca   

                                sipfacilityapparelpc  apparel_pc_count  
0          [Hosiery, Pants, Shirts, Skirts, T-shirt]                 5  
1                          