In [16]:
import os
import pandas as pd
from google.cloud import bigquery

In [17]:
# Set credentials

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/Users/dariaserbichenko/code/DariaSerb/key-gcp/trash-optimizer-479913-91e59ecc96c9.json"

PROJECT = "trash-optimizer-479913"
DATASET = "nantes"
client = bigquery.Client(project=PROJECT)

print("Creating optimized trash collection points table")

Creating optimized trash collection points table


In [18]:
# FIRST QUERY ALIMENTARY_GARBAGE (FOOD WASTE)

print("\n1. Querying alimentary_garbage_clean (food waste)")

query1 = f"""
SELECT
  ROW_NUMBER() OVER () as ID,
  CONCAT('Food Waste - ', COALESCE(commune, 'Nantes')) as Name,
  COALESCE(adresse, 'Address not specified') as Address,
  lon as Longitude,
  lat as Latitude,
  0 as Is_Cardboard_enabled,
  1 as Is_Food_enabled,
  0 as Is_Glass_enabled,
  0 as Is_Metal_enabled,
  0 as Is_Paper_enabled,
  0 as Is_Plastic_enabled,
  0 as Is_Textile_enabled,
  0 as Is_Vegetation_enabled,
  0 as Is_Neon_enabled,
  0 as Is_Cartridge_enabled,
  0 as Is_Lamp_Light_enabled
FROM `{PROJECT}.{DATASET}.alimentary_garbage_clean`
WHERE lat IS NOT NULL AND lon IS NOT NULL
"""


1. Querying alimentary_garbage_clean (food waste)


In [19]:
try:
    df1 = client.query(query1).to_dataframe()
    print(f"Retrieved {len(df1):,} food waste locations")
except Exception as e:
    print(f"Error: {e}")
    df1 = pd.DataFrame()

Retrieved 1,644 food waste locations


In [20]:
# SECOND QUERY ECOPOINTS (USING ACTUAL COLUMNS FOUND)

print("\n2. Querying ecopoints with actual columns")

# From inspection: columns are ['bois', 'carton', 'ferraille', 'cartouche', 'neon', 'papier', 'textile', 'verre']

query2 = f"""
SELECT
  ROW_NUMBER() OVER () + 10000 as ID,
  CONCAT('Recycling Center - ', COALESCE(nom, commune, 'Ecopoint')) as Name,
  COALESCE(adresse, 'Address not specified') as Address,
  lon as Longitude,
  lat as Latitude,

  -- Use actual columns found
  CASE WHEN UPPER(carton) = 'OUI' THEN 1 ELSE 0 END as Is_Cardboard_enabled,
  0 as Is_Food_enabled,
  CASE WHEN UPPER(verre) = 'OUI' THEN 1 ELSE 0 END as Is_Glass_enabled,
  CASE WHEN UPPER(ferraille) = 'OUI' THEN 1 ELSE 0 END as Is_Metal_enabled,
  CASE WHEN UPPER(papier) = 'OUI' THEN 1 ELSE 0 END as Is_Paper_enabled,
  CASE WHEN UPPER(dechet_vert) = 'OUI' THEN 1 ELSE 0 END as Is_Vegetation_enabled,
  0 as Is_Plastic_enabled,  -- No plastique column
  CASE WHEN UPPER(textile) = 'OUI' THEN 1 ELSE 0 END as Is_Textile_enabled,
  CASE WHEN UPPER(neon) = 'OUI' THEN 1 ELSE 0 END as Is_Neon_enabled,
  CASE WHEN UPPER(cartouche) = 'OUI' THEN 1 ELSE 0 END as Is_Cartridge_enabled,
  0 as Is_Lamp_Light_enabled  -- No ampoule column
FROM `{PROJECT}.{DATASET}.ecopoints`
WHERE lat IS NOT NULL AND lon IS NOT NULL
"""


2. Querying ecopoints with actual columns


In [21]:
try:
    df2 = client.query(query2).to_dataframe()
    print(f"Retrieved {len(df2)} recycling centers with actual waste types")

    # Check acceptance rates
    waste_cols = [col for col in df2.columns if col.startswith('Is_')]
    print(f"- Waste acceptance in recycling centers:")
    for col in waste_cols:
        count = df2[col].sum()
        if count > 0:
            waste_name = col.replace('Is_', '').replace('_enabled', '').replace('_', ' ').title()
            print(f"   {waste_name}: {count}/{len(df2)} locations")

except Exception as e:
    print(f"   ‚ùå Error: {e}")
    df2 = pd.DataFrame()

Retrieved 15 recycling centers with actual waste types
- Waste acceptance in recycling centers:
   Cardboard: 15/15 locations
   Glass: 14/15 locations
   Metal: 14/15 locations
   Paper: 15/15 locations
   Vegetation: 14/15 locations
   Textile: 9/15 locations
   Neon: 8/15 locations
   Cartridge: 15/15 locations


In [22]:
# THIRD QUERY FOR GLASS COLLECTION COLUMNS (VERRE ONLY)

print("\n3. Querying glass collection columns (Verre only)")

query3 = f"""
SELECT
  ROW_NUMBER() OVER () + 30000 as ID,  # Start from 30000 for glass columns
  CONCAT(
    'Drop-off points - ',
    COALESCE(
      CASE
        WHEN type_colonne IS NOT NULL THEN
          CASE type_colonne
            WHEN 'colonne enterr√©e' THEN 'Underground'
            WHEN 'colonne a√©rienne' THEN 'Above-ground'
            ELSE INITCAP(type_colonne)
          END
        ELSE ''
      END,
      'Glass Collection'
    ),
    CASE
      WHEN commune IS NOT NULL THEN CONCAT(' - ', commune)
      ELSE ' - Nantes'
    END
  ) as Name,
  COALESCE(adresse, 'Nantes M√©tropole') as Address,
  lat as Latitude,
  lon as Longitude,

  -- Waste type capabilities: ONLY GLASS ENABLED
  0 as Is_Cardboard_enabled,
  0 as Is_Food_enabled,
  1 as Is_Glass_enabled,  # All these points are for glass collection
  0 as Is_Metal_enabled,
  0 as Is_Paper_enabled,
  0 as Is_Plastic_enabled,
  0 as Is_Textile_enabled,
  0 as Is_Vegetation_enabled,
  0 as Is_Neon_enabled,
  0 as Is_Cartridge_enabled,
  0 as Is_Lamp_Light_enabled

FROM `{PROJECT}.{DATASET}.location_dropoff_points_nantes`
WHERE
  lat IS NOT NULL
  AND lon IS NOT NULL
  AND LOWER(TRIM(type_dechet)) = 'verre'  # Only glass collection points
"""

try:
    df3 = client.query(query3).to_dataframe()
    print(f"‚úÖ Retrieved {len(df3):,} glass collection columns")

    # Show summary
    print(f"\nüìä GLASS COLUMNS SUMMARY:")
    print(f"  Total glass columns: {len(df3):,}")

    # Check coordinate validity
    valid_coords = df3['Latitude'].notna().sum()
    print(f"  With valid coordinates: {valid_coords:,}")

    # Show sample
    print(f"\nüëÄ SAMPLE GLASS COLUMNS (first 3):")
    for i in range(min(3, len(df3))):
        row = df3.iloc[i]
        print(f"  {i+1}. {row['Name']}")
        print(f"     Address: {row['Address'][:60]}...")
        print(f"     Location: ({row['Latitude']:.6f}, {row['Longitude']:.6f})")
        print(f"     Glass enabled: {'‚úì' if row['Is_Glass_enabled'] == 1 else '‚úó'}")

    # Show waste type summary
    print(f"\nüóëÔ∏è  WASTE TYPE ENABLEMENT (should be Glass only):")
    waste_cols = [col for col in df3.columns if col.startswith('Is_')]
    for col in waste_cols:
        count = df3[col].sum()
        if count > 0:
            waste_name = col.replace('Is_', '').replace('_enabled', '').replace('_', ' ').title()
            print(f"  {waste_name}: {count:,}/{len(df3):,} ({count/len(df3)*100:.1f}%)")

except Exception as e:
    print(f"‚ùå Error querying glass columns: {e}")

    # Debug: Check what types of waste exist in the table
    print("\nüîç Debug: Checking available waste types in the table...")
    try:
        debug_query = f"""
        SELECT
          type_dechet,
          COUNT(*) as count
        FROM `{PROJECT}.{DATASET}.location_dropoff_points_nantes`
        WHERE type_dechet IS NOT NULL
        GROUP BY type_dechet
        ORDER BY count DESC
        LIMIT 10
        """
        waste_types = client.query(debug_query).to_dataframe()
        print(f"Available waste types in table:")
        print(waste_types.to_string(index=False))
    except:
        print("Could not check waste types")

    df3 = pd.DataFrame()


3. Querying glass collection columns (Verre only)
‚úÖ Retrieved 1,079 glass collection columns

üìä GLASS COLUMNS SUMMARY:
  Total glass columns: 1,079
  With valid coordinates: 1,079

üëÄ SAMPLE GLASS COLUMNS (first 3):
  1. Drop-off points - Underground - Nantes
     Address: Rue de la petite Sensive...
     Location: (47.260437, -1.561580)
     Glass enabled: ‚úì
  2. Drop-off points - Underground - Nantes
     Address: Rue Blaise Pascal...
     Location: (47.256204, -1.566761)
     Glass enabled: ‚úì
  3. Drop-off points - Underground - Nantes
     Address: 2 Rue de Concarneau...
     Location: (47.264360, -1.578521)
     Glass enabled: ‚úì

üóëÔ∏è  WASTE TYPE ENABLEMENT (should be Glass only):
  Glass: 1,079/1,079 (100.0%)


In [24]:
# FOURTH QUERY: NON-GLASS WASTE TYPES

print("\n4. Querying non-glass waste columns with waste type names")

query4 = f"""
SELECT
  ROW_NUMBER() OVER () +
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%papier%carton%' THEN 40000
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 50000
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 60000
    ELSE 70000
  END as ID,

  CONCAT(
    CASE
      WHEN LOWER(TRIM(type_dechet)) LIKE '%papier%carton%' THEN 'Paper/Cardboard'
      WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 'Recyclable Waste'
      WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 'Household Waste'
      ELSE INITCAP(type_dechet)
    END,
    ' Drop-off Point - ',
    COALESCE(commune, 'Nantes'),
    CASE
      WHEN type_colonne IS NOT NULL THEN CONCAT(' (',
        CASE type_colonne
          WHEN 'colonne enterr√©e' THEN 'Underground'
          WHEN 'colonne a√©rienne' THEN 'Above-ground'
          ELSE INITCAP(type_colonne)
        END, ')')
      ELSE ''
    END
  ) as Name,

  COALESCE(adresse, 'Nantes M√©tropole') as Address,
  lat as Latitude,
  lon as Longitude,

  -- Paper/Cardboard columns
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%papier%carton%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Cardboard_enabled,

  -- Food (only for household waste)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Food_enabled,

  -- Glass (for recyclable and household waste - but NOT paper/cardboard)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Glass_enabled,

  -- Metal (for recyclable and household waste)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Metal_enabled,

  -- Paper (for paper/cardboard, recyclable, and household)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%papier%carton%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Paper_enabled,

  -- Plastic (for recyclable and household)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Plastic_enabled,

  -- Textile (only for household)
  0 as Is_Textile_enabled,

  -- Vegetation (only for household)
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 1
    ELSE 0
  END as Is_Vegetation_enabled,

  -- Special waste types (none for these columns)
  0 as Is_Neon_enabled,
  0 as Is_Cartridge_enabled,
  0 as Is_Lamp_Light_enabled,

  type_dechet as Original_Waste_Type,
  type_colonne as Original_Column_Type,
  commune as Commune

FROM `{PROJECT}.{DATASET}.location_dropoff_points_nantes`
WHERE
  lat IS NOT NULL
  AND lon IS NOT NULL
  AND (
    LOWER(TRIM(type_dechet)) LIKE '%papier%carton%'
    OR LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%'
    OR LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%'
  )
ORDER BY
  CASE
    WHEN LOWER(TRIM(type_dechet)) LIKE '%papier%carton%' THEN 1
    WHEN LOWER(TRIM(type_dechet)) LIKE '%d√©chet recyclable%' THEN 2
    WHEN LOWER(TRIM(type_dechet)) LIKE '%ordure m√©nag√®re%' THEN 3
    ELSE 4
  END,
  commune
"""

try:
    df4 = client.query(query4).to_dataframe()
    print(f"‚úÖ Retrieved {len(df4):,} non-glass waste columns")

    # Show breakdown
    print(f"\nüìä BREAKDOWN BY WASTE TYPE:")
    if 'Original_Waste_Type' in df4.columns:
        # Group by cleaned waste type name
        df4['Waste_Category'] = df4['Original_Waste_Type'].apply(
            lambda x: 'Paper/Cardboard' if 'papier' in str(x).lower() and 'carton' in str(x).lower()
            else 'Recyclable Waste' if 'd√©chet recyclable' in str(x).lower()
            else 'Household Waste' if 'ordure m√©nag√®re' in str(x).lower()
            else 'Other'
        )

        waste_counts = df4['Waste_Category'].value_counts()
        for waste_type, count in waste_counts.items():
            percentage = (count / len(df4)) * 100
            print(f"  {waste_type}: {count:,} columns ({percentage:.1f}%)")

    # Show what each type accepts
    print(f"\nüóëÔ∏è  CAPABILITIES BY WASTE TYPE:")
    waste_categories = df4['Waste_Category'].unique() if 'Waste_Category' in df4.columns else df4['Original_Waste_Type'].unique()

    for category in waste_categories:
        if 'Waste_Category' in df4.columns:
            subset = df4[df4['Waste_Category'] == category]
        else:
            subset = df4[df4['Original_Waste_Type'] == category]

        if len(subset) > 0:
            print(f"\n  {category} columns accept:")
            enabled_types = []
            for col in [c for c in subset.columns if c.startswith('Is_') and c not in ['Is_Neon_enabled', 'Is_Cartridge_enabled', 'Is_Lamp_Light_enabled']]:
                if subset[col].iloc[0] == 1:
                    waste_name = col.replace('Is_', '').replace('_enabled', '').replace('_', ' ').title()
                    enabled_types.append(waste_name)

            if enabled_types:
                print(f"    ‚úì {', '.join(enabled_types)}")
            else:
                print(f"    ‚úó No specific waste types enabled")

    # Show samples
    print(f"\nüëÄ SAMPLES (one of each type):")
    sample_shown = set()

    for _, row in df4.iterrows():
        waste_type = row['Original_Waste_Type']
        if waste_type not in sample_shown:
            sample_shown.add(waste_type)

            category = row.get('Waste_Category', waste_type)
            print(f"\n  {category}:")
            print(f"    Name: {row['Name']}")
            print(f"    Original Type: {row['Original_Waste_Type']}")
            print(f"    Location: {row['Commune']}")
            print(f"    Coordinates: ({row['Latitude']:.6f}, {row['Longitude']:.6f})")

            # Show enabled types
            enabled = []
            for col in [c for c in row.index if c.startswith('Is_') and row[col] == 1]:
                waste_name = col.replace('Is_', '').replace('_enabled', '').replace('_', ' ').title()
                enabled.append(waste_name)
            if enabled:
                print(f"    Accepts: {', '.join(enabled)}")

            # Limit to 3 samples
            if len(sample_shown) >= 3:
                break

    # Show distribution by commune
    print(f"\nüìç DISTRIBUTION BY COMMUNE (top 5):")
    if 'Commune' in df4.columns:
        commune_counts = df4['Commune'].value_counts().head(5)
        for commune, count in commune_counts.items():
            print(f"  {commune}: {count:,} columns")

except Exception as e:
    print(f"‚ùå Error: {e}")

    # Try exact match if LIKE doesn't work
    print("\nüîÑ Trying with exact matches...")
    try:
        query4_exact = f"""
        SELECT
          ROW_NUMBER() OVER () +
          CASE
            WHEN type_dechet = 'Papier-carton' THEN 40000
            WHEN type_dechet = 'D√©chet recyclable' THEN 50000
            WHEN type_dechet = 'Ordure m√©nag√®re' THEN 60000
            ELSE 70000
          END as ID,

          CONCAT(
            CASE
              WHEN type_dechet = 'Papier-carton' THEN 'Paper/Cardboard'
              WHEN type_dechet = 'D√©chet recyclable' THEN 'Recyclable Waste'
              WHEN type_dechet = 'Ordure m√©nag√®re' THEN 'Household Waste'
              ELSE type_dechet
            END,
            ' Column - ',
            COALESCE(commune, 'Nantes')
          ) as Name,

          COALESCE(adresse, 'Nantes M√©tropole') as Address,
          lat as Latitude,
          lon as Longitude,

          -- Capabilities (simplified for testing)
          1 as Is_Cardboard_enabled,
          0 as Is_Food_enabled,
          0 as Is_Glass_enabled,
          0 as Is_Metal_enabled,
          1 as Is_Paper_enabled,
          0 as Is_Plastic_enabled,
          0 as Is_Textile_enabled,
          0 as Is_Vegetation_enabled,
          0 as Is_Neon_enabled,
          0 as Is_Cartridge_enabled,
          0 as Is_Lamp_Light_enabled,

          type_dechet as Original_Waste_Type,
          commune as Commune

        FROM `{PROJECT}.{DATASET}.location_dropoff_points_nantes`
        WHERE lat IS NOT NULL AND lon IS NOT NULL
          AND type_dechet IN ('Papier-carton', 'D√©chet recyclable', 'Ordure m√©nag√®re')
        LIMIT 100
        """

        df4 = client.query(query4_exact).to_dataframe()
        print(f"‚úÖ Retrieved {len(df4):,} columns with exact matching")

    except Exception as e2:
        print(f"‚ùå Exact match also failed: {e2}")
        df4 = pd.DataFrame()


4. Querying non-glass waste columns with waste type names
‚úÖ Retrieved 1,490 non-glass waste columns

üìä BREAKDOWN BY WASTE TYPE:
  Household Waste: 843 columns (56.6%)
  Recyclable Waste: 564 columns (37.9%)
  Paper/Cardboard: 83 columns (5.6%)

üóëÔ∏è  CAPABILITIES BY WASTE TYPE:

  Paper/Cardboard columns accept:
    ‚úì Cardboard, Paper

  Recyclable Waste columns accept:
    ‚úì Cardboard, Glass, Metal, Paper, Plastic

  Household Waste columns accept:
    ‚úì Cardboard, Food, Glass, Metal, Paper, Plastic, Vegetation

üëÄ SAMPLES (one of each type):

  Paper/Cardboard:
    Name: Paper/Cardboard Drop-off Point - Nantes (Above-ground)
    Original Type: Papier-carton
    Location: Nantes
    Coordinates: (47.229835, -1.519756)

  Recyclable Waste:
    Name: Recyclable Waste Drop-off Point - Basse-Goulaine (Underground)
    Original Type: D√©chet recyclable
    Location: Basse-Goulaine
    Coordinates: (47.208462, -1.466821)

  Household Waste:
    Name: Household Waste Drop-of

In [25]:
# COMBINE AND CREATE FINAL TABLE

print("CREATING FINAL TRASH COLLECTION POINTS TABLE")

all_dataframes = []

if not df1.empty:
    all_dataframes.append(df1)
    print(f"Food waste points: {len(df1):,}")
if not df2.empty:
    all_dataframes.append(df2)
    print(f"Recycling centers: {len(df2)}")
if not df3.empty:
    all_dataframes.append(df3)
    print(f"Underground containers: {len(df3):,}")
if not df4.empty:
    all_dataframes.append(df4)
    print(f"Underground containers (other type waste): {len(df4):,}")

if all_dataframes:
    # Combine all data
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    # Reset ID to be sequential
    combined_df['ID'] = range(1, len(combined_df) + 1)

    # Define final structure
    final_columns = [
        'ID', 'Name', 'Address', 'Longitude', 'Latitude',
        'Is_Cardboard_enabled', 'Is_Food_enabled', 'Is_Glass_enabled',
        'Is_Metal_enabled', 'Is_Paper_enabled', 'Is_Plastic_enabled',
        'Is_Textile_enabled', 'Is_Vegetation_enabled', 'Is_Neon_enabled',
        'Is_Cartridge_enabled', 'Is_Lamp_Light_enabled'
    ]

    # Ensure all columns exist
    for col in final_columns:
        if col not in combined_df.columns:
            if col.startswith('Is_'):
                combined_df[col] = 0

    # Convert to proper types
    for col in combined_df.columns:
        if col.startswith('Is_'):
            combined_df[col] = combined_df[col].astype(int)
        elif col in ['Longitude', 'Latitude']:
            combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce')

    # Reorder
    combined_df = combined_df[final_columns]

    total_locations = len(combined_df)
    print(f"FINAL TABLE: {total_locations:,} total trash collection points")

    # Save to CSV
    output_csv = 'trash_collection_points_final_optimized.csv'
    combined_df.to_csv(output_csv, index=False, encoding='utf-8-sig')
    print(f"CSV saved: '{output_csv}'")

    # ===== UPLOAD TO BIGQUERY =====
    print(f"Uploading to BigQuery")
    table_id = f"{PROJECT}.{DATASET}.trash_collection_points"

    job_config = bigquery.LoadJobConfig(
        write_disposition="WRITE_TRUNCATE",
        autodetect=True,
        max_bad_records=100
    )

    try:
        job = client.load_table_from_dataframe(combined_df, table_id, job_config=job_config)
        job.result()

        table = client.get_table(table_id)
        print(f"BigQuery table created: {table_id}")
        print(f"   Rows: {table.num_rows:,}")
        print(f"   Size: {table.num_bytes / (1024*1024):.2f} MB")

    except Exception as e:
        print(f"BigQuery upload failed: {e}")
        print(f"   Data saved locally: '{output_csv}'")


CREATING FINAL TRASH COLLECTION POINTS TABLE
Food waste points: 1,644
Recycling centers: 15
Underground containers: 1,079
Underground containers (other type waste): 1,490
FINAL TABLE: 4,228 total trash collection points
CSV saved: 'trash_collection_points_final_optimized.csv'
Uploading to BigQuery
BigQuery table created: trash-optimizer-479913.nantes.trash_collection_points
   Rows: 4,228
   Size: 0.71 MB


In [26]:
# CREATE DETAILED ANALYSIS

print(f"DETAILED ANALYSIS FOR TRASH COLLECTION POINTS")

# 1. Facility type breakdown

print(f"\n1. FACILITY TYPES:")
facility_summary = combined_df['Name'].str.extract(r'^(Food Waste|Recycling Center|Underground containers)')[0]
type_counts = facility_summary.value_counts()

for type_name, count in type_counts.items():
    percentage = (count / total_locations) * 100
    print(f"   {type_name:20} {count:6,} locations ({percentage:5.1f}%)")

# 2. Waste type acceptance

    print(f"\n2. WASTE TYPE ACCEPTANCE:")
    waste_cols = [col for col in combined_df.columns if col.startswith('Is_')]

    waste_stats = []
    for col in waste_cols:
        count = combined_df[col].sum()
        percentage = (count / total_locations) * 100
        waste_name = col.replace('Is_', '').replace('_enabled', '').replace('_', ' ').title()
        waste_stats.append((waste_name, count, percentage))

# Sort by most accepted

    waste_stats.sort(key=lambda x: x[1], reverse=True)

    for name, count, pct in waste_stats:
        print(f"   {name:20} {count:6,} locations ({pct:5.1f}%)")

# 3. Geographic coverage

    print(f"\n3. GEOGRAPHIC COVERAGE:")
    if combined_df['Latitude'].notna().any() and combined_df['Longitude'].notna().any():
        min_lat = combined_df['Latitude'].min()
        max_lat = combined_df['Latitude'].max()
        min_lon = combined_df['Longitude'].min()
        max_lon = combined_df['Longitude'].max()

        print(f"   Latitude range:  {min_lat:.4f} to {max_lat:.4f}")
        print(f"   Longitude range: {min_lon:.4f} to {max_lon:.4f}")
        print(f"   Center point:    ({combined_df['Latitude'].mean():.4f}, {combined_df['Longitude'].mean():.4f})")

DETAILED ANALYSIS FOR TRASH COLLECTION POINTS

1. FACILITY TYPES:
   Food Waste            1,644 locations ( 38.9%)

2. WASTE TYPE ACCEPTANCE:
   Glass                 2,500 locations ( 59.1%)
   Food                  2,487 locations ( 58.8%)
   Cardboard             1,505 locations ( 35.6%)
   Paper                 1,505 locations ( 35.6%)
   Metal                 1,421 locations ( 33.6%)
   Plastic               1,407 locations ( 33.3%)
   Vegetation              857 locations ( 20.3%)
   Cartridge                15 locations (  0.4%)
   Textile                   9 locations (  0.2%)
   Neon                      8 locations (  0.2%)
   Lamp Light                0 locations (  0.0%)

3. GEOGRAPHIC COVERAGE:
   Latitude range:  47.1225 to 47.3335
   Longitude range: -1.8177 to -1.3820
   Center point:    (47.2249, -1.5594)
   Recycling Center         15 locations (  0.4%)

2. WASTE TYPE ACCEPTANCE:
   Glass                 2,500 locations ( 59.1%)
   Food                  2,487 locatio