In [1]:
!pip install --quiet pillow-heif
!pip install --quiet geopandas shapely


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Users\sybox\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Users\sybox\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


### 08.06 Dry-run

In [5]:
import os
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pillow_heif import register_heif_opener
import googlemaps
from googlemaps.exceptions import ApiError, Timeout, TransportError
import time

# Enable HEIC support
register_heif_opener()

# Add your Google Maps API key here
GOOGLE_MAPS_API_KEY = "AIzaSyDl88yK8xVIEzPoZ0l7Nd9hzI1JMkoUNeQ"  # Replace with your actual API key

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_from_heic(image_path):
    """Extract GPS coordinates from HEIC image"""
    try:
        image = Image.open(image_path)
        exifdata = image.getexif()
        
        coordinates = {
            'latitude': None,
            'longitude': None
        }
        
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            
            if tag == "GPSInfo":
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    coordinates['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    coordinates['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
        
        return coordinates
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return {'latitude': None, 'longitude': None}

def get_location_info_google_maps(lat, lon, gmaps_client):
    """Get detailed location information using Google Maps Geocoding API"""
    try:
        # Reverse geocode the coordinates
        geocode_result = gmaps_client.reverse_geocode((lat, lon))
        
        if not geocode_result:
            return "Unknown Street", "Unknown Neighborhood", "Unknown City"
        
        # Get the most detailed result (usually the first one)
        result = geocode_result[0]
        
        # Initialize variables
        street_address = "Unknown Street"
        neighborhood = "Unknown Neighborhood" 
        city = "Unknown City"
        
        # Extract formatted address components
        address_components = result.get('address_components', [])
        
        # Variables to build street address
        house_number = ""
        street_name = ""
        
        # Parse address components
        for component in address_components:
            types = component.get('types', [])
            long_name = component.get('long_name', '')
            
            # Street number
            if 'street_number' in types:
                house_number = long_name
            
            # Street name
            elif 'route' in types:
                street_name = long_name
            
            # Neighborhood (multiple possible types)
            elif any(ntype in types for ntype in ['neighborhood', 'sublocality', 'sublocality_level_1']):
                if neighborhood == "Unknown Neighborhood":  # Only update if we haven't found one yet
                    neighborhood = long_name
            
            # City
            elif 'locality' in types:
                city = long_name
            
            # Additional neighborhood fallbacks
            elif neighborhood == "Unknown Neighborhood":
                if any(ntype in types for ntype in ['political', 'sublocality_level_2', 'administrative_area_level_3']):
                    # Additional check to avoid getting state/country names
                    if any(avoid in types for avoid in ['administrative_area_level_1', 'administrative_area_level_2', 'country']):
                        continue
                    neighborhood = long_name
        
        # Build street address
        if house_number and street_name:
            street_address = f"{house_number} {street_name}"
        elif street_name:
            street_address = street_name
        else:
            # Try to get from formatted_address as fallback
            formatted_addr = result.get('formatted_address', '')
            if formatted_addr:
                # Extract first part before comma as potential street address
                parts = formatted_addr.split(',')
                if len(parts) > 0 and not parts[0].lower().startswith('detroit'):
                    street_address = parts[0].strip()
        
        return street_address, neighborhood, city
        
    except Exception as e:
        print(f"Error in geocoding: {e}")
        return "API Error", "API Error", "API Error"

def test_with_extracted_images(images_dir, max_test_images=5):
    """Test Google Maps API with your extracted images"""
    
    # Check if API key is set
    if GOOGLE_MAPS_API_KEY == "YOUR_API_KEY_HERE":
        print("‚ùå ERROR: Please set your Google Maps API key in the GOOGLE_MAPS_API_KEY variable")
        print("Get your API key at: https://console.cloud.google.com/google/maps-apis/")
        return
    
    # Check if directory exists
    if not os.path.exists(images_dir):
        print(f"‚ùå ERROR: Directory not found: {images_dir}")
        return
    
    # Initialize Google Maps client
    try:
        gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)
        # Test the API key with a simple request
        test_result = gmaps.reverse_geocode((42.3314, -83.0458))  # Detroit coordinates
        print("‚úÖ Google Maps API key is working")
    except Exception as e:
        print(f"‚ùå ERROR: Failed to initialize Google Maps client: {e}")
        print("Please check your API key and ensure the Geocoding API is enabled")
        return
    
    print(f"\n=== TESTING WITH EXTRACTED IMAGES ===")
    print(f"Looking for HEIC images in: {images_dir}")
    
    # Find HEIC files
    heic_files = []
    for filename in os.listdir(images_dir):
        if filename.lower().endswith(('.heic', '.heif')):
            heic_files.append(os.path.join(images_dir, filename))
    
    if not heic_files:
        print("‚ùå No HEIC files found in the directory")
        print("Files in directory:")
        for file in os.listdir(images_dir)[:10]:  # Show first 10 files
            print(f"  - {file}")
        return
    
    print(f"‚úÖ Found {len(heic_files)} HEIC images")
    
    # Test with first few images
    test_count = min(max_test_images, len(heic_files))
    print(f"Testing with first {test_count} images...")
    print()
    
    results = []
    
    for i, image_path in enumerate(heic_files[:test_count]):
        filename = os.path.basename(image_path)
        print(f"Testing image {i+1}/{test_count}: {filename}")
        
        # Extract GPS coordinates
        coordinates = extract_gps_from_heic(image_path)
        
        if coordinates['latitude'] and coordinates['longitude']:
            lat = coordinates['latitude']
            lon = coordinates['longitude']
            print(f"  üìç GPS found: {lat:.6f}, {lon:.6f}")
            
            # Get location information using Google Maps
            street_address, neighborhood, city = get_location_info_google_maps(lat, lon, gmaps)
            
            print(f"  üè† Street: {street_address}")
            print(f"  üèòÔ∏è  Neighborhood: {neighborhood}")
            print(f"  üåÜ City: {city}")
            print(f"  üó∫Ô∏è  Google Maps: https://maps.google.com/?q={lat},{lon}")
            
            results.append({
                'filename': filename,
                'lat': lat,
                'lon': lon,
                'street_address': street_address,
                'neighborhood': neighborhood,
                'city': city
            })
            
            # Small delay to be respectful to API
            time.sleep(0.2)
            
        else:
            print(f"  ‚ùå No GPS data found")
            results.append({
                'filename': filename,
                'lat': None,
                'lon': None,
                'street_address': 'No GPS Data',
                'neighborhood': 'No GPS Data',
                'city': 'No GPS Data'
            })
        
        print()  # Empty line for readability
    
    # Show summary without saving CSV
    if results:
        df = pd.DataFrame(results)
        
        # Show summary
        gps_count = len(df[df['lat'].notna()])
        unique_neighborhoods = len(df[df['neighborhood'].notna() & (df['neighborhood'] != 'No GPS Data') & (df['neighborhood'] != 'API Error')]['neighborhood'].unique())
        
        print(f"=== TEST SUMMARY ===")
        print(f"Images tested: {len(results)}")
        print(f"With GPS data: {gps_count}")
        print(f"Unique neighborhoods found: {unique_neighborhoods}")
        
        if gps_count > 0:
            neighborhoods = df[df['neighborhood'].notna() & (df['neighborhood'] != 'No GPS Data') & (df['neighborhood'] != 'API Error')]['neighborhood'].value_counts()
            if not neighborhoods.empty:
                print(f"\nNeighborhoods found:")
                for neighborhood, count in neighborhoods.items():
                    print(f"  - {neighborhood}: {count} image(s)")

def quick_gps_check(images_dir):
    """Quick check to see which images have GPS data without using API calls"""
    
    print(f"=== QUICK GPS CHECK ===")
    print(f"Checking GPS data in: {images_dir}")
    
    if not os.path.exists(images_dir):
        print(f"‚ùå Directory not found: {images_dir}")
        return
    
    heic_files = [f for f in os.listdir(images_dir) if f.lower().endswith(('.heic', '.heif'))]
    
    if not heic_files:
        print("‚ùå No HEIC files found")
        return
    
    print(f"Found {len(heic_files)} HEIC images")
    print("Checking GPS data (no API calls)...")
    print()
    
    gps_count = 0
    for i, filename in enumerate(heic_files[:10]):  # Check first 10
        image_path = os.path.join(images_dir, filename)
        coordinates = extract_gps_from_heic(image_path)
        
        if coordinates['latitude'] and coordinates['longitude']:
            lat = coordinates['latitude']
            lon = coordinates['longitude']
            print(f"‚úÖ {filename}: {lat:.6f}, {lon:.6f}")
            gps_count += 1
        else:
            print(f"‚ùå {filename}: No GPS data")
    
    if len(heic_files) > 10:
        print(f"... (showing first 10 of {len(heic_files)} images)")
    
    print(f"\nGPS Summary: {gps_count}/{min(10, len(heic_files))} images have GPS data")

if __name__ == "__main__":
    # Your extracted images directory
    images_directory = r"C:\Users\sybox\NIJ Detroit\extracted_images"
    
    print("=== GOOGLE MAPS API TEST WITH YOUR IMAGES ===")
    print()
    
    # First, do a quick GPS check (no API calls needed)
    quick_gps_check(images_directory)
    
    print("\n" + "="*50)
    
    # Then test with Google Maps API (requires API key)
    print("Now testing with Google Maps API...")
    print("Make sure you've set your API key in GOOGLE_MAPS_API_KEY variable")
    print()
    
    test_with_extracted_images(images_directory, max_test_images=3)
    
    print("\n=== NEXT STEPS ===")
    print("If the test worked:")
    print("1. Increase max_test_images to test more images")
    print("2. Run the full processing script when you're ready")
    print("3. The test will work with any new images you add to the folder")

=== GOOGLE MAPS API TEST WITH YOUR IMAGES ===

=== QUICK GPS CHECK ===
Checking GPS data in: C:\Users\sybox\NIJ Detroit\extracted_images
Found 691 HEIC images
Checking GPS data (no API calls)...

‚úÖ IMG_3921 2.HEIC: 42.396664, -83.256583
‚úÖ IMG_3931 2.HEIC: 42.409328, -83.257269
‚úÖ IMG_3932 2.HEIC: 42.409314, -83.257233
‚úÖ IMG_3933 2.HEIC: 42.409292, -83.257233
‚úÖ IMG_3934 2.HEIC: 42.409292, -83.257233
‚úÖ IMG_3935 2.HEIC: 42.409283, -83.257233
‚úÖ IMG_3936 2.HEIC: 42.409272, -83.257233
‚úÖ IMG_3937 2.HEIC: 42.409261, -83.257233
‚úÖ IMG_3938 2.HEIC: 42.409231, -83.257233
‚úÖ IMG_3939 2.HEIC: 42.409222, -83.257225
... (showing first 10 of 691 images)

GPS Summary: 10/10 images have GPS data

Now testing with Google Maps API...
Make sure you've set your API key in GOOGLE_MAPS_API_KEY variable

‚úÖ Google Maps API key is working

=== TESTING WITH EXTRACTED IMAGES ===
Looking for HEIC images in: C:\Users\sybox\NIJ Detroit\extracted_images
‚úÖ Found 691 HEIC images
Testing with first 3

### 08.06 

-For each picture, extract GPS coordinates from the HEIC data(metadata) 

-Identify neighborhood names using Google Maps GPI 

-Create folders that are named using each unique neighborhood name

-Copy the pictures in the respective folder 

In [6]:
import os
import shutil
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pillow_heif import register_heif_opener
import googlemaps
from googlemaps.exceptions import ApiError, Timeout, TransportError
import time
import re

# Enable HEIC support
register_heif_opener()

# Add your Google Maps API key here
GOOGLE_MAPS_API_KEY = "AIzaSyDl88yK8xVIEzPoZ0l7Nd9hzI1JMkoUNeQ"  # Replace with your actual API key

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_from_heic(image_path):
    """Extract GPS coordinates from HEIC image"""
    try:
        image = Image.open(image_path)
        exifdata = image.getexif()
        
        coordinates = {
            'latitude': None,
            'longitude': None
        }
        
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            
            if tag == "GPSInfo":
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    coordinates['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    coordinates['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
        
        return coordinates
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return {'latitude': None, 'longitude': None}

def get_neighborhood_from_coordinates(lat, lon, gmaps_client):
    """Get neighborhood name from GPS coordinates using Google Maps API"""
    try:
        # Reverse geocode the coordinates
        geocode_result = gmaps_client.reverse_geocode((lat, lon))
        
        if not geocode_result:
            return "Unknown_Neighborhood"
        
        # Get the most detailed result (usually the first one)
        result = geocode_result[0]
        
        # Extract formatted address components
        address_components = result.get('address_components', [])
        
        neighborhood = "Unknown_Neighborhood"
        
        # Parse address components to find neighborhood
        for component in address_components:
            types = component.get('types', [])
            long_name = component.get('long_name', '')
            
            # Look for neighborhood types
            if any(ntype in types for ntype in ['neighborhood', 'sublocality', 'sublocality_level_1']):
                neighborhood = long_name
                break
            
            # Fallback options
            elif neighborhood == "Unknown_Neighborhood":
                if any(ntype in types for ntype in ['political', 'sublocality_level_2', 'administrative_area_level_3']):
                    # Avoid getting state/country names
                    if any(avoid in types for avoid in ['administrative_area_level_1', 'administrative_area_level_2', 'country']):
                        continue
                    neighborhood = long_name
        
        # Clean up neighborhood name for folder naming
        neighborhood = sanitize_folder_name(neighborhood)
        return neighborhood
        
    except Exception as e:
        print(f"Error in geocoding: {e}")
        return "API_Error"

def sanitize_folder_name(name):
    """Clean up neighborhood name to be safe for folder names"""
    # Remove or replace characters that aren't safe for folder names
    # Keep letters, numbers, spaces, hyphens, and underscores
    safe_name = re.sub(r'[<>:"/\\|?*]', '', name)  # Remove illegal characters
    safe_name = re.sub(r'\s+', '_', safe_name)     # Replace spaces with underscores
    safe_name = safe_name.strip('._')              # Remove leading/trailing dots and underscores
    
    # Ensure it's not empty
    if not safe_name or safe_name in ['CON', 'PRN', 'AUX', 'NUL']:  # Windows reserved names
        safe_name = "Unknown_Neighborhood"
    
    return safe_name

def organize_images_by_neighborhood(source_dir, output_base_dir="Neighborhoods_Organized"):
    """
    Organize images into neighborhood folders based on their GPS coordinates
    
    Args:
        source_dir: Directory containing the HEIC images
        output_base_dir: Base directory where neighborhood folders will be created
    """
    
    # Check if API key is set
    if GOOGLE_MAPS_API_KEY == "YOUR_API_KEY_HERE":
        print("‚ùå ERROR: Please set your Google Maps API key in the GOOGLE_MAPS_API_KEY variable")
        return False
    
    # Check if source directory exists
    if not os.path.exists(source_dir):
        print(f"‚ùå ERROR: Source directory not found: {source_dir}")
        return False
    
    # Initialize Google Maps client
    try:
        gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)
        # Test the API key
        test_result = gmaps.reverse_geocode((42.3314, -83.0458))
        print("‚úÖ Google Maps API key is working")
    except Exception as e:
        print(f"‚ùå ERROR: Failed to initialize Google Maps client: {e}")
        return False
    
    # Create output base directory
    if not os.path.exists(output_base_dir):
        os.makedirs(output_base_dir)
        print(f"‚úÖ Created output directory: {output_base_dir}")
    
    # Find all HEIC files
    heic_files = []
    for filename in os.listdir(source_dir):
        if filename.lower().endswith(('.heic', '.heif')):
            heic_files.append(filename)
    
    if not heic_files:
        print("‚ùå No HEIC files found in source directory")
        return False
    
    print(f"‚úÖ Found {len(heic_files)} HEIC images to organize")
    print("üîÑ Starting neighborhood organization...")
    print()
    
    # Track statistics
    processed_count = 0
    no_gps_count = 0
    api_error_count = 0
    neighborhood_counts = {}
    
    # Process each image
    for i, filename in enumerate(heic_files, 1):
        image_path = os.path.join(source_dir, filename)
        
        # Show progress
        if i % 20 == 0 or i == 1:
            print(f"Processing: {i}/{len(heic_files)} images...")
        
        # Extract GPS coordinates
        coordinates = extract_gps_from_heic(image_path)
        
        if coordinates['latitude'] and coordinates['longitude']:
            lat = coordinates['latitude']
            lon = coordinates['longitude']
            
            # Get neighborhood name
            neighborhood = get_neighborhood_from_coordinates(lat, lon, gmaps)
            
            if neighborhood == "API_Error":
                api_error_count += 1
                neighborhood = "API_Error"
            
            processed_count += 1
            
        else:
            neighborhood = "No_GPS_Data"
            no_gps_count += 1
        
        # Create neighborhood folder if it doesn't exist
        neighborhood_dir = os.path.join(output_base_dir, neighborhood)
        if not os.path.exists(neighborhood_dir):
            os.makedirs(neighborhood_dir)
            print(f"üìÅ Created folder: {neighborhood}")
        
        # Copy image to neighborhood folder
        source_file = image_path
        destination_file = os.path.join(neighborhood_dir, filename)
        
        try:
            # Check if file already exists (avoid duplicates)
            if os.path.exists(destination_file):
                # Create unique filename
                name, ext = os.path.splitext(filename)
                counter = 1
                while os.path.exists(destination_file):
                    new_filename = f"{name}_{counter}{ext}"
                    destination_file = os.path.join(neighborhood_dir, new_filename)
                    counter += 1
            
            # Copy the file
            shutil.copy2(source_file, destination_file)
            
            # Track neighborhood counts
            neighborhood_counts[neighborhood] = neighborhood_counts.get(neighborhood, 0) + 1
            
        except Exception as e:
            print(f"‚ùå Error copying {filename}: {e}")
        
        # Rate limiting for API calls
        if coordinates['latitude'] and coordinates['longitude']:
            time.sleep(0.1)  # Small delay for API respect
    
    # Create summary report
    create_organization_summary(output_base_dir, neighborhood_counts, processed_count, no_gps_count, api_error_count)
    
    print(f"\nüéâ ORGANIZATION COMPLETE!")
    print(f"‚úÖ Processed: {len(heic_files)} images")
    print(f"‚úÖ With GPS: {processed_count}")
    print(f"‚úÖ Without GPS: {no_gps_count}")
    if api_error_count > 0:
        print(f"‚ö†Ô∏è  API errors: {api_error_count}")
    print(f"‚úÖ Unique neighborhoods: {len([n for n in neighborhood_counts.keys() if n not in ['No_GPS_Data', 'API_Error']])}")
    print(f"üìÅ Images organized in: {output_base_dir}")
    
    return True

def create_organization_summary(output_dir, neighborhood_counts, processed_count, no_gps_count, api_error_count):
    """Create a summary report of the organization process"""
    
    summary_file = os.path.join(output_dir, "ORGANIZATION_SUMMARY.txt")
    
    with open(summary_file, 'w') as f:
        f.write("NEIGHBORHOOD ORGANIZATION SUMMARY\n")
        f.write("=" * 50 + "\n\n")
        f.write(f"Total images processed: {processed_count + no_gps_count}\n")
        f.write(f"Images with GPS data: {processed_count}\n")
        f.write(f"Images without GPS data: {no_gps_count}\n")
        f.write(f"API errors: {api_error_count}\n")
        f.write(f"Total neighborhood folders created: {len(neighborhood_counts)}\n\n")
        
        f.write("NEIGHBORHOOD BREAKDOWN:\n")
        f.write("-" * 30 + "\n")
        
        # Sort neighborhoods by count (descending)
        sorted_neighborhoods = sorted(neighborhood_counts.items(), key=lambda x: x[1], reverse=True)
        
        for neighborhood, count in sorted_neighborhoods:
            f.write(f"{neighborhood}: {count} images\n")
        
        f.write(f"\nOrganization completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
    
    print(f"üìä Summary report saved: {summary_file}")

def preview_organization(source_dir, preview_count=10):
    """
    Preview what the organization would look like without actually moving files
    
    Args:
        source_dir: Directory containing the HEIC images
        preview_count: Number of images to preview
    """
    
    # Check if API key is set
    if GOOGLE_MAPS_API_KEY == "YOUR_API_KEY_HERE":
        print("‚ùå ERROR: Please set your Google Maps API key to preview organization")
        return
    
    if not os.path.exists(source_dir):
        print(f"‚ùå ERROR: Source directory not found: {source_dir}")
        return
    
    # Initialize Google Maps client
    try:
        gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)
        print("‚úÖ Google Maps API key is working")
    except Exception as e:
        print(f"‚ùå ERROR: Failed to initialize Google Maps client: {e}")
        return
    
    # Find HEIC files
    heic_files = [f for f in os.listdir(source_dir) if f.lower().endswith(('.heic', '.heif'))]
    
    if not heic_files:
        print("‚ùå No HEIC files found")
        return
    
    print(f"üîç ORGANIZATION PREVIEW")
    print(f"Found {len(heic_files)} HEIC images")
    print(f"Previewing first {min(preview_count, len(heic_files))} images...\n")
    
    neighborhood_preview = {}
    
    for i, filename in enumerate(heic_files[:preview_count]):
        image_path = os.path.join(source_dir, filename)
        print(f"Processing {i+1}/{min(preview_count, len(heic_files))}: {filename}")
        
        coordinates = extract_gps_from_heic(image_path)
        
        if coordinates['latitude'] and coordinates['longitude']:
            lat = coordinates['latitude']
            lon = coordinates['longitude']
            neighborhood = get_neighborhood_from_coordinates(lat, lon, gmaps)
            print(f"  üìç GPS: {lat:.6f}, {lon:.6f}")
            print(f"  üìÅ Folder: {neighborhood}")
            
            neighborhood_preview[neighborhood] = neighborhood_preview.get(neighborhood, 0) + 1
        else:
            neighborhood = "No_GPS_Data"
            print(f"  ‚ùå No GPS data")
            print(f"  üìÅ Folder: {neighborhood}")
            neighborhood_preview[neighborhood] = neighborhood_preview.get(neighborhood, 0) + 1
        
        print()
        time.sleep(0.1)  # API rate limiting
    
    print(f"üìä PREVIEW SUMMARY:")
    print("Neighborhood folders that would be created:")
    for neighborhood, count in sorted(neighborhood_preview.items(), key=lambda x: x[1], reverse=True):
        print(f"  üìÅ {neighborhood}: {count} image(s)")

if __name__ == "__main__":
    # Configuration
    source_directory = r"C:\Users\sybox\NIJ Detroit\extracted_images"
    output_directory = "Detroit_Neighborhoods_Organized"
    
    print("=== IMAGE NEIGHBORHOOD ORGANIZER ===")
    print()
    print("This script will:")
    print("1. Read GPS coordinates from each HEIC image")
    print("2. Use Google Maps API to identify the neighborhood")
    print("3. Create folders for each unique neighborhood")
    print("4. Copy images to their respective neighborhood folders")
    print("5. Generate a summary report")
    print()
    
    # Ask user what they want to do
    print("Choose an option:")
    print("1. Preview organization (test with first 10 images)")
    print("2. Organize all images into neighborhood folders")
    print()
    
    choice = input("Enter your choice (1 or 2): ").strip()
    
    if choice == "1":
        print("\nüîç PREVIEW MODE")
        preview_organization(source_directory, preview_count=10)
    
    elif choice == "2":
        print(f"\nüöÄ FULL ORGANIZATION")
        print("This will process all images and create neighborhood folders...")
        confirm = input("Are you sure? (y/n): ").strip().lower()
        
        if confirm == 'y':
            organize_images_by_neighborhood(source_directory, output_directory)
        else:
            print("Organization cancelled.")
    
    else:
        print("Invalid choice. Please run the script again and choose 1 or 2.")

=== IMAGE NEIGHBORHOOD ORGANIZER ===

This script will:
1. Read GPS coordinates from each HEIC image
2. Use Google Maps API to identify the neighborhood
3. Create folders for each unique neighborhood
4. Copy images to their respective neighborhood folders
5. Generate a summary report

Choose an option:
1. Preview organization (test with first 10 images)
2. Organize all images into neighborhood folders



Enter your choice (1 or 2):  2



üöÄ FULL ORGANIZATION
This will process all images and create neighborhood folders...


Are you sure? (y/n):  y


‚úÖ Google Maps API key is working
‚úÖ Created output directory: Detroit_Neighborhoods_Organized
‚úÖ Found 691 HEIC images to organize
üîÑ Starting neighborhood organization...

Processing: 1/691 images...
üìÅ Created folder: Brightmoor
üìÅ Created folder: Riverdale
üìÅ Created folder: Dearborn_Heights
üìÅ Created folder: Unknown_Neighborhood
üìÅ Created folder: Warrendale
Processing: 20/691 images...
Processing: 40/691 images...
üìÅ Created folder: Dearborn
Processing: 60/691 images...
üìÅ Created folder: No_GPS_Data
Processing: 80/691 images...
üìÅ Created folder: Fiskhorn
Processing: 100/691 images...
üìÅ Created folder: Joy-Schaefer
Processing: 120/691 images...
Processing: 140/691 images...
üìÅ Created folder: We_Care_Community
Processing: 160/691 images...
üìÅ Created folder: Plymouth-Hubbell
Processing: 180/691 images...
Processing: 200/691 images...
Processing: 220/691 images...
Processing: 240/691 images...
üìÅ Created folder: Schoolcraft_Southfield
Processing: 26

### 1. Use one picture and extract coordinates

In [7]:
# Enable HEIC support in Pillow
register_heif_opener()

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    # Apply direction (negative for South/West)
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_data(image_path):
    """Extract GPS coordinates and other metadata from HEIC image"""
    try:
        # Open the image
        image = Image.open(image_path)
        
        # Get EXIF data
        exifdata = image.getexif()
        
        metadata = {
            'filename': os.path.basename(image_path),
            'latitude': None,
            'longitude': None,
            'altitude': None,
            'datetime': None,
            'camera_make': None,
            'camera_model': None
        }
        
        # Extract basic EXIF data
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            data = exifdata.get(tag_id)
            
            if tag == "Make":
                metadata['camera_make'] = data
            elif tag == "Model":
                metadata['camera_model'] = data
            elif tag == "DateTime":
                metadata['datetime'] = data
            elif tag == "GPSInfo":
                # Extract GPS data
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                # Convert to decimal coordinates
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    metadata['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    metadata['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
                
                if 'GPSAltitude' in gps_data:
                    metadata['altitude'] = float(gps_data['GPSAltitude'])
        
        return metadata
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

def process_single_image(filename):
    """Process a single image file"""
    # Try different possible paths
    possible_paths = [
        f"Downloads/{filename}",
        f"downloads/{filename}",
        filename,
        f"./{filename}"
    ]
    
    for path in possible_paths:
        if os.path.exists(path):
            print(f"Found image at: {path}")
            metadata = extract_gps_data(path)
            
            if metadata:
                print("\n=== IMAGE METADATA ===")
                print(f"Filename: {metadata['filename']}")
                print(f"Camera: {metadata['camera_make']} {metadata['camera_model']}")
                print(f"Date/Time: {metadata['datetime']}")
                
                if metadata['latitude'] and metadata['longitude']:
                    print(f"GPS Coordinates: {metadata['latitude']:.6f}, {metadata['longitude']:.6f}")
                    print(f"Google Maps Link: https://maps.google.com/?q={metadata['latitude']},{metadata['longitude']}")
                    
                    if metadata['altitude']:
                        print(f"Altitude: {metadata['altitude']} meters")
                else:
                    print("No GPS data found in this image")
                
                return metadata
            else:
                print("Failed to extract metadata")
                return None
    
    print(f"Could not find image file: {filename}")
    print("Make sure the file is in your Downloads folder or current directory")
    return None

def process_all_images_in_folder(folder_path="Downloads"):
    """Process all HEIC images in a folder and create a CSV with locations"""
    if not os.path.exists(folder_path):
        print(f"Folder {folder_path} not found")
        return
    
    all_metadata = []
    
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.heic', '.heif')):
            print(f"Processing: {filename}")
            filepath = os.path.join(folder_path, filename)
            metadata = extract_gps_data(filepath)
            
            if metadata and metadata['latitude'] and metadata['longitude']:
                all_metadata.append(metadata)
    
    if all_metadata:
        # Create DataFrame and save to CSV
        df = pd.DataFrame(all_metadata)
        df.to_csv('image_locations.csv', index=False)
        print(f"\nProcessed {len(all_metadata)} images with GPS data")
        print("Results saved to 'image_locations.csv'")
        
        # Show summary
        print("\n=== LOCATION SUMMARY ===")
        print(f"Latitude range: {df['latitude'].min():.6f} to {df['latitude'].max():.6f}")
        print(f"Longitude range: {df['longitude'].min():.6f} to {df['longitude'].max():.6f}")
        
        return df
    else:
        print("No images with GPS data found")
        return None

if __name__ == "__main__":
    # Process your specific image with the full path
    image_path = r"C:\Users\sybox\Downloads\IMG_4122 2.HEIC"
    
    print(f"Processing image at: {image_path}")
    
    # Check if file exists
    if os.path.exists(image_path):
        print("File found! Extracting GPS data...")
        metadata = extract_gps_data(image_path)
        
        if metadata:
            print("\n=== IMAGE LOCATION DATA ===")
            print(f"Filename: {metadata['filename']}")
            
            if metadata['latitude'] and metadata['longitude']:
                print(f"GPS Coordinates: {metadata['latitude']:.6f}, {metadata['longitude']:.6f}")
                print(f"Google Maps Link: https://maps.google.com/?q={metadata['latitude']},{metadata['longitude']}")
            else:
                print("No GPS data found in this image")
        else:
            print("Failed to extract metadata from the image")
    else:
        print(f"File not found at: {image_path}")
        print("Please check the file path and make sure the file exists")
    
    # Uncomment the line below to process all images in Downloads folder
    # process_all_images_in_folder(r"C:\Users\sybox\Downloads")

Processing image at: C:\Users\sybox\Downloads\IMG_4122 2.HEIC
File found! Extracting GPS data...

=== IMAGE LOCATION DATA ===
Filename: IMG_4122 2.HEIC
GPS Coordinates: 42.366347, -83.187486
Google Maps Link: https://maps.google.com/?q=42.366347222222224,-83.18748611111111


### 2. Follow 1, and add interactive app

In [8]:
import os
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pillow_heif import register_heif_opener
import folium
from datetime import datetime

# Enable HEIC support in Pillow
register_heif_opener()

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    # Apply direction (negative for South/West)
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_data(image_path):
    """Extract GPS coordinates and other metadata from HEIC image"""
    try:
        # Open the image
        image = Image.open(image_path)
        
        # Get EXIF data
        exifdata = image.getexif()
        
        metadata = {
            'filename': os.path.basename(image_path),
            'latitude': None,
            'longitude': None,
            'altitude': None,
            'datetime': None,
            'camera_make': None,
            'camera_model': None
        }
        
        # Extract basic EXIF data
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            data = exifdata.get(tag_id)
            
            if tag == "Make":
                metadata['camera_make'] = data
            elif tag == "Model":
                metadata['camera_model'] = data
            elif tag == "DateTime":
                metadata['datetime'] = data
            elif tag == "GPSInfo":
                # Extract GPS data
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                # Convert to decimal coordinates
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    metadata['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    metadata['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
                
                if 'GPSAltitude' in gps_data:
                    metadata['altitude'] = float(gps_data['GPSAltitude'])
        
        return metadata
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

def create_interactive_map_single_image(image_path, output_name="single_image_map.html"):
    """Create an interactive map for a single image"""
    
    print(f"Creating interactive map for: {os.path.basename(image_path)}")
    
    # Extract metadata
    metadata = extract_gps_data(image_path)
    
    if not metadata or not metadata['latitude'] or not metadata['longitude']:
        print("No GPS data found in this image - cannot create map")
        return None
    
    # Create map centered on image location
    image_map = folium.Map(
        location=[metadata['latitude'], metadata['longitude']],
        zoom_start=15,
        tiles='OpenStreetMap'
    )
    
    # Create detailed popup with all image information
    popup_html = f"""
    <div style="font-family: Arial; width: 300px;">
        <h4 style="color: #2E8B57; margin-bottom: 10px;">üì∏ {metadata['filename']}</h4>
        
        <p><strong>üìç Location:</strong><br>
        Latitude: {metadata['latitude']:.6f}<br>
        Longitude: {metadata['longitude']:.6f}</p>
        
        <p><strong>üìÖ Date/Time:</strong><br>
        {metadata['datetime'] or 'Not available'}</p>
        
        <p><strong>üì± Camera:</strong><br>
        {metadata['camera_make'] or ''} {metadata['camera_model'] or ''}</p>
        
        {f"<p><strong>üèîÔ∏è Altitude:</strong><br>{metadata['altitude']} meters</p>" if metadata['altitude'] else ""}
        
        <p><strong>üîó Links:</strong><br>
        <a href="https://maps.google.com/?q={metadata['latitude']},{metadata['longitude']}" target="_blank">Open in Google Maps</a></p>
    </div>
    """
    
    # Add marker with detailed popup
    folium.Marker(
        [metadata['latitude'], metadata['longitude']],
        popup=folium.Popup(popup_html, max_width=400),
        tooltip=f"Click for details: {metadata['filename']}",
        icon=folium.Icon(color='red', icon='camera', prefix='fa')
    ).add_to(image_map)
    
    # Add a circle to highlight the exact location
    folium.Circle(
        [metadata['latitude'], metadata['longitude']],
        radius=20,  # 20 meter radius
        popup=f"Photo location: {metadata['filename']}",
        color='red',
        fill=True,
        fillOpacity=0.3
    ).add_to(image_map)
    
    # Save the map
    image_map.save(output_name)
    
    print(f"\n=== INTERACTIVE MAP CREATED ===")
    print(f"Map saved as: {output_name}")
    print(f"Open this file in your web browser to see the location")
    print(f"\nImage Information:")
    print(f"Filename: {metadata['filename']}")
    print(f"GPS Coordinates: {metadata['latitude']:.6f}, {metadata['longitude']:.6f}")
    print(f"Google Maps Link: https://maps.google.com/?q={metadata['latitude']},{metadata['longitude']}")
    
    return metadata

def create_interactive_map_all_images(folder_path, output_name="all_images_map.html"):
    """Create an interactive map for all images in a folder"""
    
    if not os.path.exists(folder_path):
        print(f"Folder {folder_path} not found")
        return None
    
    print(f"Creating interactive map for all images in: {folder_path}")
    
    all_metadata = []
    
    # Extract metadata from all HEIC images
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.heic', '.heif')):
            filepath = os.path.join(folder_path, filename)
            metadata = extract_gps_data(filepath)
            
            if metadata and metadata['latitude'] and metadata['longitude']:
                all_metadata.append(metadata)
    
    if not all_metadata:
        print("No images with GPS data found")
        return None
    
    # Calculate center point for map
    center_lat = sum(img['latitude'] for img in all_metadata) / len(all_metadata)
    center_lon = sum(img['longitude'] for img in all_metadata) / len(all_metadata)
    
    # Create map
    route_map = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=12,
        tiles='OpenStreetMap'
    )
    
    # Add markers for each image
    for i, metadata in enumerate(all_metadata, 1):
        
        # Create popup with image information
        popup_html = f"""
        <div style="font-family: Arial; width: 250px;">
            <h4 style="color: #2E8B57;">üì∏ Image #{i}</h4>
            <p><strong>File:</strong> {metadata['filename']}</p>
            <p><strong>Location:</strong><br>
            {metadata['latitude']:.6f}, {metadata['longitude']:.6f}</p>
            <p><strong>Time:</strong> {metadata['datetime'] or 'Unknown'}</p>
            <a href="https://maps.google.com/?q={metadata['latitude']},{metadata['longitude']}" target="_blank">Google Maps</a>
        </div>
        """
        
        # Add marker
        folium.Marker(
            [metadata['latitude'], metadata['longitude']],
            popup=folium.Popup(popup_html, max_width=300),
            tooltip=f"#{i}: {metadata['filename']}",
            icon=folium.Icon(color='blue', icon='camera', prefix='fa')
        ).add_to(route_map)
    
    # Connect points with a route line (sorted by timestamp if available)
    try:
        # Sort by datetime if available
        sorted_metadata = sorted(all_metadata, 
                                key=lambda x: datetime.strptime(x['datetime'], '%Y:%m:%d %H:%M:%S') if x['datetime'] else datetime.min)
        
        coordinates = [[img['latitude'], img['longitude']] for img in sorted_metadata]
        folium.PolyLine(
            coordinates,
            color='red',
            weight=3,
            opacity=0.7,
            popup="Your route"
        ).add_to(route_map)
    except:
        print("Could not create route line (timestamp issues)")
    
    # Save map
    route_map.save(output_name)
    
    print(f"\n=== INTERACTIVE MAP CREATED ===")
    print(f"Map saved as: {output_name}")
    print(f"Processed {len(all_metadata)} images with GPS data")
    print(f"Open this file in your web browser to see all locations")
    
    # Also save CSV for reference
    df = pd.DataFrame(all_metadata)
    df.to_csv('all_images_coordinates.csv', index=False)
    print(f"Coordinates also saved to: all_images_coordinates.csv")
    
    return all_metadata

if __name__ == "__main__":
    # Create map for your specific image
    image_path = r"C:\Users\sybox\Downloads\IMG_4122 2.HEIC"
    
    print("=== CREATING INTERACTIVE MAP ===")
    print("This will create an interactive map showing your image location\n")
    
    # Check if file exists
    if os.path.exists(image_path):
        # Create map for single image
        metadata = create_interactive_map_single_image(image_path, "IMG_4122_map.html")
        
        if metadata:
            print(f"\nüó∫Ô∏è  Map created! Open 'IMG_4122_map.html' in your browser")
            print(f"üìç Click the red marker to see detailed image information")
        
    else:
        print(f"File not found at: {image_path}")
    
    print("\n" + "="*50)
    print("OPTIONAL: Create map for ALL images in Downloads folder")
    print("Uncomment the line below to process all images:")
    print("# create_interactive_map_all_images(r'C:\\Users\\sybox\\Downloads', 'all_images_route_map.html')")
    
    # Uncomment this line to create map for ALL images:
    # create_interactive_map_all_images(r"C:\Users\sybox\Downloads", "all_images_route_map.html")

=== CREATING INTERACTIVE MAP ===
This will create an interactive map showing your image location

Creating interactive map for: IMG_4122 2.HEIC

=== INTERACTIVE MAP CREATED ===
Map saved as: IMG_4122_map.html
Open this file in your web browser to see the location

Image Information:
Filename: IMG_4122 2.HEIC
GPS Coordinates: 42.366347, -83.187486
Google Maps Link: https://maps.google.com/?q=42.366347222222224,-83.18748611111111

üó∫Ô∏è  Map created! Open 'IMG_4122_map.html' in your browser
üìç Click the red marker to see detailed image information

OPTIONAL: Create map for ALL images in Downloads folder
Uncomment the line below to process all images:
# create_interactive_map_all_images(r'C:\Users\sybox\Downloads', 'all_images_route_map.html')


### 3. BULK Street Address Classifier 

In [22]:
import os
import zipfile
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pillow_heif import register_heif_opener
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
import tempfile
import shutil

# Enable HEIC support
register_heif_opener()

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_from_heic(image_path):
    """Extract GPS coordinates from HEIC image"""
    try:
        image = Image.open(image_path)
        exifdata = image.getexif()
        
        coordinates = {
            'latitude': None,
            'longitude': None
        }
        
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            
            if tag == "GPSInfo":
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    coordinates['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    coordinates['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
        
        return coordinates
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return {'latitude': None, 'longitude': None}

def get_street_address(lat, lon, max_retries=2):
    """Get street address and neighborhood from coordinates using OpenStreetMap"""
    geolocator = Nominatim(user_agent="detroit_image_classifier", timeout=10)
    
    for attempt in range(max_retries):
        try:
            location = geolocator.reverse(f"{lat}, {lon}", timeout=15)
            
            if location and location.raw.get('address'):
                address = location.raw['address']
                
                # Build street address
                house_number = address.get('house_number', '')
                road = address.get('road', '')
                street_address = f"{house_number} {road}".strip()
                
                # If no specific street, try other road types
                if not street_address or street_address == ' ':
                    street_address = (
                        address.get('highway') or 
                        address.get('footway') or 
                        address.get('cycleway') or 
                        'Unknown Street'
                    )
                
                # Get neighborhood
                neighborhood = (
                    address.get('neighbourhood') or 
                    address.get('suburb') or 
                    address.get('hamlet') or 
                    address.get('village') or 
                    address.get('town') or 
                    address.get('city_district') or 
                    address.get('quarter') or
                    'Unknown Neighborhood'
                )
                
                return street_address, neighborhood
            
        except (GeocoderTimedOut, GeocoderServiceError):
            if attempt < max_retries - 1:
                time.sleep(2)  # Wait before retry
            
    return "Unknown Street", "Unknown Neighborhood"

def process_zip_file(zip_path, output_csv="detroit_images_classified.csv", extract_images=True):
    """Process all HEIC images in a zip file and classify by street address"""
    
    if not os.path.exists(zip_path):
        print(f"Zip file not found: {zip_path}")
        return None
    
    print(f"Processing zip file: {zip_path}")
    print("Note: Progress shown every 10 images to reduce output...")
    
    # Create directory to extract images permanently for viewing
    extract_dir = "extracted_images"
    if extract_images and not os.path.exists(extract_dir):
        os.makedirs(extract_dir)
    
    # Create temporary directory for processing
    temp_dir = tempfile.mkdtemp()
    all_results = []
    
    try:
        # Extract zip file
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)
        
        # Find all HEIC files recursively
        heic_files = []
        for root, dirs, files in os.walk(temp_dir):
            for file in files:
                if file.lower().endswith(('.heic', '.heif')):
                    heic_files.append(os.path.join(root, file))
        
        print(f"Found {len(heic_files)} HEIC images")
        
        if not heic_files:
            print("No HEIC files found in the zip archive")
            return None
        
        # Process each image
        processed_count = 0
        no_gps_count = 0
        
        for i, image_path in enumerate(heic_files, 1):
            filename = os.path.basename(image_path)
            
            # Copy image to permanent directory for viewing
            if extract_images:
                dest_path = os.path.join(extract_dir, filename)
                try:
                    shutil.copy2(image_path, dest_path)
                    # Create relative path for hyperlink
                    image_link = f"./{extract_dir}/{filename}"
                except Exception as e:
                    image_link = "File copy failed"
            else:
                image_link = "Not extracted"
            
            # Show progress every 10 images
            if i % 10 == 0 or i == 1:
                print(f"Progress: {i}/{len(heic_files)} images processed...")
            
            # Extract GPS coordinates
            coordinates = extract_gps_from_heic(image_path)
            
            if coordinates['latitude'] and coordinates['longitude']:
                lat = coordinates['latitude']
                lon = coordinates['longitude']
                
                # Get street address and neighborhood (quietly)
                street_address, neighborhood = get_street_address(lat, lon)
                
                result = {
                    'filename': filename,
                    'image_path': image_link,
                    'lat': lat,
                    'lon': lon,
                    'street_address': street_address,
                    'neighborhood': neighborhood
                }
                
                all_results.append(result)
                processed_count += 1
                
                # Rate limiting - be respectful to free service
                time.sleep(1.5)  # 1.5 second delay between requests
                
            else:
                no_gps_count += 1
                
                # Still add to results with no location data
                result = {
                    'filename': filename,
                    'image_path': image_link,
                    'lat': None,
                    'lon': None,
                    'street_address': 'No GPS Data',
                    'neighborhood': 'No GPS Data'
                }
                all_results.append(result)
        
        # Save results to CSV
        if all_results:
            df = pd.DataFrame(all_results)
            df.to_csv(output_csv, index=False)
            
            # Also create an HTML version with clickable links
            create_html_with_links(df, "detroit_images_with_links.html")
            
            print(f"\n=== PROCESSING COMPLETE ===")
            print(f"Total images: {len(all_results)}")
            print(f"With GPS data: {processed_count}")
            print(f"Without GPS data: {no_gps_count}")
            print(f"Results saved to: {output_csv}")
            print(f"HTML with clickable images: detroit_images_with_links.html")
            if extract_images:
                print(f"Images extracted to: {extract_dir}/ folder")
            
            # Show summary of neighborhoods found
            if processed_count > 0:
                gps_df = df[df['lat'].notna()]
                neighborhood_counts = gps_df['neighborhood'].value_counts()
                
                print(f"\n=== NEIGHBORHOODS FOUND ===")
                for neighborhood, count in neighborhood_counts.head(10).items():
                    print(f"{neighborhood}: {count} images")
                
                if len(neighborhood_counts) > 10:
                    print(f"... and {len(neighborhood_counts) - 10} more neighborhoods")
            
            return df
        else:
            print("No images were processed successfully")
            return None
            
    except Exception as e:
        print(f"Error processing zip file: {e}")
        return None
        
    finally:
        # Clean up temporary directory
        try:
            shutil.rmtree(temp_dir)
        except:
            pass

def create_html_with_links(df, output_html):
    """Create an HTML table with clickable image thumbnails"""
    
    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Detroit Images Classification</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; }
            table { border-collapse: collapse; width: 100%; }
            th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
            th { background-color: #f2f2f2; }
            .thumbnail { width: 100px; height: 75px; object-fit: cover; cursor: pointer; }
            .thumbnail:hover { opacity: 0.8; }
            .popup { display: none; position: fixed; z-index: 1000; left: 0; top: 0; width: 100%; height: 100%; background-color: rgba(0,0,0,0.9); }
            .popup-content { position: relative; margin: auto; padding: 20px; width: 80%; max-width: 800px; top: 10%; }
            .popup img { width: 100%; height: auto; }
            .close { color: white; float: right; font-size: 28px; font-weight: bold; cursor: pointer; }
        </style>
    </head>
    <body>
        <h1>Detroit Images Classification Results</h1>
        <p>Click on thumbnails to view full-size images</p>
        
        <table>
            <tr>
                <th>Thumbnail</th>
                <th>Filename</th>
                <th>Latitude</th>
                <th>Longitude</th>
                <th>Street Address</th>
                <th>Neighborhood</th>
                <th>Google Maps</th>
            </tr>
    """
    
    for _, row in df.iterrows():
        if pd.notna(row['lat']):
            maps_link = f"https://maps.google.com/?q={row['lat']},{row['lon']}"
            maps_cell = f'<a href="{maps_link}" target="_blank">View</a>'
            lat_display = f"{row['lat']:.6f}"
            lon_display = f"{row['lon']:.6f}"
        else:
            maps_cell = "No GPS"
            lat_display = "No GPS"
            lon_display = "No GPS"
        
        # Create thumbnail and popup for image
        if "extracted_images" in str(row['image_path']):
            thumbnail_html = f'''
                <img src="{row['image_path']}" class="thumbnail" 
                     onclick="openPopup('{row['image_path']}', '{row['filename']}')" 
                     alt="{row['filename']}" title="Click to enlarge">
            '''
        else:
            thumbnail_html = f"<span>{row['filename']}</span>"
        
        html_content += f"""
            <tr>
                <td>{thumbnail_html}</td>
                <td>{row['filename']}</td>
                <td>{lat_display}</td>
                <td>{lon_display}</td>
                <td>{row['street_address']}</td>
                <td>{row['neighborhood']}</td>
                <td>{maps_cell}</td>
            </tr>
        """
    
    html_content += """
        </table>
        
        <!-- Popup for full-size image -->
        <div id="imagePopup" class="popup" onclick="closePopup()">
            <div class="popup-content">
                <span class="close" onclick="closePopup()">&times;</span>
                <img id="popupImage" src="">
                <p id="popupCaption" style="color: white; text-align: center; margin-top: 10px;"></p>
            </div>
        </div>
        
        <script>
            function openPopup(imageSrc, caption) {
                document.getElementById('imagePopup').style.display = 'block';
                document.getElementById('popupImage').src = imageSrc;
                document.getElementById('popupCaption').innerHTML = caption;
            }
            
            function closePopup() {
                document.getElementById('imagePopup').style.display = 'none';
            }
            
            // Close popup with Escape key
            document.addEventListener('keydown', function(event) {
                if (event.key === 'Escape') {
                    closePopup();
                }
            });
        </script>
    </body>
    </html>
    """
    
    with open(output_html, 'w', encoding='utf-8') as f:
        f.write(html_content)
    
    print(f"HTML table with clickable images saved as: {output_html}")
    print(f"Open this file in your browser to view images with classification data")

def create_summary_map(csv_file, output_html="detroit_images_map.html"):
    """Create an interactive map from the CSV results"""
    try:
        import folium
        
        df = pd.read_csv(csv_file)
        gps_df = df[df['lat'].notna()].copy()
        
        if gps_df.empty:
            print("No GPS data available for mapping")
            return
        
        # Create map centered on Detroit area
        center_lat = gps_df['lat'].mean()
        center_lon = gps_df['lon'].mean()
        
        m = folium.Map(location=[center_lat, center_lon], zoom_start=12)
        
        # Add markers for each image
        for _, row in gps_df.iterrows():
            popup_text = f"""
            <b>File:</b> {row['filename']}<br>
            <b>Address:</b> {row['street_address']}<br>
            <b>Neighborhood:</b> {row['neighborhood']}<br>
            <b>Coordinates:</b> {row['lat']:.6f}, {row['lon']:.6f}
            """
            
            folium.Marker(
                [row['lat'], row['lon']],
                popup=popup_text,
                tooltip=f"{row['filename']} - {row['neighborhood']}"
            ).add_to(m)
        
        m.save(output_html)
        print(f"Interactive map saved as: {output_html}")
        
    except ImportError:
        print("Install folium to create interactive map: pip install folium")
    except Exception as e:
        print(f"Error creating map: {e}")

if __name__ == "__main__":
    # Path to your zip file
    zip_path = r"C:\Users\sybox\OneDrive\Desktop\NIJ Detroit\data\Archive_clean.zip"
    
    print("=== DETROIT IMAGES STREET CLASSIFICATION ===")
    print("This script will:")
    print("1. Extract all HEIC images from the zip file")
    print("2. Get GPS coordinates from each image")  
    print("3. Use free reverse geocoding to get street addresses")
    print("4. Classify images by neighborhood")
    print("5. Save results to CSV")
    print("\nNote: This will take 5-10 minutes due to rate limiting on free geocoding service")
    print("="*60)
    
    # Process all images
    results = process_zip_file(zip_path, "detroit_images_classified.csv")
    
    if results is not None:
        print("\n=== OPTIONAL: CREATE INTERACTIVE MAP ===")
        print("Uncomment the line below to create an interactive map:")
        print("# create_summary_map('detroit_images_classified.csv')")
        
        # Uncomment to create map:
        # create_summary_map('detroit_images_classified.csv')

=== DETROIT IMAGES STREET CLASSIFICATION ===
This script will:
1. Extract all HEIC images from the zip file
2. Get GPS coordinates from each image
3. Use free reverse geocoding to get street addresses
4. Classify images by neighborhood
5. Save results to CSV

Note: This will take 5-10 minutes due to rate limiting on free geocoding service
Processing zip file: C:\Users\sybox\OneDrive\Desktop\NIJ Detroit\data\Archive_clean.zip
Note: Progress shown every 10 images to reduce output...
Found 200 HEIC images
Progress: 1/200 images processed...
Progress: 10/200 images processed...
Progress: 20/200 images processed...
Progress: 30/200 images processed...
Progress: 40/200 images processed...
Progress: 50/200 images processed...
Progress: 60/200 images processed...
Progress: 70/200 images processed...
Progress: 80/200 images processed...
Progress: 90/200 images processed...
Progress: 100/200 images processed...
Progress: 110/200 images processed...
Progress: 120/200 images processed...
Progress:

### 4.  Follow 3, but add statistical analysis and organized folders

In [29]:
import os
import zipfile
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pillow_heif import register_heif_opener
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
import tempfile
import shutil

# Enable HEIC support
register_heif_opener()

def convert_gps_to_decimal(gps_coord, gps_ref):
    """Convert GPS coordinates from degrees/minutes/seconds to decimal format"""
    if not gps_coord or not gps_ref:
        return None
    
    degrees = float(gps_coord[0])
    minutes = float(gps_coord[1])
    seconds = float(gps_coord[2])
    
    decimal = degrees + minutes/60 + seconds/3600
    
    if gps_ref in ['S', 'W']:
        decimal = -decimal
    
    return decimal

def extract_gps_from_heic(image_path):
    """Extract GPS coordinates from HEIC image"""
    try:
        image = Image.open(image_path)
        exifdata = image.getexif()
        
        coordinates = {
            'latitude': None,
            'longitude': None
        }
        
        for tag_id in exifdata:
            tag = TAGS.get(tag_id, tag_id)
            
            if tag == "GPSInfo":
                gps_data = {}
                gps_info = exifdata.get_ifd(tag_id)
                
                for key in gps_info.keys():
                    decode = GPSTAGS.get(key, key)
                    gps_data[decode] = gps_info[key]
                
                if 'GPSLatitude' in gps_data and 'GPSLatitudeRef' in gps_data:
                    coordinates['latitude'] = convert_gps_to_decimal(
                        gps_data['GPSLatitude'], 
                        gps_data['GPSLatitudeRef']
                    )
                
                if 'GPSLongitude' in gps_data and 'GPSLongitudeRef' in gps_data:
                    coordinates['longitude'] = convert_gps_to_decimal(
                        gps_data['GPSLongitude'], 
                        gps_data['GPSLongitudeRef']
                    )
        
        return coordinates
        
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return {'latitude': None, 'longitude': None}

def get_street_address(lat, lon, max_retries=2):
    """Get street address and neighborhood from coordinates using OpenStreetMap"""
    geolocator = Nominatim(user_agent="detroit_image_classifier", timeout=10)
    
    for attempt in range(max_retries):
        try:
            location = geolocator.reverse(f"{lat}, {lon}", timeout=15)
            
            if location and location.raw.get('address'):
                address = location.raw['address']
                
                # Build street address
                house_number = address.get('house_number', '')
                road = address.get('road', '')
                street_address = f"{house_number} {road}".strip()
                
                # If no specific street, try other road types
                if not street_address or street_address == ' ':
                    street_address = (
                        address.get('highway') or 
                        address.get('footway') or 
                        address.get('cycleway') or 
                        'Unknown Street'
                    )
                
                # Get neighborhood
                neighborhood = (
                    address.get('neighbourhood') or 
                    address.get('suburb') or 
                    address.get('hamlet') or 
                    address.get('village') or 
                    address.get('town') or 
                    address.get('city_district') or 
                    address.get('quarter') or
                    'Unknown Neighborhood'
                )
                
                return street_address, neighborhood
            
        except (GeocoderTimedOut, GeocoderServiceError):
            if attempt < max_retries - 1:
                time.sleep(2)  # Wait before retry
            
    return "Unknown Street", "Unknown Neighborhood"

def process_zip_file(zip_path, output_csv="detroit_images_classified.csv"):
    """Process all HEIC images in a zip file and classify by street address"""
    
    if not os.path.exists(zip_path):
        print(f"Zip file not found: {zip_path}")
        return None
    
    print(f"Processing zip file: {zip_path}")
    print("Note: Progress shown every 10 images to reduce output...")
    
    # Create temporary directory for processing
    temp_dir = tempfile.mkdtemp()
    all_results = []
    
    try:
        # Extract zip file
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)
        
        # Find all HEIC files recursively
        heic_files = []
        for root, dirs, files in os.walk(temp_dir):
            for file in files:
                if file.lower().endswith(('.heic', '.heif')):
                    heic_files.append(os.path.join(root, file))
        
        print(f"Found {len(heic_files)} HEIC images")
        
        if not heic_files:
            print("No HEIC files found in the zip archive")
            return None, None
        
        # Process each image
        processed_count = 0
        no_gps_count = 0
        
        for i, image_path in enumerate(heic_files, 1):
            filename = os.path.basename(image_path)
            
            # Show progress every 10 images
            if i % 10 == 0 or i == 1:
                print(f"Progress: {i}/{len(heic_files)} images processed...")
            
            # Extract GPS coordinates
            coordinates = extract_gps_from_heic(image_path)
            
            if coordinates['latitude'] and coordinates['longitude']:
                lat = coordinates['latitude']
                lon = coordinates['longitude']
                
                # Get street address and neighborhood (quietly)
                street_address, neighborhood = get_street_address(lat, lon)
                
                result = {
                    'filename': filename,
                    'lat': lat,
                    'lon': lon,
                    'street_address': street_address,
                    'neighborhood': neighborhood,
                    'temp_path': image_path
                }
                
                all_results.append(result)
                processed_count += 1
                
                # Rate limiting - be respectful to free service
                time.sleep(1.5)  # 1.5 second delay between requests
                
            else:
                no_gps_count += 1
                
                # Still add to results with no location data
                result = {
                    'filename': filename,
                    'lat': None,
                    'lon': None,
                    'street_address': 'No GPS Data',
                    'neighborhood': 'No GPS Data',
                    'temp_path': image_path
                }
                all_results.append(result)
        
        # Save results to CSV (without temp_path column)
        if all_results:
            df = pd.DataFrame(all_results)
            df_output = df.drop('temp_path', axis=1)  # Remove temp path from CSV
            df_output.to_csv(output_csv, index=False)
            
            print(f"\n=== PROCESSING COMPLETE ===")
            print(f"Total images: {len(all_results)}")
            print(f"With GPS data: {processed_count}")
            print(f"Without GPS data: {no_gps_count}")
            print(f"Results saved to: {output_csv}")
            
            # Show summary of neighborhoods found
            if processed_count > 0:
                gps_df = df[df['lat'].notna()]
                neighborhood_counts = gps_df['neighborhood'].value_counts()
                
                print(f"\n=== NEIGHBORHOODS FOUND ===")
                for neighborhood, count in neighborhood_counts.head(10).items():
                    print(f"{neighborhood}: {count} images")
                
                if len(neighborhood_counts) > 10:
                    print(f"... and {len(neighborhood_counts) - 10} more neighborhoods")
            
            return df, temp_dir  # Return with temp_path for organizing
        else:
            print("No images were processed successfully")
            return None, None
            
    except Exception as e:
        print(f"Error processing zip file: {e}")
        return None, None

def organize_images_by_neighborhood(df, temp_dir, output_folder="images_by_neighborhood"):
    """Organize images into folders by neighborhood and convert HEIC to JPG"""
    
    print(f"Organizing {len(df)} images into neighborhood folders...")
    print("Converting HEIC files to JPG for better compatibility...")
    
    # Create main output directory
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # Get unique neighborhoods and create folders
    neighborhoods = df['neighborhood'].unique()
    neighborhood_stats = {}
    
    for neighborhood in neighborhoods:
        # Clean neighborhood name for folder (remove invalid characters)
        clean_name = "".join(c for c in neighborhood if c.isalnum() or c in (' ', '-', '_')).strip()
        if not clean_name:
            clean_name = "Unknown"
        
        neighborhood_folder = os.path.join(output_folder, clean_name)
        if not os.path.exists(neighborhood_folder):
            os.makedirs(neighborhood_folder)
        
        neighborhood_stats[clean_name] = 0
    
    # Copy and convert images to their neighborhood folders
    converted_count = 0
    missing_count = 0
    
    for _, row in df.iterrows():
        filename = row['filename']
        neighborhood = row['neighborhood']
        source_path = row['temp_path']
        
        # Clean neighborhood name
        clean_name = "".join(c for c in neighborhood if c.isalnum() or c in (' ', '-', '_')).strip()
        if not clean_name:
            clean_name = "Unknown"
        
        # Convert HEIC to JPG
        jpg_filename = os.path.splitext(filename)[0] + '.jpg'
        dest_folder = os.path.join(output_folder, clean_name)
        dest_path = os.path.join(dest_folder, jpg_filename)
        
        # Convert and save as JPG
        if os.path.exists(source_path):
            try:
                # Open HEIC and convert to JPG
                image = Image.open(source_path)
                
                # Convert to RGB if necessary (HEIC might be in other color modes)
                if image.mode != 'RGB':
                    image = image.convert('RGB')
                
                # Save as high-quality JPG
                image.save(dest_path, 'JPEG', quality=95)
                
                neighborhood_stats[clean_name] += 1
                converted_count += 1
                
            except Exception as e:
                print(f"Error converting {filename}: {e}")
        else:
            missing_count += 1
    
    # Create summary report inside the neighborhood folder
    summary_path = os.path.join(output_folder, "organization_summary.txt")
    with open(summary_path, 'w') as f:
        f.write("Detroit Images Organization Summary\n")
        f.write("=" * 40 + "\n\n")
        f.write(f"Total images processed: {len(df)}\n")
        f.write(f"Images successfully converted: {converted_count}\n")
        f.write(f"Missing images: {missing_count}\n")
        f.write(f"Format: HEIC converted to JPG for compatibility\n\n")
        f.write("Images per neighborhood:\n")
        f.write("-" * 25 + "\n")
        for neighborhood, count in sorted(neighborhood_stats.items(), key=lambda x: x[1], reverse=True):
            f.write(f"{neighborhood}: {count} images\n")
    
    # Clean up temporary directory now
    try:
        shutil.rmtree(temp_dir)
    except:
        pass
    
    print(f"\n=== ORGANIZATION COMPLETE ===")
    print(f"Images converted to JPG: {converted_count}")
    print(f"Missing images: {missing_count}")
    print(f"Output folder: {output_folder}")
    print(f"Summary saved to: {summary_path}")
    
    print(f"\n=== NEIGHBORHOOD FOLDERS CREATED ===")
    for neighborhood, count in sorted(neighborhood_stats.items(), key=lambda x: x[1], reverse=True):
        print(f"{neighborhood}: {count} images")
    
    return True

if __name__ == "__main__":
    # Path to your zip file
    zip_path = r"C:\Users\sybox\OneDrive\Desktop\NIJ Detroit\data\Archive_clean.zip"
    
    print("=== DETROIT IMAGES STREET CLASSIFICATION ===")
    print("This script will:")
    print("1. Extract all HEIC images from the zip file")
    print("2. Get GPS coordinates from each image")  
    print("3. Use free reverse geocoding to get street addresses")
    print("4. Classify images by neighborhood")
    print("5. Convert HEIC to JPG and organize into folders")
    print("6. Save results to CSV")
    print("\nNote: This will take 5-10 minutes due to rate limiting on free geocoding service")
    print("="*60)
    
    # Process all images
    results, temp_dir = process_zip_file(zip_path, "detroit_images_classified.csv")
    
    if results is not None:
        print("\n=== ORGANIZING IMAGES BY NEIGHBORHOOD ===")
        
        # Organize images into neighborhood folders
        organize_images_by_neighborhood(results, temp_dir)
        
        print("\n=== FINAL OUTPUT ===")
        print("‚úÖ detroit_images_classified.csv - Classification data")
        print("‚úÖ images_by_neighborhood/ - Organized folders with JPG images")
        print("‚úÖ images_by_neighborhood/organization_summary.txt - Summary report")
        print("\nNote: HEIC files converted to JPG for better compatibility")

=== DETROIT IMAGES STREET CLASSIFICATION ===
This script will:
1. Extract all HEIC images from the zip file
2. Get GPS coordinates from each image
3. Use free reverse geocoding to get street addresses
4. Classify images by neighborhood
5. Convert HEIC to JPG and organize into folders
6. Save results to CSV

Note: This will take 5-10 minutes due to rate limiting on free geocoding service
Processing zip file: C:\Users\sybox\OneDrive\Desktop\NIJ Detroit\data\Archive_clean.zip
Note: Progress shown every 10 images to reduce output...
Found 200 HEIC images
Progress: 1/200 images processed...
Progress: 10/200 images processed...
Progress: 20/200 images processed...
Progress: 30/200 images processed...
Progress: 40/200 images processed...
Progress: 50/200 images processed...
Progress: 60/200 images processed...
Progress: 70/200 images processed...
Progress: 80/200 images processed...
Progress: 90/200 images processed...
Progress: 100/200 images processed...
Progress: 110/200 images processed..

In [30]:
import os
print("Current directory:", os.getcwd())
print("Files in current directory:")
for item in os.listdir():
    print(f"  {item}")

Current directory: C:\Users\sybox\NIJ Detroit
Files in current directory:
  .ipynb_checkpoints
  06.23.Address Convert and Crossroad Detect.ipynb
  06.28.HAWK_NY_Images.ipynb
  07.23.HAWK-NLCD.ipynb
  07.24.Detroit Images Classification.ipynb
  ade20k_starter.ipynb
  Detect Nearby Crossroad.ipynb
  detroit_images_classified.csv
  extracted_images
  HAWK-NY-map.ipynb
  images_by_neighborhood
  Intro-Read Images on Python.ipynb
  Mapillary Images.ipynb
  my_functions.ipynb
  my_functions.py
  Object Detection using YOLOv8.ipynb
  __pycache__
