<a href="https://colab.research.google.com/github/gbrlpzz/liminal-ai-prompts/blob/main/encouraging_reuse_full_gpkg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [None]:
import os
import json
import csv
import re
from PIL import Image
import google.generativeai as genai
from google.colab import userdata
import time
import concurrent.futures
import requests
from requests.exceptions import RequestException
import geopandas as gpd
import pandas as pd

# Report the installed google-generativeai version so a run can be tied to a specific SDK release.
print(f"google-generativeai version: {genai.__version__}")

google-generativeai version: 0.7.2


Load Drive

In [None]:
# Mount Google Drive into the Colab filesystem; later cells read from and write to
# paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


API Key

In [None]:
# SECURITY: an API key used to be hardcoded in this cell. Treat any key that was ever
# saved inside the notebook as compromised: revoke it and store its replacement as a
# Colab secret (key icon in the sidebar) or an environment variable named GOOGLE_API_KEY.
# The environment variable is checked first; otherwise the Colab secret is used
# (userdata.get raises if the secret is missing or notebook access was not granted).
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or userdata.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("GOOGLE_API_KEY is empty; set it as a Colab secret or environment variable.")
genai.configure(api_key=GOOGLE_API_KEY)
# Confirm configuration without echoing any part of the secret into the saved output.
print("API key loaded.")

API Key (first 5 chars): AIzaS...


Test Connection and Load Model List

In [None]:
# Connectivity check: fetch the model catalogue from the API and print every model name.
# Any failure is reported as a printed message instead of an exception, so the notebook
# keeps running even when the listing call does not succeed.
try:
    models = list(genai.list_models())
    print(f"Number of models available: {len(models)}")
    for model in models:
        print(f"- {model.name}")
except Exception as e:
    print(f"Error listing models: {str(e)}")

Number of models available: 20
- models/chat-bison-001
- models/text-bison-001
- models/embedding-gecko-001
- models/gemini-1.0-pro-latest
- models/gemini-1.0-pro
- models/gemini-pro
- models/gemini-1.0-pro-001
- models/gemini-1.0-pro-vision-latest
- models/gemini-pro-vision
- models/gemini-1.5-pro-latest
- models/gemini-1.5-pro-001
- models/gemini-1.5-pro
- models/gemini-1.5-pro-exp-0801
- models/gemini-1.5-flash-latest
- models/gemini-1.5-flash-001
- models/gemini-1.5-flash
- models/gemini-1.5-flash-001-tuning
- models/embedding-001
- models/text-embedding-004
- models/aqa


Load Model

In [None]:
# Create a notebook-level Gemini model handle. A failure is printed rather than raised,
# in which case this cell does not assign `model`.
# Note: analyze_image_with_retry builds its own GenerativeModel and does not read this one.
try:
    model = genai.GenerativeModel('gemini-1.5-pro-latest')
    print("Model initialized successfully")
except Exception as e:
    print(f"Error initializing model: {str(e)}")

Model initialized successfully


Analyze Image

In [None]:
def analyze_image_with_retry(image, image_path, max_retries=3, timeout=60):
    """Ask Gemini the 22-question building survey about one image, retrying on failure.

    Each attempt runs the API call in a worker thread so it can be abandoned after
    ``timeout`` seconds. Between failed attempts the function sleeps 1, 2, 4, ...
    seconds (exponential backoff).

    Args:
        image: Image object sent to the model together with the prompt (a PIL image
            in this notebook).
        image_path: Path of the image; only its basename is used, for log messages.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for a single API call before treating it as failed.

    Returns:
        The raw response text (a numbered list of answers), or None when every
        attempt failed.
    """
    image_name = os.path.basename(image_path)
    print(f"  Analyzing image: {image_name}")

    # The prompt is loop-invariant, so it is built once. Its text, including the leading
    # whitespace of every line, is kept exactly as before so the request is unchanged.
    prompt = """
            Analyze this image of a building and answer the following questions. For each question, choose ONLY from the provided options without providing any additional text:

            1. Person present: FALSE or TRUE
            2. Dogs/Pets present: FALSE or TRUE
            3. Livestock present: FALSE or TRUE
            4. Sounds present inside: FALSE or TRUE
            5. Lights present inside: FALSE or TRUE
            6. Collapsed Roof: FALSE or TRUE
            7. Windows: "No - Close (if all)" or "Yes - Open (if any)" or "None"
            8. Window Broken: "None" or "True (if any broken)" or "False (if all windows intact)"
            9. Shutters: "None" or "Close (if all)" or "Open (if any)"
            10. Doorway Curtains: "None" or "Unsecured" or "Secured (if any)"
            11. For Sale Sign: FALSE or TRUE
            12. Civic Number Condition: "None" or "Decorative / Tiled (if any)" or "Not Updated"
            13. Chimney Smoke: FALSE or TRUE
            14. Vehicle present in garage or private driveway: FALSE or TRUE
            15. Hanging laundry: FALSE or TRUE
            16. Personal Belongings Outside: "None" or "Good Condition" or "Mixed Condition" or "Bad Condition"
            17. Mail accumulation: "None" or "Minimal" or "Extensive"
            18. Plants / Gardens: "None" or "Well maintained (any)" or "Overgrown/Dead"
            19. Perceived Balcony Damage: "Mild Damage (cracking in plaster in facade)" or "No balcony" or "Severe Damage (visible rebar)" or "No Damage (good condition)"
            20. Perceived Private Staircase Damage: "No private staircases" or "Severe Damage (missing stairs)" or "No Damage (good condition)" or "Mild Damage (rusted railings)"
            21. Perceived Shutter Damage: "No Shutters" or "No Damage (good condition)" or "Mild Damage (needs to be repainted)" or "Severe Damage (rotting)"
            22. Door Board: "None" or "Good Condition (any)" or "Bad Condition"

            Provide your answers as a numbered list, using ONLY the exact phrases from the options given. DO NOT write any additional text, introduction or explanation.
            For example: 22. Door Board: "None"
            """

    for attempt in range(max_retries):
        attempt_label = f"(Attempt {attempt + 1}/{max_retries})"
        # Deliberately not a `with` block: leaving `with ThreadPoolExecutor()` joins the
        # worker thread, so a hung request would keep blocking long after `timeout`.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            model = genai.GenerativeModel('gemini-1.5-pro-latest')
            print(f"  Sending API request for {image_name} {attempt_label}")

            future = executor.submit(model.generate_content, [prompt, image])
            response = future.result(timeout=timeout)
            print(f"  API request completed for {image_name}")
            # Reading .text stays inside the try so a failure here also triggers a retry.
            return response.text
        except concurrent.futures.TimeoutError:
            print(f"  API request timed out for {image_name} {attempt_label}")
        except RequestException as e:
            print(f"  Network error for {image_name}: {str(e)} {attempt_label}")
        except Exception as e:
            print(f"  Error analyzing image {image_name}: {str(e)} {attempt_label}")
        finally:
            # Release the executor without waiting; a timed-out call is abandoned and
            # finishes (or fails) in the background.
            executor.shutdown(wait=False)

        if attempt < max_retries - 1:
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"  Retrying in {wait_time} seconds...")
            time.sleep(wait_time)

    print(f"  Failed to analyze image {image_name} after {max_retries} attempts")
    return None

Parse and score the analysis results, then assign the image path and start the analysis

In [None]:
def process_analysis_result(result_string):
    """Convert the model's numbered answer list into a ``{question: answer}`` dict.

    Every line containing ``": "`` is split at its first occurrence. A leading
    ``"<n>. "`` numbering prefix is removed from the left part to form the key; the
    right part is stripped of surrounding whitespace and double quotes to form the
    value. Lines without ``": "`` are ignored.

    Args:
        result_string: Raw response text, or None when the analysis failed.

    Returns:
        Dict of parsed answers; empty when ``result_string`` is None or has no
        parsable line.
    """
    if result_string is None:
        return {}

    answers = {}
    for raw_line in result_string.split('\n'):
        label, separator, answer = raw_line.partition(': ')
        if not separator:
            continue  # not a "key: value" line
        label = label.strip()
        # Drop everything up to the first ". " (the question number), if there is one.
        _, dot, question = label.partition('. ')
        answers[question if dot else label] = answer.strip().strip('"')
    return answers

In [None]:
def calculate_occupancy_rating(result_dict):
    """Sum the occupancy points for a dict of survey answers.

    Each known question maps its allowed answers to a signed score; positive scores
    count as signs of occupancy and negative ones as signs of vacancy or decay.
    Unknown questions and unknown answers contribute 0.

    Args:
        result_dict: Mapping of question label to answer string. Extra keys such as
            "Building ID" are ignored.

    Returns:
        The integer total of all matched scores.
    """
    # The prompt labels question 12 "Civic Number Condition"; the older label
    # "Address Number Condition" is kept as an alias so both spellings are scored.
    number_plate_points = {"None": 0, "Decorative / Tiled (if any)": 1, "Not Updated": 0}

    point_system = {
        "Person present": {"FALSE": 0, "TRUE": 4},
        "Dogs/Pets present": {"FALSE": 0, "TRUE": 3},
        "Livestock present": {"FALSE": 0, "TRUE": 2},
        "Sounds present inside": {"FALSE": 0, "TRUE": 2},
        "Lights present inside": {"FALSE": 0, "TRUE": 3},
        "Collapsed Roof": {"FALSE": 0, "TRUE": -6},
        "Windows": {"No - Close (if all)": 0, "Yes - Open (if any)": 2, "None": -3},
        "Window Broken": {"None": 0, "True (if any broken)": -2, "False (if all windows intact)": 1},
        "Shutters": {"None": 0, "Close (if all)": -1, "Open (if any)": 2},
        "Doorway Curtains": {"None": 0, "Unsecured": 2, "Secured (if any)": 2},
        "For Sale Sign": {"FALSE": 0, "TRUE": -2},
        "Civic Number Condition": number_plate_points,
        "Address Number Condition": number_plate_points,
        "Chimney Smoke": {"FALSE": 0, "TRUE": 3},
        "Vehicle present in garage or private driveway": {"FALSE": 0, "TRUE": 2},
        "Hanging laundry": {"FALSE": 0, "TRUE": 4},
        "Personal Belongings Outside": {"None": -1, "Good Condition": 3, "Mixed Condition": 2, "Bad Condition": -1},
        "Mail accumulation": {"None": 0, "Minimal": -1, "Extensive": -1},
        "Plants / Gardens": {"None": 0, "Well maintained (any)": 1, "Overgrown/Dead": -2},
        "Perceived Balcony Damage": {"Mild Damage (cracking in plaster in facade)": 0, "No balcony": 0, "Severe Damage (visible rebar)": -3, "No Damage (good condition)": 2},
        "Perceived Private Staircase Damage": {"No private staircases": 0, "Severe Damage (missing stairs)": -2, "No Damage (good condition)": 1, "Mild Damage (rusted railings)": 0},
        "Perceived Shutter Damage": {"No Shutters": 0, "No Damage (good condition)": 1, "Mild Damage (needs to be repainted)": 0, "Severe Damage (rotting)": -2},
        "Door Board": {"None": 0, "Good Condition (any)": 1, "Bad Condition": -2}
    }
    total_points = sum(point_system[key].get(value, 0) for key, value in result_dict.items() if key in point_system)
    return total_points

In [None]:
def process_building(building_folder):
    """Analyse every image in one building folder and collect the parsed answers.

    Files are selected by extension (.png, .jpg, .jpeg, .gif, .bmp; case-insensitive).
    Each image is opened, sent to analyze_image_with_retry and, when a response comes
    back, parsed with process_analysis_result. Failures on a single image are logged
    and skipped. The function pauses one second after each image.

    Args:
        building_folder: Directory holding the images of one building.

    Returns:
        List of parsed answer dicts, one per successfully analysed image.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    building_name = os.path.basename(building_folder)
    print(f"\nProcessing building: {building_name}")

    results = []
    image_count = 0
    # Sorted so images are analysed in a reproducible order; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(building_folder)):
        if not filename.lower().endswith(image_extensions):
            continue

        image_count += 1
        image_path = os.path.join(building_folder, filename)
        print(f"  Opening image: {filename}")
        try:
            # Context manager closes the underlying file handle once the request is done.
            with Image.open(image_path) as image:
                print(f"  Image opened successfully: {filename}")
                analysis_result = analyze_image_with_retry(image, image_path)
            if analysis_result:
                results.append(process_analysis_result(analysis_result))
            else:
                print(f"  Skipping image due to analysis failure: {filename}")
        except Exception as e:
            print(f"  Error processing image {filename}: {str(e)}")

        time.sleep(1)

    print(f"Completed analysis of {image_count} images for building {building_name}")
    return results

In [None]:
def synthesize_building_results(results, building_id):
    """Merge the per-image answers of one building into a single record.

    Merge rules per question:
      * TRUE/FALSE questions: "TRUE" if any image reported "TRUE", else "FALSE".
      * Windows / Shutters: open beats closed beats "None".
      * Window Broken: broken beats intact beats "None".
      * Questions with a priority list: the first listed value seen in any image wins.
      * Any other question: the most frequent answer (first seen wins a tie).

    The merged record is then scored with calculate_occupancy_rating.

    Args:
        results: List of answer dicts, one per analysed image.
        building_id: Identifier stored under "Building ID".

    Returns:
        Dict with "Building ID", one entry per question and "Occupancy Rating".
        When ``results`` is empty only "Building ID" is present.
    """
    print(f"Synthesizing results for building: {building_id}")
    synthesized = {"Building ID": building_id}

    if not results:
        print(f"No results to synthesize for building {building_id}")
        return synthesized

    boolean_keys = {
        "Person present", "Dogs/Pets present", "Livestock present", "Sounds present inside",
        "Lights present inside", "Collapsed Roof", "For Sale Sign", "Chimney Smoke",
        "Vehicle present in garage or private driveway", "Hanging laundry",
    }
    value_priority = {
        "Doorway Curtains": ["Secured (if any)", "Unsecured", "None"],
        "Personal Belongings Outside": ["Good Condition", "Mixed Condition", "Bad Condition", "None"],
        "Mail accumulation": ["Extensive", "Minimal", "None"],
        "Plants / Gardens": ["Well maintained (any)", "Overgrown/Dead", "None"],
        "Perceived Balcony Damage": ["Severe Damage (visible rebar)", "Mild Damage (cracking in plaster in facade)", "No Damage (good condition)", "No balcony"],
        "Perceived Private Staircase Damage": ["Severe Damage (missing stairs)", "Mild Damage (rusted railings)", "No Damage (good condition)", "No private staircases"],
        "Perceived Shutter Damage": ["Severe Damage (rotting)", "Mild Damage (needs to be repainted)", "No Damage (good condition)", "No Shutters"],
        "Door Board": ["Bad Condition", "Good Condition (any)", "None"],
    }

    # Use every question seen in ANY image (first-seen order), not only those of the
    # first image, so an incomplete first response cannot drop answers from later ones.
    question_keys = list(dict.fromkeys(key for result in results for key in result))

    for key in question_keys:
        if key == "Building ID":
            continue

        values = [result.get(key) for result in results if result.get(key)]

        if key in boolean_keys:
            synthesized[key] = "TRUE" if "TRUE" in values else "FALSE"

        elif key in ("Windows", "Shutters"):
            # Either spelling of open/closed is accepted for both questions.
            if "Yes - Open (if any)" in values or "Open (if any)" in values:
                synthesized[key] = "Yes - Open (if any)" if key == "Windows" else "Open (if any)"
            elif "No - Close (if all)" in values or "Close (if all)" in values:
                synthesized[key] = "No - Close (if all)" if key == "Windows" else "Close (if all)"
            else:
                synthesized[key] = "None"

        elif key == "Window Broken":
            if "True (if any broken)" in values:
                synthesized[key] = "True (if any broken)"
            elif "False (if all windows intact)" in values:
                synthesized[key] = "False (if all windows intact)"
            else:
                synthesized[key] = "None"

        elif key in value_priority:
            for priority_value in value_priority[key]:
                if priority_value in values:
                    synthesized[key] = priority_value
                    break
            else:
                synthesized[key] = "None"  # Default if no priority value is found

        else:
            # Majority vote; iterating the list (not a set) makes ties deterministic.
            synthesized[key] = max(values, key=values.count) if values else "None"

    occupancy_rating = calculate_occupancy_rating(synthesized)
    synthesized["Occupancy Rating"] = occupancy_rating

    print(f"Synthesis complete for building {building_id}. Occupancy Rating: {occupancy_rating}")
    return synthesized

In [None]:
def process_all_buildings(parent_folder):
    """Analyse every building sub-folder of ``parent_folder``, reusing cached results.

    Each non-hidden sub-directory is treated as one building. If a sibling file
    ``<folder>_analysis.json`` exists, it is loaded instead of re-running the
    analysis; otherwise the building is processed and its synthesized record is
    written to that file.

    Args:
        parent_folder: Directory whose sub-directories are building image folders.

    Returns:
        List of synthesized building records (cached and freshly computed), in
        sorted folder-name order.
    """
    all_building_results = []
    building_count = 0
    processed_count = 0
    skipped_count = 0

    # Sorted for a reproducible order; os.listdir order is arbitrary.
    for building_folder in sorted(os.listdir(parent_folder)):
        building_path = os.path.join(parent_folder, building_folder)
        # Hidden directories such as .ipynb_checkpoints are tooling artefacts, not buildings.
        if building_folder.startswith('.') or not os.path.isdir(building_path):
            continue

        building_count += 1
        json_path = f"{building_path}_analysis.json"

        if os.path.exists(json_path):
            print(f"Skipping building {building_folder} - JSON file already exists.")
            skipped_count += 1
            # Load existing JSON and add to results
            with open(json_path, "r", encoding="utf-8") as f:
                all_building_results.append(json.load(f))
            continue

        building_results = process_building(building_path)
        synthesized_result = synthesize_building_results(building_results, building_folder)
        all_building_results.append(synthesized_result)

        if building_results:
            # Save individual building result as JSON
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(synthesized_result, f, indent=2)
            print(f"Saved analysis for building {building_folder} to {json_path}")
        else:
            # Caching an empty record would permanently skip a building whose API
            # calls all failed, so leave it uncached to be retried on the next run.
            print(f"No image results for building {building_folder}; not cached so it is retried next run.")
        processed_count += 1

    print(f"Completed analysis of {building_count} buildings")
    print(f"Processed: {processed_count}, Skipped: {skipped_count}")
    return all_building_results

In [None]:
def create_csv(all_building_results, output_file):
    """Write the building records to a CSV file with a stable column order.

    Columns are the union of all record keys in first-seen order, so the header is
    identical from run to run. Records missing a column get an empty cell.

    Args:
        all_building_results: List of dicts, one per building.
        output_file: Destination path; it is resolved to an absolute path.

    Returns:
        The absolute path of the written file, or None when there were no results
        and nothing was written.
    """
    if not all_building_results:
        print("No results to write to CSV.")
        return None

    # dict.fromkeys de-duplicates while keeping first-seen order. A set would give an
    # arbitrary column order that can change between interpreter runs.
    fieldnames = list(dict.fromkeys(key for result in all_building_results for key in result))

    # Create an absolute path for the output file
    output_path = os.path.abspath(output_file)

    # Write the CSV file
    with open(output_path, 'w', newline='', encoding='utf-8') as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(all_building_results)

    print(f"CSV file created: {output_path}")
    return output_path  # Return the path for use in the main function


In [None]:
def main(parent_folder,
         drive_path="/content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST/",
         csv_file="building_analysis_results.csv"):
    """Run the full analysis and export the results as CSV, with a copy on Google Drive.

    Args:
        parent_folder: Directory whose sub-folders are analysed by process_all_buildings.
        drive_path: Directory that receives a copy of the CSV.
        csv_file: File name of the CSV, written relative to the working directory.

    Returns:
        Path of the copy in ``drive_path``; the local CSV path if the copy failed;
        None if there were no results and no CSV was written.
    """
    import shutil  # local import keeps this cell self-contained

    start_time = time.time()
    print(f"Starting analysis of buildings in {parent_folder}")
    all_building_results = process_all_buildings(parent_folder)
    csv_path = create_csv(all_building_results, csv_file)
    duration = time.time() - start_time

    if csv_path is None:
        # create_csv wrote nothing, so there is no file to report or copy.
        print("Analysis complete. No results were produced, so no CSV was written.")
        print(f"Total execution time: {duration:.2f} seconds")
        return None

    print(f"Analysis complete. Results saved in {csv_path}")
    print(f"Total execution time: {duration:.2f} seconds")

    # Copy the file to Google Drive. shutil.copy is used instead of a shell `cp`
    # because the destination path contains spaces, which an unquoted shell command
    # splits into several arguments (the copy then fails without any error in Python).
    drive_csv_path = os.path.join(drive_path, csv_file)
    try:
        shutil.copy(csv_path, drive_csv_path)
    except OSError as e:
        print(f"Could not copy CSV to {drive_csv_path}: {e}")
        return csv_path
    print(f"CSV file also copied to: {drive_csv_path}")
    return drive_csv_path

In [None]:
# Root folder handed to main(); process_all_buildings treats each sub-directory of it as
# one building. As the cell's last expression, the path returned by main() is displayed.
folder_path = "/content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST"
main(folder_path)

Starting analysis of buildings in /content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST
Skipping building 77 - JSON file already exists.
Skipping building 83 - JSON file already exists.
Skipping building 85 - JSON file already exists.
Skipping building 79 - JSON file already exists.
Skipping building 95 - JSON file already exists.
Skipping building 100 - JSON file already exists.
Skipping building EncouragingReuse_Analysis_Fontainemore_ESPG23032_20240803 - JSON file already exists.
Skipping building .ipynb_checkpoints - JSON file already exists.
Completed analysis of 8 buildings
Processed: 0, Skipped: 8
CSV file created: /content/building_analysis_results.csv
Analysis complete. Results saved in /content/building_analysis_results.csv
Total execution time: 0.07 seconds
CSV file also copied to: /content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST/building_analysis_results.csv


'/content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST/building_analysis_results.csv'

In [None]:
import os
import pandas as pd
import geopandas as gpd
import re

def extract_building_id(value):
    """Return the last run of digits found in ``value`` as a string.

    Args:
        value: Any scalar (number or text) from the join field.

    Returns:
        The last group of consecutive digits as a string (leading zeros kept),
        or None when ``value`` is missing or contains no digit.
    """
    if pd.isna(value):
        return None
    # A whole-number float such as 77.0 would stringify as "77.0" and yield "0";
    # convert it to int first so the ID comes out as "77".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    # Match a digit run that is not followed by any later digit, i.e. the last number.
    match = re.search(r'(\d+)(?!.*\d)', str(value))
    return match.group(1) if match else None

def join_shapefile_with_csv(shapefile_path, shapefile_join_field, csv_path, output_gpkg_path, target_epsg=3004):
    """Left-join the building analysis CSV onto a vector layer and save a GeoPackage.

    The join key on the layer side is the last number found in
    ``shapefile_join_field`` (via extract_building_id); on the CSV side it is the
    "Building ID" column. Every feature of the layer is kept; features without a
    matching CSV row get empty analysis columns. Diagnostics about the join are printed.

    Args:
        shapefile_path: Path readable by geopandas.read_file.
        shapefile_join_field: Name of the layer attribute that holds the building ID.
        csv_path: Path of the CSV produced by create_csv.
        output_gpkg_path: Destination GeoPackage path.
        target_epsg: EPSG code the result is reprojected to before saving.

    Returns:
        ``output_gpkg_path``.
    """
    # Load the shapefile
    print(f"Loading shapefile from: {shapefile_path}")
    gdf = gpd.read_file(shapefile_path)
    print(f"Shapefile loaded. Number of features: {len(gdf)}")

    # Load the CSV. IDs are read as text so they are never coerced to int/float
    # (which would turn "077" into 77, or 77 into "77.0" when blanks are present).
    print(f"Loading CSV from: {csv_path}")
    df = pd.read_csv(csv_path, dtype={'Building ID': str})
    # Drop rows without an ID and the .ipynb_checkpoints pseudo-building.
    is_building_row = df['Building ID'].notna() & (df['Building ID'] != '.ipynb_checkpoints')
    df = df[is_building_row].copy()
    print(f"CSV loaded. Number of rows: {len(df)}")

    # Print info about the join field
    print(f"\nJoin field '{shapefile_join_field}' info:")
    print(gdf[shapefile_join_field].info())
    print("\nSample values:")
    print(gdf[shapefile_join_field].head())

    # Extract Building ID from shapefile join field. Values are strings, or None when
    # no ID could be extracted; None is kept as missing rather than the text "None".
    gdf['Extracted_ID'] = gdf[shapefile_join_field].apply(extract_building_id)

    print(f"\nExtracted ID from shapefile data type: {gdf['Extracted_ID'].dtype}")
    print(f"Building ID in CSV data type: {df['Building ID'].dtype}")

    # Print unique values and their counts
    print("\nUnique extracted IDs in shapefile:")
    print(gdf['Extracted_ID'].value_counts().head())
    print("\nUnique values in CSV Building ID:")
    print(df['Building ID'].value_counts())

    # Perform the join
    print(f"\nJoining data on fields: 'Extracted_ID' (shapefile) and 'Building ID' (CSV)")
    gdf_joined = gdf.merge(df, left_on='Extracted_ID', right_on='Building ID', how='left')

    # A left join only grows when a feature matches more than one CSV row, i.e. when a
    # Building ID is repeated in the CSV. IDs repeated in the layer itself are expected
    # and are not reported here.
    added_rows = len(gdf_joined) - len(gdf)
    if added_rows > 0:
        repeated_ids = df.loc[df['Building ID'].duplicated(keep=False), 'Building ID']
        print(f"Warning: the join added {added_rows} rows because some features match multiple CSV rows.")
        print("Building IDs that occur more than once in the CSV:")
        print(repeated_ids.value_counts().head())

    print(f"Join complete. Number of features in result: {len(gdf_joined)}")

    # Check for unmatched rows in shapefile
    unmatched = gdf_joined[gdf_joined['Building ID'].isna()]
    if not unmatched.empty:
        print(f"\nWarning: {len(unmatched)} rows in the shapefile did not match any CSV entries.")
        print("First few unmatched values from shapefile:")
        print(unmatched['Extracted_ID'].head())

    # Check for CSV rows that didn't match any shapefile object
    matched_ids = set(gdf_joined['Building ID'].dropna())
    unmatched_csv = df[~df['Building ID'].isin(matched_ids)]
    if not unmatched_csv.empty:
        print(f"\nWarning: {len(unmatched_csv)} rows in the CSV did not match any shapefile objects.")
        print("Unmatched Building IDs from CSV:")
        print(unmatched_csv['Building ID'].tolist())

    # Reproject to the desired CRS (requires the source layer to declare a CRS)
    gdf_joined = gdf_joined.to_crs(epsg=target_epsg)
    print(f"\nGeoDataFrame CRS set to: EPSG:{target_epsg}")

    # Save as GeoPackage
    print(f"\nSaving joined data as GeoPackage to: {output_gpkg_path}")
    gdf_joined.to_file(output_gpkg_path, driver='GPKG')
    print("GeoPackage saved successfully.")

    return output_gpkg_path

In [None]:
# Input parameters
shapefile_folder = "/content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST"
# NOTE(review): no file extension is given, so this looks like a directory/dataset name
# rather than a single .shp file — confirm this is the intended input.
shapefile_name = "EncouragingReuse_Analysis_Fontainemore_ESPG23032_20240803"
shapefile_join_field = "cd_survey2"  # Replace with the correct field name if different
output_gpkg_name = "joined_analysis_results.gpkg"

# Construct full paths
shapefile_path = os.path.join(shapefile_folder, shapefile_name)
# drive_path is assigned here but not used by the rest of this cell.
drive_path = "/content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST/"
# Despite its name, this points at the local CSV under /content, not at the copy on Google Drive.
drive_csv_path = "/content/building_analysis_results.csv"
output_gpkg_path = os.path.join(shapefile_folder, output_gpkg_name)

# Run the join operation
result_path = join_shapefile_with_csv(shapefile_path, shapefile_join_field, drive_csv_path, output_gpkg_path)
print(f"Operation complete. Result saved to: {result_path}")

Loading shapefile from: /content/drive/MyDrive/Encouraging Reuse 2024: Fontainemore/TEST/EncouragingReuse_Analysis_Fontainemore_ESPG23032_20240803
Shapefile loaded. Number of features: 1787
Loading CSV from: /content/building_analysis_results.csv
CSV loaded. Number of rows: 7

Join field 'cd_survey2' info:
<class 'pandas.core.series.Series'>
RangeIndex: 1787 entries, 0 to 1786
Series name: cd_survey2
Non-Null Count  Dtype
--------------  -----
1787 non-null   int64
dtypes: int64(1)
memory usage: 14.1 KB
None

Sample values:
0    1
1    2
2    3
3    4
4    5
Name: cd_survey2, dtype: int64

Extracted ID from shapefile data type: object
Building ID in CSV data type: object

Unique extracted IDs in shapefile:
Extracted_ID
1889    4
59      4
117     4
1430    4
702     4
Name: count, dtype: int64

Unique values in CSV Building ID:
Building ID
77                                                           1
83                                                           1
85                    