In [None]:
import os
import cv2
from line_profiler import LineProfiler
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon as MplPolygon
import numpy as np
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
import time
import threading
from tkinter import *
from PIL import Image, ImageTk
import queue
import hashlib
from shapely import wkt
from shapely.geometry import box, Polygon, MultiPolygon
from shapely.geometry import Polygon as ShapelyPolygon
from shapely.affinity import translate


In [None]:
from pathlib import Path
import os

# Folder layout used by every cell below. The data and output roots can be
# redirected with the VSS_DATA_ROOT / VSS_OUTPUT_ROOT environment variables;
# otherwise they live next to the directory the notebook is started from.
PROJECT_ROOT = Path.cwd()
DATA_ROOT = Path(os.environ.get("VSS_DATA_ROOT", PROJECT_ROOT / "data"))
OUTPUT_ROOT = Path(os.environ.get("VSS_OUTPUT_ROOT", PROJECT_ROOT / "outputs"))
# Raw dataset folder and the folder that receives the recentred CSV files.
SDD_ROOT = DATA_ROOT.joinpath("source", "sdd", "swiss-dwellings-v3.0.0")
PROCESSED_ROOT = DATA_ROOT.joinpath("processed", "sdd_recentered")


In [None]:
def read_avg_csv_with_progress(filepath, chunksize=100):
    """Load the raw geometry CSV chunk by chunk behind a tqdm progress bar.

    Only the id columns, the entity columns and ``geometry`` are read.

    Args:
        filepath: Location of the CSV file.
        chunksize: Rows per chunk handed out by pandas (one progress tick each).

    Returns:
        One DataFrame holding all chunks concatenated row-wise.
    """
    wanted_columns = ['site_id', 'apartment_id', 'floor_id', 'entity_type', 'entity_subtype', 'geometry']
    reader = pd.read_csv(filepath, chunksize=chunksize, usecols=wanted_columns)
    frames = [frame for frame in tqdm(reader)]
    return pd.concat(frames, axis=0)

def read_csv_with_progress(filepath, chunksize=100):
    """Load a recentred geometry CSV chunk by chunk behind a tqdm progress bar.

    Reads the id columns, the entity columns, ``geometry`` and
    ``recentered_geometry``.

    Args:
        filepath: Location of the CSV file.
        chunksize: Rows per chunk handed out by pandas (one progress tick each).

    Returns:
        One DataFrame holding all chunks concatenated row-wise.
    """
    wanted_columns = [
        'site_id', 'apartment_id', 'floor_id',
        'entity_type', 'entity_subtype',
        'geometry', 'recentered_geometry',
    ]
    reader = pd.read_csv(filepath, chunksize=chunksize, usecols=wanted_columns)
    frames = [frame for frame in tqdm(reader)]
    return pd.concat(frames, axis=0)

def check_encountered_apartment_ids(row_number, apartment_id, encountered_apartment_ids, end_row_number):
    """Report whether an id should be skipped, registering it when it is new.

    Args:
        row_number: Row currently being processed (only used in the log line).
        apartment_id: Id to look up; a missing value (NaN/None) is always skipped.
        encountered_apartment_ids: List of ids seen so far; a new id is appended.
        end_row_number: Last row of the run (only used in the log line).

    Returns:
        True when the id was already seen or is missing, False when it is new.
    """
    seen_or_missing = apartment_id in encountered_apartment_ids or pd.isna(apartment_id)
    if seen_or_missing:
        return True

    encountered_apartment_ids.append(apartment_id)
    ordinal = len(encountered_apartment_ids)
    print(f"\nfound apartment No. {ordinal} @ row {row_number} of {end_row_number}")
    return False

def get_unit_hash(unit_df):
    """Return an MD5 fingerprint of a drawing's geometries and subtypes.

    Rows are sorted by ``recentered_geometry`` and then ``entity_subtype`` so
    the fingerprint does not depend on row order. All geometry strings are
    concatenated first, followed by all subtype strings.

    Args:
        unit_df: Rows of one drawing, with ``recentered_geometry`` and
            ``entity_subtype`` columns.

    Returns:
        The hexadecimal MD5 digest of the concatenated text.
    """
    ordered = unit_df.sort_values(by=['recentered_geometry', 'entity_subtype'])
    print("sorted")

    geometry_part = ''.join(map(str, ordered['recentered_geometry']))
    subtype_part = ''.join(map(str, ordered['entity_subtype']))

    digest = hashlib.md5((geometry_part + subtype_part).encode())
    return digest.hexdigest()

def next_available_filename(directory, base_filename):
    """Return the first unused ``<base>_NNNN.png`` path inside ``directory``.

    Numbering starts at 0001 and is zero-padded to four digits; the first
    number whose file does not exist yet wins.

    Args:
        directory: Folder the file is meant to live in.
        base_filename: Name prefix placed in front of the running number.

    Returns:
        The joined path as a string.
    """
    counter = 1
    candidate = os.path.join(directory, f'{base_filename}_{counter:04d}.png')
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(directory, f'{base_filename}_{counter:04d}.png')
    return candidate

def is_significantly_overlapping(polygon, polygons, threshold=0.5):
    """Tell whether ``polygon`` substantially overlaps any polygon in ``polygons``.

    The overlap of a pair is the intersection area divided by the area of the
    smaller of the two shapes.

    Args:
        polygon: Shape to test; needs ``intersects``, ``intersection`` and ``area``.
        polygons: Iterable of shapes to compare against.
        threshold: Overlap ratio that must be strictly exceeded (default 0.5).

    Returns:
        True as soon as one pair exceeds ``threshold``, otherwise False.
    """
    for other in polygons:
        if not polygon.intersects(other):
            continue

        smaller_area = min(polygon.area, other.area)
        # A zero-area shape can still intersect another one; its ratio is
        # undefined, so skip the pair instead of dividing by zero.
        if smaller_area <= 0:
            continue

        shared_area = polygon.intersection(other).area
        if shared_area / smaller_area > threshold:
            return True
    return False


In [None]:
def recenter_geometry_avg(group):
    """Shift one group's geometries so their mean exterior vertex sits at the origin.

    Every exterior coordinate of every geometry in the group counts once
    towards the mean. The shifted shapes are stored in a new
    ``recentered_geometry`` column. The notebook globals ``drawingNo`` and
    ``sumPts`` are advanced as running totals for the progress message.

    Args:
        group: Rows of one drawing; ``geometry`` must hold shapes exposing
            ``.exterior``.

    Returns:
        The same ``group`` with the ``recentered_geometry`` column added.
    """
    global drawingNo, sumPts

    # Gather the exterior vertices of every shape in the group.
    vertices = []
    for shape in group['geometry']:
        vertices.extend(shape.exterior.coords)

    vertex_count = len(vertices)
    mean_x = sum(vertex[0] for vertex in vertices) / vertex_count
    mean_y = sum(vertex[1] for vertex in vertices) / vertex_count

    # Moving by the negated mean puts the mean vertex on (0, 0).
    group['recentered_geometry'] = group['geometry'].apply(
        lambda geom: translate(geom, -mean_x, -mean_y)
    )

    drawingNo += 1
    sumPts += vertex_count
    print(f"\ndrawing No {drawingNo} sucessfully relocated to origin. Moved total {sumPts} points")

    return group

def plot_apartment(apartment_data):
    """Plot the recentred polygons of one drawing in a new matplotlib figure.

    Each polygon exterior is filled semi-transparently and outlined in black.
    Multi-polygons are drawn part by part; any other geometry type is skipped.

    Args:
        apartment_data: Rows whose ``recentered_geometry`` column holds shapely
            geometries.
    """
    fig, ax = plt.subplots()

    for geometry in apartment_data['recentered_geometry']:
        if isinstance(geometry, Polygon):
            parts = [geometry]
        elif isinstance(geometry, MultiPolygon):
            # Shapely 2.x multi-part geometries are not iterable themselves;
            # their members are reached through ``.geoms``.
            parts = geometry.geoms
        else:
            continue

        for part in parts:
            x, y = part.exterior.xy
            ax.fill(x, y, alpha=0.5)  # Fill the polygon
            ax.plot(x, y, color='black')  # Outline the polygon

    # Equal scaling keeps the floor plan undistorted
    ax.set_aspect('equal', 'box')
    plt.show()

# Draw single apartments or whole floors of a building: the column named in
# group_id decides which rows are recentred together as one drawing.
# group_id = 'apartment_id'  # alternative: one drawing per apartment
group_id = 'floor_id'

# Running totals updated by recenter_geometry_avg for its progress message.
drawingNo = 0; sumPts = 0
csv_path = SDD_ROOT / "geometries.csv"
# NOTE(review): the page count in this message is hard-coded, not measured.
print("\nfound extensive csv, reading 32560 pages\n")
df = read_avg_csv_with_progress(csv_path)
print("\ninflating wicked wkt into shapely shapes...")
df['geometry'] = df['geometry'].apply(wkt.loads)

# Group by group_id and apply the recentering
print("\nforcing polygons into polyamory, they all move in together...")
df = df.groupby(group_id).apply(recenter_geometry_avg).reset_index(drop=True)

# Once the entire dataframe has been processed, plot the first drawing
print(f"\nplotting the first {group_id.replace('_id', '')} drawing...")
first_drawing_id = df[group_id].iloc[0]
first_drawing_data = df[df[group_id] == first_drawing_id]
plot_apartment(first_drawing_data)

recentered_csv_path = PROCESSED_ROOT / f"recentered_{group_id.replace('_id', '')}_geometries.csv"
# The processed folder may not exist yet on a fresh checkout; to_csv does not
# create missing parent directories.
recentered_csv_path.parent.mkdir(parents=True, exist_ok=True)

print(df)
# Report the file name that is actually written.
print(f"\nsaving this df as {recentered_csv_path.name}...")

df.to_csv(recentered_csv_path, index=False)
print("goodbye")


In [None]:
def all_bounds(gdf):
    """Return the bounding box that encloses every recentred geometry.

    Uses the geometry column's ``total_bounds`` rather than first merging all
    shapes into one union: the union is only needed for its bounds, and
    building it is far more work than taking per-coordinate minima and maxima.

    Args:
        gdf: GeoDataFrame with a ``recentered_geometry`` geometry column.

    Returns:
        ``(minx, miny, maxx, maxy)`` as plain floats.
    """
    minx, miny, maxx, maxy = (float(value) for value in gdf['recentered_geometry'].total_bounds)

    print(f"Bounding Box: ({minx}, {miny}, {maxx}, {maxy})")
    return (minx, miny, maxx, maxy)

def percentile_bounds(gdf, lower=5, upper=95):
    """Return an outlier-trimmed bounding box of the recentred geometries.

    The exterior coordinates of every Polygon, and of every part of every
    MultiPolygon, are pooled; the box spans the ``lower`` to ``upper``
    percentile of the pooled x values and of the pooled y values. Other
    geometry types are ignored.

    Args:
        gdf: Frame (or mapping) whose ``recentered_geometry`` entry iterates
            over shapely geometries.
        lower: Percentile used for the minimum edges (default 5).
        upper: Percentile used for the maximum edges (default 95).

    Returns:
        ``(minx, miny, maxx, maxy)``.
    """
    all_x_coords = []
    all_y_coords = []

    for geom in gdf['recentered_geometry']:
        # ``geom_type`` replaces the deprecated ``type`` attribute.
        if geom.geom_type == 'Polygon':
            parts = (geom,)
        elif geom.geom_type == 'MultiPolygon':
            # Shapely 2.x multi-part geometries must be walked via ``.geoms``.
            parts = geom.geoms
        else:
            continue

        for polygon in parts:
            x, y = polygon.exterior.coords.xy
            all_x_coords.extend(x)
            all_y_coords.extend(y)

    minx = np.percentile(all_x_coords, lower)
    maxx = np.percentile(all_x_coords, upper)
    miny = np.percentile(all_y_coords, lower)
    maxy = np.percentile(all_y_coords, upper)

    print(f"Bounding Box (using percentiles): ({minx}, {miny}, {maxx}, {maxy})")
    return (minx, miny, maxx, maxy)

recentered_csv_path = PROCESSED_ROOT / f"recentered_{group_id.replace('_id', '')}_geometries.csv"

# Mise en place: reload the recentred geometries and parse their WKT text
df = read_csv_with_progress(recentered_csv_path)
df['recentered_geometry'] = df['recentered_geometry'].apply(wkt.loads)

gdf = gpd.GeoDataFrame(df, geometry='recentered_geometry')

#minx, miny, maxx, maxy = all_bounds(gdf)
minx, miny, maxx, maxy = percentile_bounds(gdf)

# A one-element GeoSeries is enough to draw the box outline; no union needed.
bounding_box_polygon = gpd.GeoSeries([box(minx, miny, maxx, maxy)])

ax = gdf.plot(edgecolor="black", facecolor="none")
bounding_box_polygon.boundary.plot(ax=ax, color="red", linewidth=2)
ax.set_adjustable('box')

# Collect every exterior coordinate for the x and y histograms
all_x_coords = []
all_y_coords = []

for geom in gdf['recentered_geometry']:
    # ``geom_type`` replaces the deprecated ``type`` attribute, and Shapely 2.x
    # multi-part geometries must be walked through ``.geoms``.
    if geom.geom_type == 'Polygon':
        parts = (geom,)
    elif geom.geom_type == 'MultiPolygon':
        parts = geom.geoms
    else:
        continue

    for polygon in parts:
        x, y = polygon.exterior.coords.xy
        all_x_coords.extend(x)
        all_y_coords.extend(y)

# Use twin axes to overlay histograms
ax_xhist = ax.twiny()
ax_yhist = ax.twinx()

# Plot histograms
ax_xhist.hist(all_x_coords, bins=200, orientation='vertical', alpha=0.4, color='blue')
ax_yhist.hist(all_y_coords, bins=200, orientation='horizontal', alpha=0.4, color='green')

# Adjusting the histogram scale if needed
ax_xhist.set_ylim(ax.get_ylim())
ax_yhist.set_xlim(ax.get_xlim())

plt.show()

print("goodbye")

# Bounding box floor drawings (using percentiles): (-14.882310945745425, -15.53997454446431, 14.99565944946157, 15.548318489641803)


In [None]:
# Reload the recentred CSV for the chosen grouping column; the geometry
# columns stay as WKT text here and are parsed per drawing by the renderer.
group_id = 'floor_id'
recentered_csv_path = PROCESSED_ROOT.joinpath(
    "recentered_" + group_id.replace('_id', '') + "_geometries.csv"
)
df_init = read_csv_with_progress(recentered_csv_path)


In [None]:
def FP_renderer(row_number, df, encountered_apartment_ids):
    """Render the drawing that owns row ``row_number`` to a PNG, at most once.

    The drawing is every row of ``df`` sharing the row's ``site_id`` and its
    value in the column named by the notebook global ``group_id``. Nothing is
    drawn when that id was already seen (or is missing), when an identical
    drawing was already hashed, or when two 'area' polygons overlap by more
    than half of the smaller one.

    Reads the notebook globals ``group_id``, ``end_row_number``,
    ``image_size``, ``generated_unit_hashes`` and ``OUTPUT_ROOT``; writes the
    global ``has_overlap``.

    Args:
        row_number: Positional index into ``df`` of the row to start from.
        df: Frame whose ``recentered_geometry`` column holds WKT strings.
        encountered_apartment_ids: List of ids already handled; appended to.

    Returns:
        None. The image is written under ``OUTPUT_ROOT / "FP_HD_groundtruth_512"``.
    """
    global has_overlap

    # Select the row of the dataframe
    curr_row = df.iloc[row_number]

    # Specify the site ID and group ID of interest
    site_id_of_interest = curr_row['site_id']
    apartment_id_of_interest = curr_row[group_id]

    # Check if the group_id has already been encountered
    if check_encountered_apartment_ids(row_number, apartment_id_of_interest, encountered_apartment_ids, end_row_number):
        return

    unit_df = df[(df['site_id'] == site_id_of_interest) & (df[group_id] == apartment_id_of_interest)]
    print (unit_df)
    unit_hash = get_unit_hash(unit_df)

    print("\ngenerating unit hash...")
    print (f"\n{unit_hash}")
    print("\ncomparing against hash set...")

    if unit_hash in generated_unit_hashes:
        print('\nalready drawn similar unit and not doing it again...')
        return

    print("\nadding unique hash to set...")
    generated_unit_hashes.add(unit_hash)

    # Parse WKT strings to shapely geometries. ``assign`` builds a new frame,
    # so the caller's frame keeps its WKT text and no write goes into a slice;
    # the already filtered rows are reused instead of filtering ``df`` again.
    unit_df = unit_df.assign(recentered_geometry=unit_df['recentered_geometry'].apply(wkt.loads))

    colors = {
        'BATHROOM': '#D3D3D3',          # Light Gray
        'LIVING_ROOM': '#E8E8E8',       # Very Light Gray
        'BALCONY': '#DCDCDC',           # Gainsboro
        'CORRIDOR': '#778899',          # Light Slate Gray
        'ROOM': '#F5F5F5',              # White Smoke
        'BATHTUB': '#696969',           # Dim Gray
        'SHOWER': '#696969',            # Dim Gray
        'SINK': '#696969',              # Dim Gray
        'TOILET': '#696969',            # Dim Gray
        'KITCHEN': '#C0C0C0',           # Silver
        'RAILING': 'dimgray',           # Dim Gray
        'WINDOW': 'gray',               # Gray
        'DOOR': '#D3D3D3',              # Light Gray
        'ENTRANCE_DOOR': '#D3D3D3',     # Light Gray
        'DINING': '#E8E8E8',            # Very Light Gray
        'SHAFT': 'black',               # Black
        'WALL': '#000000',              # Black
        'STAIRCASE': 'dimgray',         # Dim Gray
        'STAIRS': 'black',              # Black
        'STOREROOM': 'dimgray',         # Dim Gray
        'COLUMN': '#000000',            # Black
        'BASEMENT_COMPARTMENT': '#BC8F8F' # Rosy Brown
    }

    # First pass: detect significant overlap between 'area' polygons before a
    # figure is created, so a skipped drawing allocates nothing.
    # NOTE(review): the original compared ``entity_subtype`` with 'area', but
    # 'area' looks like an ``entity_type`` value (the subtypes used as colour
    # keys are upper-case names), so the check could never fire. Confirm
    # against the dataset schema.
    area_polygons = []
    has_overlap = False
    for _, row in unit_df.iterrows():
        geometry = row['recentered_geometry']
        if row['entity_type'] != 'area':
            continue
        if not (geometry.is_valid and geometry.geom_type == 'Polygon'):
            continue
        coords = np.array(geometry.exterior.coords)
        if len(coords) < 2:
            continue
        polygon = ShapelyPolygon(coords)
        if is_significantly_overlapping(polygon, area_polygons):
            has_overlap = True
            print("\nMAISONNETTE ALARM \nSignificant overlap detected in the apartment unit. Skipping plotting...")
            return
        area_polygons.append(polygon)

    # Fixed drawing extents so every image shares one scale
    minx = -20
    miny = -20
    maxx = 20
    maxy = 20
    dpi_value = 300
    fig_size_inch = image_size / dpi_value
    fig, ax = plt.subplots(figsize=(fig_size_inch, fig_size_inch))
    try:
        ax.set_xlim(minx, maxx)
        ax.set_ylim(miny, maxy)  # adjust as needed

        # Second pass: one filled patch per valid polygon
        for i, row in unit_df.iterrows():
            geometry = row['recentered_geometry']
            # Subtypes without an entry are drawn white
            color = colors.get(row['entity_subtype'], 'white')

            if geometry.is_valid and geometry.geom_type == 'Polygon':
                coords = np.array(geometry.exterior.coords)
                if len(coords) >= 2:
                    patch = MplPolygon(coords, edgecolor=None, facecolor=color, alpha=1)
                    ax.add_patch(patch)
                else:
                    print(f"\nInvalid geometry for row {i}: {row['recentered_geometry']}")
            else:
                print(f"\nInvalid or non-polygon geometry for row {i}: {row['recentered_geometry']}")

        # Equal aspect keeps the floorplan undistorted
        ax.set_aspect('equal')

        # Hide the axis
        ax.axis('off')

        # Save the figure; the target folder may not exist yet and savefig
        # does not create it.
        out_path = OUTPUT_ROOT / "FP_HD_groundtruth_512"
        out_path.mkdir(parents=True, exist_ok=True)
        filename = next_available_filename(out_path, 'FP')
        fig.savefig(filename, bbox_inches='tight', dpi = dpi_value)
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long run accumulates one open figure per drawing.
        plt.close(fig)

    print("\napartment successfully exported")

# Go
start_row_number = 0
end_row_number = 200000

image_size = 512

# This set will store the hashes of the units we have already processed
generated_unit_hashes = set()
df = df_init

num_rows = len(df)  # Print the number of rows in the database
print(f"\nTotal number of rows in the database: {num_rows}")

# Never walk past the last row: FP_renderer indexes with df.iloc, which raises
# IndexError when the frame is shorter than the requested range.
end_row_number = min(end_row_number, num_rows - 1)

encountered_apartment_ids = []
lp = LineProfiler()
lp_wrapper = lp(FP_renderer)

for row_number in range(start_row_number, end_row_number + 1):
    lp_wrapper(row_number, df, encountered_apartment_ids)

lp.print_stats()
print("\ngoodbye")


In [None]:
import matplotlib.pyplot as plt
from shapely.geometry import Polygon as ShapelyPolygon
from matplotlib.patches import Polygon as MplPolygon
import numpy as np
import os
import cv2
import geopandas as gpd

def FP_renderer(row_number, df, encountered_apartment_ids):
    """Render the drawing that owns row ``row_number`` to a PNG, at most once.

    The drawing is every row of ``df`` sharing the row's ``site_id`` and its
    value in the column named by the notebook global ``group_id``. Nothing is
    drawn when that id was already seen (or is missing), when an identical
    drawing was already hashed, or when two 'area' polygons overlap by more
    than half of the smaller one.

    Reads the notebook globals ``group_id``, ``end_row_number``,
    ``image_size``, ``generated_unit_hashes`` and ``OUTPUT_ROOT``; writes the
    global ``has_overlap``.

    Args:
        row_number: Positional index into ``df`` of the row to start from.
        df: Frame whose ``recentered_geometry`` column holds WKT strings.
        encountered_apartment_ids: List of ids already handled; appended to.

    Returns:
        None. The image is written under ``OUTPUT_ROOT / "FP_HD_groundtruth_512"``.
    """
    global has_overlap

    # Select the row of the dataframe
    curr_row = df.iloc[row_number]

    # Specify the site ID and group ID of interest. ``group_id`` is the
    # variable holding the column name; the frame has no column literally
    # called 'group_id'.
    site_id_of_interest = curr_row['site_id']
    apartment_id_of_interest = curr_row[group_id]

    # Check if the group_id has already been encountered
    if check_encountered_apartment_ids(row_number, apartment_id_of_interest, encountered_apartment_ids, end_row_number):
        return

    unit_df = df[(df['site_id'] == site_id_of_interest) & (df[group_id] == apartment_id_of_interest)]
    print(unit_df)
    unit_hash = get_unit_hash(unit_df)

    print("\ngenerating unit hash...")
    print(f"\n{unit_hash}")
    print("\ncomparing against hash set...")

    if unit_hash in generated_unit_hashes:
        print('\nalready drawn similar unit and not doing it again...')
        return

    print("\nadding unique hash to set...")
    generated_unit_hashes.add(unit_hash)

    # Parse WKT strings to shapely geometries. ``assign`` builds a new frame,
    # so the caller's frame keeps its WKT text and no write goes into a slice;
    # the already filtered rows are reused instead of filtering ``df`` again.
    unit_df = unit_df.assign(recentered_geometry=unit_df['recentered_geometry'].apply(wkt.loads))

    colors = {
        'BATHROOM': '#D3D3D3',          # Light Gray
        'LIVING_ROOM': '#E8E8E8',       # Very Light Gray
        'BALCONY': '#DCDCDC',           # Gainsboro
        'CORRIDOR': '#778899',          # Light Slate Gray
        'ROOM': '#F5F5F5',              # White Smoke
        'BATHTUB': '#696969',           # Dim Gray
        'SHOWER': '#696969',            # Dim Gray
        'SINK': '#696969',              # Dim Gray
        'TOILET': '#696969',            # Dim Gray
        'KITCHEN': '#C0C0C0',           # Silver
        'RAILING': 'dimgray',           # Dim Gray
        'WINDOW': 'gray',               # Gray
        'DOOR': '#D3D3D3',              # Light Gray
        'ENTRANCE_DOOR': '#D3D3D3',     # Light Gray
        'DINING': '#E8E8E8',            # Very Light Gray
        'SHAFT': 'black',               # Black
        'WALL': '#000000',              # Black
        'STAIRCASE': 'dimgray',         # Dim Gray
        'STAIRS': 'black',              # Black
        'STOREROOM': 'dimgray',         # Dim Gray
        'COLUMN': '#000000',            # Black
        'BASEMENT_COMPARTMENT': '#BC8F8F' # Rosy Brown
    }

    # First pass: detect significant overlap between 'area' polygons before a
    # figure is created, so a skipped drawing allocates nothing.
    # NOTE(review): the original compared ``entity_subtype`` with 'area', but
    # 'area' looks like an ``entity_type`` value (the subtypes used as colour
    # keys are upper-case names), so the check could never fire. Confirm
    # against the dataset schema.
    area_polygons = []
    has_overlap = False
    for _, row in unit_df.iterrows():
        geometry = row['recentered_geometry']
        if row['entity_type'] != 'area':
            continue
        if not (geometry.is_valid and geometry.geom_type == 'Polygon'):
            continue
        coords = np.array(geometry.exterior.coords)
        if len(coords) < 2:
            continue
        polygon = ShapelyPolygon(coords)
        if is_significantly_overlapping(polygon, area_polygons):
            has_overlap = True
            print("\nMAISONNETTE ALARM \nSignificant overlap detected in the apartment unit. Skipping plotting...")
            return
        area_polygons.append(polygon)

    # Fixed drawing extents so every image shares one scale
    minx = -25.6
    miny = -25.6
    maxx = 25.6
    maxy = 25.6
    dpi_value = 100
    fig_size_inch = image_size / dpi_value
    fig, ax = plt.subplots(figsize=(fig_size_inch, fig_size_inch), dpi=dpi_value)
    try:
        ax.set_xlim(minx, maxx)
        ax.set_ylim(miny, maxy)

        # Second pass: one filled patch per valid polygon
        for i, row in unit_df.iterrows():
            geometry = row['recentered_geometry']
            # Subtypes without an entry are drawn white
            color = colors.get(row['entity_subtype'], 'white')

            if geometry.is_valid and geometry.geom_type == 'Polygon':
                coords = np.array(geometry.exterior.coords)
                if len(coords) >= 2:
                    patch = MplPolygon(coords, edgecolor=None, facecolor=color, alpha=1)
                    ax.add_patch(patch)
                else:
                    print(f"\nInvalid geometry for row {i}: {row['recentered_geometry']}")
            else:
                print(f"\nInvalid or non-polygon geometry for row {i}: {row['recentered_geometry']}")

        # Equal aspect keeps the floorplan undistorted
        ax.set_aspect('equal')

        # Hide the axis
        ax.axis('off')

        # Save the figure; the target folder may not exist yet and savefig
        # does not create it.
        out_path = OUTPUT_ROOT / "FP_HD_groundtruth_512"
        out_path.mkdir(parents=True, exist_ok=True)
        filename = next_available_filename(out_path, 'FP')
        fig.savefig(filename, bbox_inches='tight', pad_inches=0, dpi=dpi_value)
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long run accumulates one open figure per drawing.
        plt.close(fig)

    print("\napartment successfully exported")

# Go
start_row_number = 0
end_row_number = 200000

image_size = 512

group_id = 'floor_id'

# This set will store the hashes of the units we have already processed
generated_unit_hashes = set()
df = df_init

num_rows = len(df)  # Print the number of rows in the database
print(f"\nTotal number of rows in the database: {num_rows}")

# Never walk past the last row: FP_renderer indexes with df.iloc, which raises
# IndexError when the frame is shorter than the requested range.
end_row_number = min(end_row_number, num_rows - 1)

encountered_apartment_ids = []
lp = LineProfiler()
lp_wrapper = lp(FP_renderer)

for row_number in range(start_row_number, end_row_number + 1):
    lp_wrapper(row_number, df, encountered_apartment_ids)

lp.print_stats()
print("\ngoodbye")

