In [18]:
import geopandas as gpd

# Load every feature of the DXF drawing into a single GeoDataFrame
df = gpd.read_file(r"D:\KUL MSDS 1\GRBcade60d34.dxf")

# Layer codes that are treated as rooftops (building footprints)
rooftop_layers = ['GBA1', 'GBA2', 'GBA3', 'GBA4', 'GBA5', 'GBA6', 'GBA11']

# Split the features by their 'Layer' value:
#   rooftops                 -> any of the rooftop layer codes above
#   solar panels             -> the 'ZPT' layer
#   garages/small structures -> the 'GBG*' layers (optional)
df_rooftops = df.loc[df['Layer'].isin(rooftop_layers)]
df_solar = df.loc[df['Layer'].eq('ZPT')]
df_garages = df.loc[df['Layer'].isin(['GBG1', 'GBG2', 'GBG3'])]

# Report how many features ended up in each group
print(f"Found {len(df_rooftops)} rooftop features")
print(f"Found {len(df_solar)} solar panel features")
print(f"Found {len(df_garages)} garage/small structure features")

Found 8647 rooftop features
Found 8369 solar panel features
Found 43043 garage/small structure features


In [31]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, Point
import numpy as np

# Load the DXF drawing; every feature carries its layer code in the 'Layer' column
df = gpd.read_file(r"D:\KUL MSDS 1\GRBcade60d34.dxf")

# Layer codes that are treated as rooftops
rooftop_layers = ['GBA1', 'GBA2', 'GBA3', 'GBA4', 'GBA5', 'GBA6', 'GBA11']

# Independent copies, so columns added further down never write back into `df`
df_rooftops = df.loc[df['Layer'].isin(rooftop_layers)].copy()
df_solar = df.loc[df['Layer'].eq('ZPT')].copy()

print(
    f"Rooftop features: {len(df_rooftops)} LineStrings",
    f"Solar panel features: {len(df_solar)} Points",
    sep="\n",
)

# Process rooftops
def linestrings_to_polygons(gdf):
    """Turn the LineString outlines in ``gdf`` into Polygon geometries.

    Every LineString with at least three vertices is closed (its first vertex
    is appended when the ring is still open) and passed to ``Polygon``.  A
    feature is kept only when the resulting polygon is valid and has a
    positive area.  Everything else is dropped: missing geometries,
    non-LineString geometries, outlines with fewer than three vertices,
    invalid or zero-area rings, and rings that raise while being built.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame whose ``geometry`` is iterated; rows are addressed by position
        (``iloc``), so the index labels do not matter.

    Returns
    -------
    GeoDataFrame
        Copy of the kept rows with their geometry replaced by the polygons,
        or an empty copy of ``gdf`` when nothing could be converted.
    """
    polygons = []
    valid_indices = []

    for idx, geometry in enumerate(gdf.geometry):
        try:
            if geometry is None or geometry.geom_type != 'LineString':
                continue

            coords = list(geometry.coords)
            if len(coords) < 3:
                continue

            # Close the ring explicitly when the outline is open
            if coords[0] != coords[-1]:
                coords.append(coords[0])

            polygon = Polygon(coords)
            if polygon.is_valid and polygon.area > 0:
                polygons.append(polygon)
                valid_indices.append(idx)
        except Exception:
            # Best effort: a feature that cannot be converted is skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still stop a long-running conversion.
            continue

    if not polygons:
        return gdf.iloc[0:0].copy()

    result_gdf = gdf.iloc[valid_indices].copy()
    result_gdf.geometry = polygons
    return result_gdf

# Process solar panels
def process_solar_panels_honest(gdf):
    """Return a copy of ``gdf`` with the geometry coordinates as columns.

    The ``x`` and ``y`` values of the geometry are stored in the new columns
    ``centroid_x`` and ``centroid_y``.  The input frame is left untouched and
    no area is derived.
    """
    located = gdf.copy()
    for column, axis in (('centroid_x', 'x'), ('centroid_y', 'y')):
        located[column] = getattr(located.geometry, axis)
    return located

# Convert rooftops to polygons and calculate areas
print("\nConverting rooftop LineStrings to Polygons...")
rooftop_polygons = linestrings_to_polygons(df_rooftops)

# Derive the area/centroid columns unconditionally.  They used to be created
# only when at least one polygon existed, which made the sort below fail with
# a KeyError on 'area_original' whenever the conversion came back empty.
# The centroid is computed once and reused for both coordinates.
rooftop_centroids = rooftop_polygons.geometry.centroid
rooftop_polygons['area_original'] = rooftop_polygons.geometry.area
rooftop_polygons['centroid_x'] = rooftop_centroids.x
rooftop_polygons['centroid_y'] = rooftop_centroids.y

if len(rooftop_polygons) > 0:
    print(f"Successfully converted {len(rooftop_polygons)} rooftops to polygons")
else:
    print("No rooftop LineStrings could be converted to polygons")

# Process solar panels honestly
solar_panels = process_solar_panels_honest(df_solar)

# Sort by area (largest first) and keep the top 50
rooftop_polygons_sorted = rooftop_polygons.sort_values('area_original', ascending=False)
top_50_rooftops = rooftop_polygons_sorted.head(50)

# Shared pieces of the text report: the rule drawn around every heading and
# the columns shown in each rooftop table
heading_rule = "=" * 80
report_columns = ['Layer', 'area_original', 'centroid_x', 'centroid_y']

# The 50 largest rooftops: full table, summary figures and layer breakdown
if len(top_50_rooftops) > 0:
    top_50_areas = top_50_rooftops['area_original']

    print("\n" + heading_rule, "TOP 50 LARGEST ROOFTOPS BY AREA", heading_rule, sep="\n")
    print(top_50_rooftops[report_columns].to_string())

    print("\n" + heading_rule, "SUMMARY OF TOP 50 LARGEST ROOFTOPS", heading_rule, sep="\n")
    print(f"Total area of top 50 rooftops: {top_50_areas.sum():.2f} units")
    print(f"Average area of top 50: {top_50_areas.mean():.2f} units")
    print(f"Largest rooftop area: {top_50_areas.max():.2f} units")
    print(f"Smallest of top 50: {top_50_areas.min():.2f} units")

    # How many of the top 50 fall into each layer/building type
    print("\nBuilding type distribution in top 50:")
    print(top_50_rooftops['Layer'].value_counts())

# Solar panels are point features, so only their locations are listed (no areas)
print(
    "\n" + heading_rule,
    "SOLAR PANEL LOCATIONS (Point data - no area information)",
    heading_rule,
    sep="\n",
)
print(f"Total solar panel installations: {len(solar_panels)}")
print("First 50 solar panel locations:")
print(solar_panels[['Layer', 'centroid_x', 'centroid_y']].head(50).to_string())

# Figures over all converted rooftops, and the share taken by the top 50
if len(rooftop_polygons) > 0:
    all_areas = rooftop_polygons['area_original']
    share_of_count = len(top_50_rooftops) / len(rooftop_polygons) * 100
    share_of_area = top_50_rooftops['area_original'].sum() / all_areas.sum() * 100

    print("\n" + heading_rule, "OVERALL ROOFTOP STATISTICS", heading_rule, sep="\n")
    print(f"Total rooftops analyzed: {len(rooftop_polygons)}")
    print(f"Total rooftop area: {all_areas.sum():.2f} units")
    print(f"Average rooftop area: {all_areas.mean():.2f} units")
    print(f"Top 50 represent {share_of_count:.1f}% of rooftops")
    print(f"Top 50 represent {share_of_area:.1f}% of total rooftop area")

# Separate short list of the very largest rooftops, shown only when the
# selection holds more than ten entries
if len(top_50_rooftops) > 10:
    print("\n" + heading_rule, "TOP 10 LARGEST ROOFTOPS", heading_rule, sep="\n")
    print(top_50_rooftops[report_columns].head(10).to_string())

Rooftop features: 8647 LineStrings
Solar panel features: 8369 Points

Converting rooftop LineStrings to Polygons...
Successfully converted 8647 rooftops to polygons

TOP 50 LARGEST ROOFTOPS BY AREA
        Layer  area_original     centroid_x     centroid_y
122469   GBA5   12854.808606  171120.443394  174145.711126
120715   GBA2   12073.930168  174454.511966  174620.069110
116656   GBA1    3540.792784  172713.353486  173253.268886
116610   GBA1    3196.358985  171781.675850  172722.792739
117538   GBA1    3066.753625  171616.707089  172629.394734
122584   GBA5    2785.481673  171119.452409  172760.463901
116833   GBA1    2550.955973  171373.070202  174600.029771
117552   GBA1    2023.814685  174941.811436  170612.291758
117021   GBA1    1999.890752  173834.374215  176577.723339
116523   GBA1    1813.832453  171791.133361  173023.835248
117440   GBA1    1737.387120  173797.109065  175152.011520
117539   GBA1    1635.465657  171611.968748  172632.704289
117441   GBA1    1326.981323  17379