# Custom Tool

exported as a .shp file. Made sure to keep all the other subfiles together with the shape file in the same folder.

Use cdp kernel

In [1]:
# the bare minimum
import matplotlib.pyplot as plt  # for plotting
import geopandas as gpd  # for geospatial data handling
from matplotlib.lines import Line2D

# more advanced
from lonboard._map import Map
from lonboard._layer import PolygonLayer  # for mapping in 3D
from lonboard.colormap import (
    apply_categorical_cmap,
    apply_continuous_cmap,
)  # for assigning colors
from palettable.colorbrewer.sequential import PuRd_9  # for color palettes
from matplotlib.colors import LogNorm  # for logarithmic normalization
import pygwalker as pyg  # for creating interactive data visualizations

# Load data


In [10]:
nycod = gpd.read_file(r"C:\Users\drmim\Documents\CDP Summer 2025\Comp Modeling T F\pdr_CustomTool_CompModelingFinal\nyc_mappluto_25v2_shp")

# Basic exploration


previewing the data

In [11]:
nycod.head()

Unnamed: 0,Borough,Block,Lot,CD,BCT2020,BCTCB2020,CT2010,CB2010,SchoolDist,Council,...,FIRM07_FLA,PFIRM15_FL,Version,DCPEdited,Latitude,Longitude,Notes,Shape_Leng,Shape_Area,geometry
0,MN,1,10,101,1000500,10005000003,5,1000,2,1,...,1,1,25v2,,40.688774,-74.018704,,0.0,7414502.0,"POLYGON ((980898.728 191409.779, 980881.798 19..."
1,MN,3,1,101,1031900,10319001003,319,1001,2,1,...,1,1,25v2,t,40.702806,-74.015631,,0.0,921410.5,"MULTIPOLYGON (((979928.232 195976.551, 980006...."
2,MN,3,10,101,1031900,10319001006,319,1003,2,1,...,1,1,25v2,,40.700918,-74.014444,,0.0,42164.7,"POLYGON ((980198.105 194743.552, 980217.967 19..."
3,MN,5,10,101,1000900,10009001014,9,1017,2,1,...,1,1,25v2,,40.702776,-74.010722,,0.0,59912.94,"POLYGON ((981444.514 195254.036, 981143.712 19..."
4,MN,5,7501,101,1000900,10009001014,9,1017,2,1,...,1,1,25v2,,40.702266,-74.010607,,0.0,55990.21,"POLYGON ((981480.323 195094.261, 981420.026 19..."


checking column names to later reference

In [12]:
nycod.columns

Index(['Borough', 'Block', 'Lot', 'CD', 'BCT2020', 'BCTCB2020', 'CT2010',
       'CB2010', 'SchoolDist', 'Council', 'ZipCode', 'FireComp', 'PolicePrct',
       'HealthCent', 'HealthArea', 'Sanitboro', 'SanitDistr', 'SanitSub',
       'Address', 'ZoneDist1', 'ZoneDist2', 'ZoneDist3', 'ZoneDist4',
       'Overlay1', 'Overlay2', 'SPDist1', 'SPDist2', 'SPDist3', 'LtdHeight',
       'SplitZone', 'BldgClass', 'LandUse', 'Easements', 'OwnerType',
       'OwnerName', 'LotArea', 'BldgArea', 'ComArea', 'ResArea', 'OfficeArea',
       'RetailArea', 'GarageArea', 'StrgeArea', 'FactryArea', 'OtherArea',
       'AreaSource', 'NumBldgs', 'NumFloors', 'UnitsRes', 'UnitsTotal',
       'LotFront', 'LotDepth', 'BldgFront', 'BldgDepth', 'Ext', 'ProxCode',
       'IrrLotCode', 'LotType', 'BsmtCode', 'AssessLand', 'AssessTot',
       'ExemptTot', 'YearBuilt', 'YearAlter1', 'YearAlter2', 'HistDist',
       'Landmark', 'BuiltFAR', 'ResidFAR', 'CommFAR', 'FacilFAR', 'BoroCode',
       'BBL', 'CondoNo', 'Tract2

checking data types of each column

## Cleaning Data


In [5]:
nycod.Borough.value_counts()

Borough
QN    324168
BK    275745
SI    125414
BX     89268
MN     42099
Name: count, dtype: int64

In [13]:
# Keep only Manhattan
nycod = nycod[nycod['Borough'] == 'MN']

In [14]:
# Just peek at structure without loading everything
print(f"Dataset info:")
print(f"- Rows: {len(nycod):,}")
print(f"- Columns: {len(nycod.columns)}")
print(f"- Memory usage: {nycod.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

Dataset info:
- Rows: 42,099
- Columns: 95
- Memory usage: 94.1 MB


In [15]:
# Keep columns relevant for material concentration analysis
material_analysis_columns = [
    # Location & Administrative
    'Borough',           # Geographic area
    'CD',               # Community District (neighborhood proxy)
    'ZipCode',          # Another neighborhood identifier
    'Address',          # Specific location
    
    # Building Classification & Use
    'BldgClass',        # Building class (very important for material type)
    'LandUse',          # Land use category
    'ZoneDist1',        # Primary zoning (residential vs commercial)
    'ZoneDist2',        # Secondary zoning
    
    # Building Physical Characteristics
    'LotArea',          # Lot size (sq ft)
    'BldgArea',         # Total building area (sq ft)
    'LotFront',         # Building frontage
    'LotDepth',         # Lot depth
    'BldgFront',        # Building frontage
    'BldgDepth',        # Building depth
    'NumBldgs',         # Number of buildings on lot
    'NumFloors',        # Height indicator (floors)
    'LtdHeight',        # Height limitations
    'BBL',
    
    # Building Areas by Use (for material assumptions)
    'ResArea',          # Residential area (likely concrete/wood)
    'ComArea',          # Commercial area (likely steel/concrete)
    'OfficeArea',       # Office area (likely steel)
    'RetailArea',       # Retail area
    'FactryArea',       # Factory area (likely steel)
    'GarageArea',       # Garage area
    
    # Age & Construction History
    'YearBuilt',        # Original construction year (key for material types)
    'YearAlter1',       # First alteration
    'YearAlter2',       # Second alteration
    
    # Units & Density
    'UnitsRes',         # Residential units
    'UnitsTotal',       # Total units
    
    # Building Ratios (density indicators)
    'BuiltFAR',         # Built floor area ratio
    'ResidFAR',         # Residential FAR
    'CommFAR',          # Commercial FAR
    
    # Coordinates for spatial analysis
    'Latitude',
    'Longitude',
    'XCoord',
    'YCoord',
    
    # Geometry (essential for GIS)
    'geometry'
]

# Filter the dataframe
nycod = nycod[material_analysis_columns]

# Preview the result
print(f"Kept {len(material_analysis_columns)} columns out of original dataset")
print(f"Dataset shape: {nycod.shape}")
print("\nColumns kept:")
for col in material_analysis_columns:
    print(f"  - {col}")

# Quick preview
nycod.head()

Kept 37 columns out of original dataset
Dataset shape: (42099, 37)

Columns kept:
  - Borough
  - CD
  - ZipCode
  - Address
  - BldgClass
  - LandUse
  - ZoneDist1
  - ZoneDist2
  - LotArea
  - BldgArea
  - LotFront
  - LotDepth
  - BldgFront
  - BldgDepth
  - NumBldgs
  - NumFloors
  - LtdHeight
  - BBL
  - ResArea
  - ComArea
  - OfficeArea
  - RetailArea
  - FactryArea
  - GarageArea
  - YearBuilt
  - YearAlter1
  - YearAlter2
  - UnitsRes
  - UnitsTotal
  - BuiltFAR
  - ResidFAR
  - CommFAR
  - Latitude
  - Longitude
  - XCoord
  - YCoord
  - geometry


Unnamed: 0,Borough,CD,ZipCode,Address,BldgClass,LandUse,ZoneDist1,ZoneDist2,LotArea,BldgArea,...,UnitsRes,UnitsTotal,BuiltFAR,ResidFAR,CommFAR,Latitude,Longitude,XCoord,YCoord,geometry
0,MN,101,10004,301 COMFORT ROAD,Y4,8,R3-2,C4-1,7600835,2542066,...,0,1,0.33,0.75,0.0,40.688774,-74.018704,979063,190216,"POLYGON ((980898.728 191409.779, 980881.798 19..."
1,MN,101,10004,10 BATTERY PARK,Q1,9,PARK,,945425,945425,...,0,4,1.0,0.0,0.0,40.702806,-74.015631,979916,195328,"MULTIPOLYGON (((979928.232 195976.551, 980006...."
2,MN,101,10004,1 SOUTH STREET,O2,5,M1-4,,123800,123800,...,0,1,1.0,0.0,2.0,40.700918,-74.014444,980245,194640,"POLYGON ((980198.105 194743.552, 980217.967 19..."
3,MN,101,10004,115 BROAD STREET,D5,3,C5-5,,54023,1044351,...,1320,1323,19.33,10.0,15.0,40.702776,-74.010722,981277,195317,"POLYGON ((981444.514 195254.036, 981143.712 19..."
4,MN,101,10004,125 BROAD STREET,RB,5,C5-5,,54023,1051990,...,0,26,19.47,10.0,15.0,40.702266,-74.010607,981309,195131,"POLYGON ((981480.323 195094.261, 981420.026 19..."


In [16]:
import numpy as np
import pandas as pd

# Create material percentage estimates based on building characteristics
def estimate_material_percentages(row):
    """Estimate material percentages based on building type, age, and use"""
    
    # Initialize material percentages
    concrete_pct = 0
    steel_pct = 0
    masonry_pct = 0
    wood_pct = 0
    
    year_built = row.get('YearBuilt', 1950)
    num_floors = row.get('NumFloors', 1)
    bldg_class = str(row.get('BldgClass', ''))
    
    # High-rise buildings (10+ floors) - mostly steel/concrete
    if num_floors >= 10:
        if year_built < 1960:
            steel_pct = 0.7
            concrete_pct = 0.25
            masonry_pct = 0.05
        else:
            concrete_pct = 0.6
            steel_pct = 0.35
            masonry_pct = 0.05
    
    # Mid-rise (4-9 floors) - mixed materials
    elif num_floors >= 4:
        if year_built < 1945:
            masonry_pct = 0.4
            steel_pct = 0.4
            concrete_pct = 0.2
        else:
            concrete_pct = 0.5
            steel_pct = 0.3
            masonry_pct = 0.2
    
    # Low-rise (1-3 floors) - varies by age and type
    else:
        if year_built < 1900:
            masonry_pct = 0.7
            wood_pct = 0.3
        elif year_built < 1945:
            masonry_pct = 0.5
            wood_pct = 0.3
            concrete_pct = 0.2
        else:
            concrete_pct = 0.4
            wood_pct = 0.4
            masonry_pct = 0.2
    
    # Adjust for building class
    if 'R' in bldg_class:  # Residential
        wood_pct += 0.1
        concrete_pct -= 0.05
        steel_pct -= 0.05
    elif 'C' in bldg_class or 'O' in bldg_class:  # Commercial/Office
        steel_pct += 0.1
        wood_pct -= 0.1
    
    # Normalize to 100%
    total = concrete_pct + steel_pct + masonry_pct + wood_pct
    if total > 0:
        return {
            'concrete_pct': concrete_pct / total * 100,
            'steel_pct': steel_pct / total * 100,
            'masonry_pct': masonry_pct / total * 100,
            'wood_pct': wood_pct / total * 100
        }
    else:
        return {'concrete_pct': 25, 'steel_pct': 25, 'masonry_pct': 25, 'wood_pct': 25}

# Apply the function to create material percentage columns
material_data = nycod.apply(estimate_material_percentages, axis=1, result_type='expand')
nycod = pd.concat([nycod, material_data], axis=1)

# Preview the results
print("Material percentage columns added:")
print(nycod[['YearBuilt', 'NumFloors', 'BldgClass', 'concrete_pct', 'steel_pct', 'masonry_pct', 'wood_pct']].head())

Material percentage columns added:
   YearBuilt  NumFloors BldgClass  concrete_pct  steel_pct  masonry_pct  \
0       1900        3.5        Y4          20.0        0.0         50.0   
1       1951        1.0        Q1          40.0        0.0         20.0   
2       1960        3.0        O2          40.0       10.0         20.0   
3       1969       32.0        D5          60.0       35.0          5.0   
4       1970       40.0        RB          55.0       30.0          5.0   

   wood_pct  
0      30.0  
1      40.0  
2      30.0  
3       0.0  
4      10.0  


# This is sample data set

In [17]:
# Create a small test sample (first 500 buildings for speed)
nycod_test = nycod.head(500).copy()

print(f"Creating test files with {len(nycod_test)} buildings instead of {len(nycod)}")

# Create a master test file with all data
nycod_test.to_file("test_manhattan_all_materials.shp")

# Create individual material test files with calculated material volumes
materials = ['concrete', 'steel', 'masonry', 'wood']

for material in materials:
    # Create subset with material-specific calculations
    material_data = nycod_test[['geometry', 'BldgArea', f'{material}_pct', 'Address', 'YearBuilt', 'NumFloors']].copy()
    
    # Calculate actual material volume (sq ft * percentage)
    material_data[f'{material}_volume'] = (material_data['BldgArea'] * material_data[f'{material}_pct'] / 100)
    
    # Rename for consistency
    material_data.rename(columns={
        f'{material}_pct': 'material_percentage',
        f'{material}_volume': 'material_volume'
    }, inplace=True)
    
    # Save individual test file
    material_data.to_file(f"test_manhattan_{material}.shp")
    
    print(f"Created TEST {material} file with {len(material_data)} buildings")
    print(f"  - Total {material} volume: {material_data['material_volume'].sum():,.0f} sq ft")

print("\nTest files created:")
print("- test_manhattan_all_materials.shp")
for material in materials:
    print(f"- test_manhattan_{material}.shp")

Creating test files with 500 buildings instead of 42099
Created TEST concrete file with 500 buildings
  - Total concrete volume: 49,303,997 sq ft
Created TEST steel file with 500 buildings
  - Total steel volume: 53,631,688 sq ft
Created TEST masonry file with 500 buildings
  - Total masonry volume: 10,496,893 sq ft
Created TEST wood file with 500 buildings
  - Total wood volume: 322,977 sq ft

Test files created:
- test_manhattan_all_materials.shp
- test_manhattan_concrete.shp
- test_manhattan_steel.shp
- test_manhattan_masonry.shp
- test_manhattan_wood.shp


  nycod_test.to_file("test_manhattan_all_materials.shp")
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_w

In [18]:
# Just peek at structure without loading everything
print(f"Dataset info:")
print(f"- Rows: {len(nycod):,}")
print(f"- Columns: {len(nycod.columns)}")
print(f"- Memory usage: {nycod.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

Dataset info:
- Rows: 42,099
- Columns: 41
- Memory usage: 23.6 MB


# This is full data set

In [13]:
# Create a master file with all data
nycod.to_file("manhattan_all_materials.shp")

# Create individual material files with calculated material volumes
materials = ['concrete', 'steel', 'masonry', 'wood']

for material in materials:
    # Create subset with material-specific calculations
    material_data = nycod[['geometry', 'BldgArea', f'{material}_pct', 'Address', 'YearBuilt', 'NumFloors']].copy()
    
    # Calculate actual material volume (sq ft * percentage)
    material_data[f'{material}_volume'] = (material_data['BldgArea'] * material_data[f'{material}_pct'] / 100)
    
    # Rename for consistency
    material_data.rename(columns={
        f'{material}_pct': 'material_percentage',
        f'{material}_volume': 'material_volume'
    }, inplace=True)
    
    # Save individual file
    material_data.to_file(f"manhattan_{material}.shp")
    
    print(f"Created {material} file with {len(material_data)} buildings")
    print(f"  - Total {material} volume: {material_data['material_volume'].sum():,.0f} sq ft")

  nycod.to_file("manhattan_all_materials.shp")
  ogr_write(
  ogr_write(
  material_data.to_file(f"manhattan_{material}.shp")
  ogr_write(
  ogr_write(


Created concrete file with 42099 buildings
  - Total concrete volume: 677,477,882 sq ft


  material_data.to_file(f"manhattan_{material}.shp")
  ogr_write(
  ogr_write(


Created steel file with 42099 buildings
  - Total steel volume: 846,852,033 sq ft


  material_data.to_file(f"manhattan_{material}.shp")
  ogr_write(
  ogr_write(


Created masonry file with 42099 buildings
  - Total masonry volume: 287,437,446 sq ft


  material_data.to_file(f"manhattan_{material}.shp")
  ogr_write(
  ogr_write(


Created wood file with 42099 buildings
  - Total wood volume: 8,765,837 sq ft


## look at htmlweb.html for interactive website for Mapping systems final