# Download Microsoft Buildings (US release)

In [None]:
import arcpy
from arcpy.sa import *
import pandas as pd
import numpy as np
import os
import urllib.request
import zipfile

from arcgis.features import GeoAccessor, GeoSeriesAccessor

# User-supplied locations: the root folder for all downloads and outputs, and
# a scratch geodatabase that holds the intermediate (unprojected) features.
data_path = "your_path_here"
scratch_gdb = "your_scratch_path_here"

# Geoprocessing environment shared by the arcpy calls in this notebook.
arcpy.env.overwriteOutput = True  # let tools replace existing outputs
arcpy.env.parallelProcessingFactor = "100%"
arcpy.env.outputCoordinateSystem = None  # no environment-level override
arcpy.env.workspace = data_path

In [None]:
# State name -> two-letter postal abbreviation for the 50 states plus the
# District of Columbia. Names are written without spaces because they are used
# verbatim to build the download URLs and to match the extracted file names.
states_dict = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE", "DistrictofColumbia": "DC",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI",
    "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "NewHampshire": "NH", "NewJersey": "NJ",
    "NewMexico": "NM", "NewYork": "NY", "NorthCarolina": "NC",
    "NorthDakota": "ND",
    "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR",
    "Pennsylvania": "PA",
    "RhodeIsland": "RI",
    "SouthCarolina": "SC", "SouthDakota": "SD",
    "Tennessee": "TN", "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA",
    "Washington": "WA", "WestVirginia": "WV", "Wisconsin": "WI",
    "Wyoming": "WY",
}

## Download all Microsoft Building Footprints

In [None]:
# Root of the legacy "usbuildings-v2" release; the download loop requests
# <base_url><StateName>.geojson.zip for each state.
base_url = "https://minedbuildings.z5.web.core.windows.net/legacy/usbuildings-v2/"

# State names to fetch, in the order they appear in states_dict
states = list(states_dict)

# Folder that receives the downloaded archives (created if it does not exist)
location = os.path.join(data_path, "microsoft_buildings/microsoft_buildings_zipped")
os.makedirs(location, exist_ok=True)

In [None]:
# Download every state's archive into `location`, one request at a time.
# Each archive is first written to "<State>.zip.part" and only renamed to
# "<State>.zip" once the transfer has finished, so a failed or interrupted
# download never leaves a truncated ".zip" for the unzip step to pick up.
failed_downloads = []
for state in states:
    print(state)

    # Construct the URL for the download
    url = f"{base_url}{state}.geojson.zip"

    # Final archive path, plus the temporary path used while downloading
    file_path = os.path.join(location, f"{state}.zip")
    partial_path = file_path + ".part"

    try:
        # Download to the temporary path, then move it into place
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, file_path)
        print(f"Downloaded {state}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next state
        print(f"Failed to download {url}: {e}")
        failed_downloads.append(state)
        # Drop whatever was written so no incomplete file lingers on disk
        if os.path.exists(partial_path):
            os.remove(partial_path)

# One summary line so failures are not lost among the per-state messages
if failed_downloads:
    print(f"Could not download {len(failed_downloads)} state(s): {', '.join(failed_downloads)}")

## Unzip the buildings

In [None]:
# Destination root: each archive is unpacked into its own subfolder here
unzipped_folder = os.path.join(data_path, "microsoft_buildings/microsoft_buildings_unzipped")
os.makedirs(unzipped_folder, exist_ok=True)

# Go through the download folder, ignoring anything that is not a ZIP archive
for archive_name in os.listdir(location):
    if not archive_name.endswith(".zip"):
        continue

    archive_path = os.path.join(location, archive_name)

    # Subfolder named after the archive without its extension
    stem = os.path.splitext(archive_name)[0]
    target_dir = os.path.join(unzipped_folder, stem)
    os.makedirs(target_dir, exist_ok=True)

    try:
        # Unpack the whole archive into its subfolder
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(target_dir)
        print(f"Successfully extracted: {archive_name}")
    except Exception as err:
        # Report the problem and continue with the remaining archives
        print(f"Failed to extract {archive_name}: {err}")

## Process and add to GDB

In [None]:
# Folder containing the extracted GeoJSON files (searched recursively below)
input_folder = os.path.join(data_path, "microsoft_buildings/microsoft_buildings_unzipped")

# Geodatabase to store the projected feature classes. Create it (and its
# parent folder) when missing, since the tools below cannot write into a
# geodatabase that does not exist yet.
output_gdb = os.path.join(data_path, "microsoft_buildings/microsoft_buildings.gdb")
if not arcpy.Exists(output_gdb):
    gdb_folder, gdb_name = os.path.split(output_gdb)
    os.makedirs(gdb_folder, exist_ok=True)
    arcpy.management.CreateFileGDB(gdb_folder, gdb_name)
arcpy.env.workspace = output_gdb

# Intermediate, unprojected feature class. The same path is reused for every
# state, which relies on arcpy.env.overwriteOutput being True.
# NOTE(review): scratch_gdb is assumed to exist already - confirm before running.
output_json = os.path.join(scratch_gdb, "mbd_unproj")

# Target projection, built once instead of once per file
target_sr = arcpy.SpatialReference("USA Contiguous Albers Equal Area Conic USGS")

# Use os.walk() to iterate through the subfolders and locate all GeoJSON files.
# The filename test can be tightened temporarily (for example to skip states
# that were already processed) when resuming an interrupted run.
for dirpath, dirnames, filenames in os.walk(input_folder):
    for filename in filenames:
        if not filename.endswith(".geojson"):
            continue

        # Full path to the GeoJSON file
        geojson_path = os.path.join(dirpath, filename)

        # The file name without its extension is expected to be a states_dict
        # key; skip anything else instead of aborting the run with a KeyError.
        state_name = os.path.splitext(filename)[0]
        state_abbreviation = states_dict.get(state_name)
        if state_abbreviation is None:
            print(f"Skipping {geojson_path}: '{state_name}' is not in states_dict")
            continue
        print(f"Working on {state_abbreviation}")

        # Step 1: GeoJSON -> polygon feature class in the scratch geodatabase
        arcpy.conversion.JSONToFeatures(geojson_path, output_json, "POLYGON")
        print(f"- Converted {state_abbreviation} to feature class")

        # Step 2: project into the output geodatabase as "<ST>_mbd", then
        # repair geometries in place
        output_fc = os.path.join(output_gdb, state_abbreviation+"_mbd")
        arcpy.management.Project(output_json, output_fc, target_sr)
        arcpy.management.RepairGeometry(output_fc)
        print(f"- Projected and repaired {filename} to {output_fc}")