In [1]:
!pip install pandas requests



Collecting pandas
  Downloading pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl (12.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.8/346.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: tzdata, pandas
Successfully installed pandas-2.2.3 tzdata-2025.1
[0m

In [2]:
import pandas as pd
import requests
import sqlite3

In [3]:
# Load the CSV file
df = pd.read_csv("BldgIDs.csv")

# Rename columns if necessary (Ensure correct BBL column name)
df.rename(columns={"BLL_ID": "BBL_ID"}, inplace=True)

# Filter only NYC properties
df_nyc = df[df["State"] == "NY"].copy()

# Drop rows with missing BBL_ID
df_nyc = df_nyc.dropna(subset=["BBL_ID"])

# Display the first few rows
df_nyc.head()


Unnamed: 0,Property ID,APTS,Address,State,Type,BBL_ID
15,BKLN-SUNGARDEN,17.0,"655 41st St, Brooklyn, NY 11232",NY,Coop,3091800000.0
16,NYC-2CHARLTON,185.0,"2 Charlton St, New York, NY 10014",NY,Coop,1005060000.0
17,NYC-W83ST,69.0,"46 W 83rd St, New York, NY 10024",NY,Coop,1011960000.0
18,BKLN-152FREEMAN,8.0,"152 Freeman St, Brooklyn, NY 11222",NY,Condo,3025120000.0
19,NYC-W93ST,90.0,"123 W 93rd St, New York, NY 10025",NY,Coop,1012240000.0


In [4]:
# NYC PLUTO API Endpoint
API_URL = "https://data.cityofnewyork.us/resource/64uk-42ks.json"

# Function to fetch building data for a given BBL ID
def get_building_details(bbl_id):
    params = {
        "$where": f"bbl='{bbl_id}'",
        "$limit": 1  # Limit to one record per BBL ID
    }

    response = requests.get(API_URL, params=params)

    if response.status_code == 200 and response.json():
        data = response.json()[0]  # Extract first result
        return {
            "BBL_ID": bbl_id,
            "NumFloors": data.get("numfloors"),
            "YearBuilt": data.get("yearbuilt"),
            "LandUse": data.get("landuse"),
            "ResidentialUnits": data.get("unitsres"),
            "TotalUnits": data.get("unitstotal"),
            "LotArea": data.get("lotarea"),
            "BuildingArea": data.get("bldgarea")
        }
    else:
        return {
            "BBL_ID": bbl_id,
            "NumFloors": None,
            "YearBuilt": None,
            "LandUse": None,
            "ResidentialUnits": None,
            "TotalUnits": None,
            "LotArea": None,
            "BuildingArea": None
        }


In [5]:
# Fetch data for all NYC BBL IDs
building_data = []

for bbl in df_nyc["BBL_ID"]:
    details = get_building_details(str(int(bbl)))  # Convert to string (remove decimals if any)
    building_data.append(details)

# Convert to DataFrame
df_building_info = pd.DataFrame(building_data)

# Save results to CSV
df_building_info.to_csv("NYC_Building_Details.csv", index=False)

# Display the collected data
from IPython.display import display
display(df_building_info)


Unnamed: 0,BBL_ID,BuildingArea,LandUse,LotArea,NumFloors,ResidentialUnits,TotalUnits,YearBuilt
0,3091800050,,,,,,,
1,1005060027,172836.0,3.0,16342.0,16.0,175.0,181.0,1966.0
2,1011960052,56949.0,3.0,10217.0,8.0,66.0,66.0,1911.0
3,3025120038,5625.0,2.0,2500.0,4.0,5.0,5.0,2017.0
4,1012240023,107243.0,3.0,13900.0,10.0,90.0,90.0,1926.0
5,3007580048,13096.0,2.0,4508.0,5.0,16.0,16.0,1930.0
6,3009220017,16896.0,2.0,6010.0,4.0,24.0,24.0,1928.0
7,3007390030,13752.0,2.0,5074.0,4.0,16.0,16.0,1913.0
8,3009170058,27614.0,2.0,10017.0,4.0,40.0,40.0,1927.0
9,3009170048,33528.0,2.0,12521.0,4.0,40.0,40.0,1927.0


In [6]:
import sqlite3

# Connect to SQLite database
conn = sqlite3.connect("daisychain_buildings.db")

# Store data in the database
df_building_info.to_sql("nyc_building_info", conn, if_exists="replace", index=False)

# Commit and close the connection
conn.commit()
conn.close()

print("✅ NYC building data successfully stored in the database!")


✅ NYC building data successfully stored in the database!


In [7]:
# Connect to SQLite database
conn = sqlite3.connect("daisychain_buildings.db")

# Query example: Show buildings with 10+ floors
query = """
    SELECT * FROM nyc_building_info WHERE NumFloors >= 10
"""

df_query = pd.read_sql(query, conn)
conn.close()

# Display results
from IPython.display import display
display(df_query)



Unnamed: 0,BBL_ID,BuildingArea,LandUse,LotArea,NumFloors,ResidentialUnits,TotalUnits,YearBuilt
0,1005060027,172836,3,16342,16.0,175,181,1966
1,1011960052,56949,3,10217,8.0,66,66,1911
2,3025120038,5625,2,2500,4.0,5,5,2017
3,1012240023,107243,3,13900,10.0,90,90,1926
4,3007580048,13096,2,4508,5.0,16,16,1930
5,3009220017,16896,2,6010,4.0,24,24,1928
6,3007390030,13752,2,5074,4.0,16,16,1913
7,3009170058,27614,2,10017,4.0,40,40,1927
8,3009170048,33528,2,12521,4.0,40,40,1927
9,3052680089,18240,2,5103,4.0,16,16,1931
