In [1]:
import sys
from pathlib import Path

# Make the project's `src/` directory importable so that `nyc_sales` resolves
# as a top-level package. The path is derived from the kernel's current
# working directory, so the notebook must be started from the directory that
# contains `src/`.
_src_dir = str(Path.cwd() / 'src')
if _src_dir not in sys.path:  # re-running this cell must not stack duplicate entries
    sys.path.insert(0, _src_dir)

# Import every pipeline stage through the SAME package root. Mixing
# `nyc_sales.*` with `src.nyc_sales.*` registers the package under two
# different names in `sys.modules`, which loads it twice (duplicated
# module-level state) and makes the `src.`-prefixed imports depend on the
# working directory also being on `sys.path`.
from nyc_sales.clean import DataCleaner as dc
from nyc_sales.extract import DataExtractor as de
from nyc_sales.ingest import DataIngester as di
from nyc_sales.visualize import Visualizer as v
from nyc_sales.metrics import MetricsCalculator as mc

In [5]:
# Step 1: Extract — read the raw Excel files from data/r, normalize their
# column names, and save the results as CSVs under data/c (one "Saved ..."
# line is printed per file).
raw_sales = de.extract(
    src_dir='data/r',
    trgt_dir='data/c',
)

1) Saved 2024_staten_island.csv
2) Saved 2019_manhattan.csv
3) Saved 2018_statenisland.csv
4) Saved 2024_bronx.csv
5) Saved 2016_bronx.csv
6) Saved 2015_manhattan.csv
7) Saved 2016_manhattan.csv
8) Saved 2019_statenisland.csv
9) Saved 2015_brooklyn.csv
10) Saved 2020_queens.csv
11) Saved 2018_manhattan.csv
12) Saved 2021_bronx.csv
13) Saved 2023_queens.csv
14) Saved 2022_brooklyn.csv
15) Saved rollingsales_statenisland.csv
16) Saved 2015_statenisland.csv
17) Saved 2017_bronx.csv
18) Saved 2021_staten_island.csv
19) Saved 2016_queens.csv
20) Saved 2018_queens.csv
21) Saved 2019_bronx.csv
22) Saved rollingsales_brooklyn.csv
23) Saved 2018_brooklyn.csv
24) Saved 2023_bronx.csv
25) Saved 2021_brooklyn.csv
26) Saved 2022_staten_island.csv
27) Saved 2021_queens.csv
28) Saved 2024_brooklyn.csv
29) Saved 2016_statenisland.csv
30) Saved 2020_brooklyn.csv
31) Saved 2020_bronx.csv
32) Saved 2020_manhattan.csv
33) Saved 2019_brooklyn.csv
34) Saved 2023_staten_island.csv
35) Saved 2020_staten_island.csv

In [2]:
# Step 2: Load — read the CSVs extracted into data/c and concatenate them
# into a single DataFrame.
# NOTE(review): `clean_sales` is bound here, before the cleaning step (Step 3)
# runs, and Step 3 is pointed at data/c rather than at this variable — the
# name may be misleading; confirm downstream usage before renaming.
clean_sales = de.load(src_dir='data/c') 

In [3]:
# Step 3: Clean — transform the extracted data in data/c so it is ready for
# downstream use and analysis. The call is given data/i as the target
# directory and nyc_sales_2015_2025.csv as the file name.
intermediate_sales = dc.clean(
    src_dir='data/c',
    trgt_dir='data/i',
    file_name='nyc_sales_2015_2025.csv',
)

In [4]:
# Step 4: Ingest — take the cleaned data and write the aggregated summary
# into files partitioned by 'year'.
# Arguments are passed positionally; the roles noted below are inferred from
# the other pipeline calls — TODO confirm against DataIngester.ingest.
summary_sales = di.ingest(
    'data/i',                  # presumably the source directory (Step 3's target)
    'data/p',                  # presumably the target directory
    'nyc_sales_summary.csv',   # presumably the output file name
)

In [5]:
# Step 5: Metrics — compute the custom matrix (affordability + market breadth
# at the borough/year level).
# The first two positional arguments are the same directory, data/p;
# presumably source and target — TODO confirm against MetricsCalculator.compute.
matrix = mc.compute(
    'data/p',
    'data/p',
    'nyc_sales_custom_matrix.csv',
)

In [6]:
# Build and save one figure (or set of figures) per research question.
# Q1 and Q4 are drawn from `intermediate_sales` (Step 3); Q2 and Q3 are drawn
# from `matrix` (Step 5). Each figure is saved right after it is created.

# Q1: How have neighborhood prices evolved across boroughs between 2015 and 2025?
borough_trajectories_fig = v.create_borough_trajectories(df=intermediate_sales)
v.savefig(borough_trajectories_fig, "borough_trajectories.png")

# Q2: Which boroughs experienced the steepest declines in entry-level affordability?
affordability_fig = v.create_affordability_index_plot(df=matrix)
v.savefig(affordability_fig, "borough_affordability_index.png")

# Q3: Was the post-COVID rebound broad-based (high Market Breadth) or concentrated in select neighborhoods?
market_breadth_fig = v.create_market_breadth_plot(df=matrix)
v.savefig(market_breadth_fig, "market_breadth.png")

# Q4: Where does 2025 YTD stand relative to pre-COVID and prior-cycle peaks?
# Unlike the calls above, the result here is iterated over, so it holds
# several figures; each is saved under its own index-suffixed file name.
snapshot_fig = v.create_2025_snapshot(df=intermediate_sales)
for idx, fig in enumerate(snapshot_fig):
    v.savefig(fig, f"snapshot_2025_vs_benchmarks_{idx}.png")