In [28]:
import pandas as pd
import os

Aggregate dataset

In [29]:
# Read master data
df = pd.read_csv('../data/cleaned/nopv_master.csv')
display(df)

Unnamed: 0,boro,block,lot,ease,address_1,address_2,address_3,"city,_state,_zip",country,tax_class,...,fiscal_year,date_published,published_year,published_month,published_day,rc1_desc,rc2_desc,rc3_desc,rc4_desc,rc5_desc
0,1,11,14,,,855 AVENUE OF THE AMERIC,,NEW YORK NY 10001-4105,,2,...,2025,2025-05-14,2025,5,14,Other Change,Unknown,Unknown,Unknown,Unknown
1,1,15,1004,,,1 S BOULEVARD,,NYACK NY 10960-3604,,4,...,2025,2025-05-14,2025,5,14,Fully Exempt and now restored to taxable,Unknown,Unknown,Unknown,Unknown
2,1,18,1134,,,88 GREENWICH ST,APT 718,NEW YORK NY 10006-2229,,2,...,2025,2025-05-14,2025,5,14,Other Change,Unknown,Unknown,Unknown,Unknown
3,1,18,1160,,,88 GREENWICH ST,APT 815,NEW YORK NY 10006-2231,,2,...,2025,2025-05-14,2025,5,14,Other Change,Unknown,Unknown,Unknown,Unknown
4,1,26,1008,,,55 EXCHANGE PL,,NEW YORK NY 10005-3301,,4,...,2025,2025-05-14,2025,5,14,Other Change,Unknown,Unknown,Unknown,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28193,5,8047,188,,,230 JOHNSON AVE,,STATEN ISLAND NY 10307-1240,,1,...,2025,2025-05-14,2025,5,14,Other Change,Unknown,Unknown,Unknown,Unknown
28194,5,8047,1001,,,2201 NEPTUNE AVE,,BROOKLYN NY 11224-2375,,4,...,2025,2025-05-14,2025,5,14,Alteration,Economics,Unknown,Unknown,Unknown
28195,5,8047,1002,,,2201 NEPTUNE AVE,,BROOKLYN NY 11224-2375,,4,...,2025,2025-05-14,2025,5,14,Alteration,Economics,Unknown,Unknown,Unknown
28196,5,8047,1003,,,2201 NEPTUNE AVE,,BROOKLYN NY 11224-2375,,2,...,2025,2025-05-14,2025,5,14,Alteration,Economics,Unknown,Unknown,Unknown


In [50]:
# Define Property tax rates for current tax year

tax_rates = {'1': 20.085,
             '2': 12.500,
             '3': 11.181, 
             '4': 10.762}

# Set today's date
today = pd.Timestamp.today().normalize()


# A complete table of Reason and corresponding code
## Note that the reason descriptions are worded 
# -- slightly different in excel package and master csv
reason = {'Code': ['A', 'AP', 'B', 'D', 'E', 'E0', 
                   'E1', 'E2', 'E3', 'E4', 'E5',
                   'M', 'N', 'T', 'NP', 'P', 'S', 'X'], 
          'rc1_desc': ['Alteration', 'Apportionment', 'Building in Progress Last Year', 
                       'Demolition', 'Fully Exempt and is now restored to taxable',
                       'Sales', 'Economics', 'Administrative Review (Assessor initiated)',
                       'Sales (Taxpayer initiated Change)', 'Economics (Taxpayer initiated Change)',
                       'Administrative Review (Taxpayer initiated)', 'MV only Changes',
                       'New Building', 'Transfer to or from REUC', 'New Building in Progress',
                       'Physical Change', 'Gain or Loss to Street', 
                       'Change other than fully exempt restored'
                       ]}
reason_lookup = pd.DataFrame(reason)
display(reason_lookup)


Unnamed: 0,Code,rc1_desc
0,A,Alteration
1,AP,Apportionment
2,B,Building in Progress Last Year
3,D,Demolition
4,E,Fully Exempt and is now restored to taxable st...
5,E0,Sales
6,E1,Economics
7,E2,Administrative Review (Assessor initiated)
8,E3,Sales (Taxpayer initiated Change)
9,E4,Economics (Taxpayer initiated Change)


In [51]:
# Store results for all classes
all_class_tables = []

for class_num in ['1', '2', '3', '4']:

    # Filter by class and publication date

    df_class = df[
        (df['tax_class'] == class_num) &
        (pd.to_datetime(df['date_published']) <= today)
    ]

    # Prepare aggregation
    df_agg = df_class[['rc1_desc', 'taxable_value_final_roll', 'taxable_value']]
    df_count = df_agg.groupby('rc1_desc').size().reset_index(name='# of Parcels')
    df_change = df_agg.groupby('rc1_desc').sum().reset_index()
    df_change['Change in Taxable AV'] = (
        df_change['taxable_value_final_roll'] - df_change['taxable_value']
    )
    df_change = df_change[['rc1_desc', 'Change in Taxable AV']]

    # Merge and complete reason list
    df_table = pd.merge(df_count, df_change, on='rc1_desc', how='outer')
    df_table = reason_lookup.merge(df_table, on='rc1_desc', how='left')  # ensures all reasons shown

    # Clean missing values
    df_table['# of Parcels'] = df_table['# of Parcels'].fillna(0).astype(int)
    df_table['Change in Taxable AV'] = df_table['Change in Taxable AV'].fillna(0).astype(int)

    # Compute total tax
    df_table['Total Tax'] = df_table['Change in Taxable AV'] * tax_rates[class_num]
    df_table['Class'] = class_num

    # Store
    all_class_tables.append(df_table)

# Combine across all classes

final_df = pd.concat(all_class_tables, ignore_index=True)

display(final_df)
 

Unnamed: 0,Code,rc1_desc,# of Parcels,Change in Taxable AV,Total Tax,Class
0,A,Alteration,16,-69052,-1.386909e+06,1
1,AP,Apportionment,4,29520,5.929092e+05,1
2,B,Building in Progress Last Year,16,258378,5.189522e+06,1
3,D,Demolition,128,-1891294,-3.798664e+07,1
4,E,Fully Exempt and is now restored to taxable st...,0,0,0.000000e+00,1
...,...,...,...,...,...,...
67,T,Transfer to or from REUC,0,0,0.000000e+00,4
68,NP,New Building in Progress,24,-2196922,-2.364327e+07,4
69,P,Physical Change,0,0,0.000000e+00,4
70,S,Gain or Loss to Street,0,0,0.000000e+00,4
