In [1]:
import os
import pandas as pd

In [2]:
PROJECT_ROOT = os.getcwd()
PROJECT_ROOT
DATA_CLEAN_DIR = os.path.join(PROJECT_ROOT, "data_clean")
DATA_FINAL_DIR = os.path.join(PROJECT_ROOT, "data_final")

print("PROJECT_ROOT:", PROJECT_ROOT)
print("DATA_CLEAN_DIR:", DATA_CLEAN_DIR)
print("DATA_FINAL_DIR:", DATA_FINAL_DIR)

clean_csv_path = os.path.join(DATA_CLEAN_DIR, "evictions_clean.csv")
print("Loading from:", clean_csv_path)

evictions_clean = pd.read_csv(clean_csv_path, parse_dates=["executed_date"])
evictions_clean.shape

PROJECT_ROOT: c:\Users\Christopher\Documents\Python Projects\New_York_City_Eviction
DATA_CLEAN_DIR: c:\Users\Christopher\Documents\Python Projects\New_York_City_Eviction\data_clean
DATA_FINAL_DIR: c:\Users\Christopher\Documents\Python Projects\New_York_City_Eviction\data_final
Loading from: c:\Users\Christopher\Documents\Python Projects\New_York_City_Eviction\data_clean\evictions_clean.csv


(120084, 26)

In [3]:
evictions_by_month = (
    evictions_clean
    .groupby(["year", "month", "year_month"], as_index=False)
    .agg(total_evictions=("court_index_number", "count"))
    .sort_values(["year", "month"])
)

evictions_by_month.head(30)

Unnamed: 0,year,month,year_month,total_evictions
0,2017,1,2017-01,2237
1,2017,2,2017-02,1695
2,2017,3,2017-03,2040
3,2017,4,2017-04,1860
4,2017,5,2017-05,2080
5,2017,6,2017-06,1993
6,2017,7,2017-07,1696
7,2017,8,2017-08,2034
8,2017,9,2017-09,1776
9,2017,10,2017-10,2019


In [4]:
evictions_by_month["year"].min(), evictions_by_month["year"].max(), evictions_by_month.shape

(2017, 2025, (101, 4))

In [5]:
by_month_path = os.path.join(DATA_FINAL_DIR, "evictions_by_month.csv")
evictions_by_month.to_csv(by_month_path, index=False)

by_month_path

'c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction\\data_final\\evictions_by_month.csv'

In [6]:
evictions_by_borough = (
    evictions_clean
    .groupby(["year", "borough"], as_index=False)
    .agg(
        total_evictions=("court_index_number", "count")
    )
    .sort_values(["year", "borough"])
)

evictions_by_borough.head()

Unnamed: 0,year,borough,total_evictions
0,2017,BRONX,7658
1,2017,BROOKLYN,6355
2,2017,MANHATTAN,3450
3,2017,QUEENS,4325
4,2017,STATEN ISLAND,734


In [7]:
evictions_by_borough["year"].min(), evictions_by_borough["year"].max(), evictions_by_borough.shape

(2017, 2025, (45, 3))

In [8]:
by_borough_path = os.path.join(DATA_FINAL_DIR, "evictions_by_borough.csv")
evictions_by_borough.to_csv(by_borough_path, index=False)

by_borough_path


'c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction\\data_final\\evictions_by_borough.csv'

In [9]:
evictions_points = evictions_clean[evictions_clean["has_geo"]].copy()
evictions_points.shape

(109465, 26)

In [10]:
point_columns = [
    "court_index_number",
    "docket_number",
    "executed_date",
    "year",
    "month",
    "year_month",
    "borough",
    "eviction_address",
    "eviction_apt_num",
    "eviction_zip",
    "nta",
    "community_board",
    "council_district",
    "census_tract",
    "bin",
    "bbl",
    "residential_commercial_ind",
    "ejectment_standardized",
    "is_ejectment",
    "eviction_possession",
    "marshal_first_name",
    "marshal_last_name",
]

# We will still keep latitude/longitude separately for geometry


In [11]:
import json

features = []

for _, row in evictions_points.iterrows():
    geometry = {
        "type": "Point",
        "coordinates": [row["longitude"], row["latitude"]],
    }

    propertries = {col: row[col] for col in point_columns}

    feature = {
    "type": "Feature",
    "geometry": geometry,
    "propertries": propertries,
    }

    features.append(feature)

geojson_obj = {
    "type": "FeatureCollection",
    "features": features,
}

In [12]:
geojson_path = os.path.join(DATA_FINAL_DIR, "evictions_points.geojson")

with open(geojson_path, "w", encoding="utf-8") as f:
    json.dump(geojson_obj, f, default=str)

geojson_path


'c:\\Users\\Christopher\\Documents\\Python Projects\\New_York_City_Eviction\\data_final\\evictions_points.geojson'

In [13]:
import json

# Load the GeoJSON file you just saved
geojson_path = os.path.join(DATA_FINAL_DIR, "evictions_points.geojson")

with open(geojson_path, "r", encoding="utf-8") as f:
    gj = json.load(f)

# Inspect the structure
print("Top-level keys:", gj.keys())
print("Number of features:", len(gj["features"]))

# Show the first feature (index 0)
first_feature = gj["features"][0]
first_feature


Top-level keys: dict_keys(['type', 'features'])
Number of features: 109465


{'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-73.91015, 40.859892]},
 'propertries': {'court_index_number': 'B051541/15',
  'docket_number': 360641,
  'executed_date': '2017-01-05 00:00:00',
  'year': 2017,
  'month': 1,
  'year_month': '2017-01',
  'borough': 'BRONX',
  'eviction_address': '120 WEST 183 STREET',
  'eviction_apt_num': '15',
  'eviction_zip': 10453,
  'nta': 'Kingsbridge Heights',
  'community_board': 7.0,
  'council_district': 14.0,
  'census_tract': 255.0,
  'bin': 2014902.0,
  'bbl': 2032230034.0,
  'residential_commercial_ind': 'Residential',
  'ejectment_standardized': 'Not an Ejectment',
  'is_ejectment': False,
  'eviction_possession': 'Possession',
  'marshal_first_name': 'Richard',
  'marshal_last_name': 'Mccoy'}}