# Part 1
we use dataset from [NYC Open Data](https://data.cityofnewyork.us/City-Government/Evictions/6z8x-wfk4/about_data)

In [2]:
# import necessary libraries
import pandas as pd
import plotly.express as px
import requests

In [3]:
# import NYC Housing Eviction dataset with pagination
url = "https://data.cityofnewyork.us/resource/6z8x-wfk4.json"

n_pages = []

all_records = []

# limit number of records
limit = 1000
# set starting point
offset = 0
page = 1

# pagination loop

while True:
    params = {"$limit": limit, "$offset": offset}
    response2 = requests.get(url, params=params)
    data = response2.json()

    # stop to last page
    if not data:
        break

    all_records.extend(data)
    offset += limit
    page += 1

# combine all pages

df_evic = pd.json_normalize(all_records)

print(f"Total records :{len(df_evic)}")
df_evic


Total records :123176


Unnamed: 0,court_index_number,docket_number,eviction_address,eviction_apt_num,executed_date,marshal_first_name,marshal_last_name,residential_commercial_ind,borough,eviction_zip,ejectment,eviction_possession,latitude,longitude,community_board,council_district,census_tract,bin,bbl,nta
0,304550/22,362872,2141 STARLING AVENUE,309,2023-07-20T00:00:00.000,Thomas,Bia,Residential,BRONX,10462,Not an Ejectment,Possession,40.836202,-73.853806,9,18,222,2041215,2039470001,Parkchester
1,313827/21,010352,875 BOYNTON AVENUE,7G,2024-02-27T00:00:00.000,David,Smith,Residential,BRONX,10473,Not an Ejectment,Possession,40.821223,-73.876510,9,17,28,2092721,2036230020,Soundview-Castle Hill-Clason Point-Harding Park
2,60461/18,8415,168-02 144 AVENUE,,2019-01-15T00:00:00.000,Bernard,Blake,Residential,QUEENS,11434,Not an Ejectment,Possession,40.669089,-73.771561,12,31,330,4271237,4125960069,Springfield Gardens North
3,9622/18,005763,3555 OXFORD AVENUE,3F,2018-08-02T00:00:00.000,Jeffrey,Rose,Residential,BRONX,10463,Not an Ejectment,Possession,40.886026,-73.908863,8,11,295,2084152,2057930467,Spuyten Duyvil-Kingsbridge
4,313777/22,109527,1035 GRAND CONCOURSE,6CS,2023-04-13T00:00:00.000,Justin,Grossman,Residential,BRONX,10452,Not an Ejectment,Possession,40.830567,-73.920669,4,8,195,2002874,2024710024,West Concourse
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123171,309225/21,001075,933-935 EAST 39TH STREET WHOLE PREMISES,,2023-09-11T00:00:00.000,Charles,Kemp,Residential,BROOKLYN,11210,Not an Ejectment,Possession,,,,,,,,
123172,K334763/23,112420,2101 BEDFORD AVE,1D,2024-07-08T00:00:00.000,Ileana,Rivera,Residential,BROOKLYN,11226,Not an Ejectment,Possession,40.653121,-73.956079,17,40,822,3116642,3050840010,Prospect Lefferts Gardens-Wingate
123173,B74095/16B,099955,1855 MONROE AVENUE,4F,2018-02-09T00:00:00.000,Darlene,Barone,Residential,BRONX,10457,Not an Ejectment,Possession,40.848084,-73.906164,5,15,23302,2007545,2028010018,Mount Hope
123174,K84890/18,092582,3615 CHURCH AVENUE,3F,2019-01-03T00:00:00.000,Ileana,Rivera,Residential,BROOKLYN,11203,Not an Ejectment,Possession,40.651115,-73.943557,17,41,816,3108774,3048730036,East Flatbush-Farragut


In [4]:
# if response.status_code == 200:
#     eviction_data = response.json()

# df = pd.DataFrame(eviction_data)
# df

# Part 2: Data Structure

In [None]:
# check data types

df_evic.dtypes


court_index_number            str
docket_number                 str
eviction_address              str
eviction_apt_num              str
executed_date                 str
marshal_first_name            str
marshal_last_name             str
residential_commercial_ind    str
borough                       str
eviction_zip                  str
ejectment                     str
eviction_possession           str
latitude                      str
longitude                     str
community_board               str
council_district              str
census_tract                  str
bin                           str
bbl                           str
nta                           str
dtype: object

we found that executed date is in str. format, so we need to change the format to date time format.

In [8]:
# change into datetime
df_evic["executed_date"] = pd.to_datetime(df_evic["executed_date"], errors="coerce")

In [11]:
# check results
df_evic.dtypes

court_index_number                       str
docket_number                            str
eviction_address                         str
eviction_apt_num                         str
executed_date                 datetime64[us]
marshal_first_name                       str
marshal_last_name                        str
residential_commercial_ind               str
borough                                  str
eviction_zip                             str
ejectment                                str
eviction_possession                      str
latitude                                 str
longitude                                str
community_board                          str
council_district                         str
census_tract                             str
bin                                      str
bbl                                      str
nta                                      str
dtype: object

In [9]:
# check missing values
df_evic.isnull().sum()

court_index_number                0
docket_number                     0
eviction_address                  0
eviction_apt_num              17997
executed_date                     0
marshal_first_name                0
marshal_last_name                 0
residential_commercial_ind        0
borough                           0
eviction_zip                      0
ejectment                         0
eviction_possession               0
latitude                      10861
longitude                     10861
community_board               10861
council_district              10861
census_tract                  10861
bin                           11262
bbl                           11262
nta                           10861
dtype: int64

In [14]:
# check residential vs commercial evictions
df_evic["residential_commercial_ind"].value_counts()

residential_commercial_ind
Residential    112206
Commercial      10970
Name: count, dtype: int64

# Part 3: Preparing the Visulaization

In [24]:
# calculate total evictions by borough by year
# add year column
df_evic["year"] = df_evic["executed_date"].dt.year

# new df with total evictions by borough and year
evictions_by_borough_year = (
    df_evic.groupby(["borough", "year"]).size().reset_index(name="total_evictions")
)

evictions_by_borough_year


Unnamed: 0,borough,year,total_evictions
0,BRONX,2017,7658
1,BRONX,2018,7140
2,BRONX,2019,6244
3,BRONX,2020,1088
4,BRONX,2021,29
5,BRONX,2022,1175
6,BRONX,2023,4320
7,BRONX,2024,4705
8,BRONX,2025,5885
9,BRONX,2026,601


In [15]:
# evictions by building type each year
evictions_by_building_year = (
    df_evic.groupby(["residential_commercial_ind", "year"])
    .size()
    .reset_index(name="total_evictions")
)
evictions_by_building_year

Unnamed: 0,residential_commercial_ind,year,total_evictions
0,Commercial,2017,1719
1,Commercial,2018,1842
2,Commercial,2019,1677
3,Commercial,2020,358
4,Commercial,2021,108
5,Commercial,2022,979
6,Commercial,2023,1379
7,Commercial,2024,1283
8,Commercial,2025,1510
9,Commercial,2026,115


In [None]:
# calculating total evictions by borough to date
eviction_by_borough = (
    df_evic.groupby("borough").size().reset_index(name="total_evictions")
)
eviction_by_borough.dtypes


# Part 4: Visualization

In [36]:
# sample visualiztion
fig = px.bar(
    eviction_by_borough,
    x="borough",
    y="total_evictions",
    title="Total Evictions by Borough",
    labels={"total_evictions": "Total_Evictions", "borough": "Borough"},
    color="borough",
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
    category_orders={
        "borough": eviction_by_borough.sort_values("total_evictions", ascending=False)[
            "borough"
        ]
    },
)

fig.show()

# Time Series Analysis

## Eviction By Borough By Year

In [39]:
fig2 = px.line(
    evictions_by_borough_year,
    x="year",
    y="total_evictions",
    color="borough",
    title="Total Evictions by Borough Over Time",
    labels={"total_evictions": "Total Evictions", "year": "Year"},
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
)

fig2.show()

## Eviction by Type by Year

In [46]:
fig3 = px.line(
    evictions_by_building_year,
    x="year",
    y="total_evictions",
    color="residential_commercial_ind",
    title="Total Evictions by Building Type Over Time",
    labels={"total_evictions": "Total Evictions", "year": "Year"},
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
)

fig3.show()

# Part 5: Conclusion

TBD