## Are rents in NYC going to drop anytime soon?

### Import libraries and files

In [14]:
# Import libraries
import pandas as pd
from prophet import Prophet
from IPython.display import clear_output

In [15]:
# Load the StreetEasy one-bedroom median asking rent table (a zipped CSV, per the file name)
df = pd.read_csv(
    'https://cdn-charts.streeteasy.com/rentals/OneBd/medianAskingRent_OneBd.zip',
    compression='zip',
)
# Preview the first ten rows
df.head(10)

Unnamed: 0,areaName,Borough,areaType,2010-01,2010-02,2010-03,2010-04,2010-05,2010-06,2010-07,...,2023-06,2023-07,2023-08,2023-09,2023-10,2023-11,2023-12,2024-01,2024-02,2024-03
0,All Downtown,Manhattan,submarket,2995.0,2950.0,2900.0,2975.0,2995.0,3000.0,3000.0,...,4670.0,4500.0,4500.0,4550.0,4500.0,4500.0,4600.0,4730.0,4750.0,4705.0
1,All Midtown,Manhattan,submarket,2800.0,2800.0,2850.0,2895.0,2900.0,2970.0,3000.0,...,4490.0,4400.0,4391.0,4329.0,4210.0,4173.0,4210.0,4300.0,4308.0,4395.0
2,All Upper East Side,Manhattan,submarket,2350.0,2300.0,2350.0,2470.0,2500.0,2500.0,2500.0,...,3650.0,3500.0,3500.0,3500.0,3550.0,3600.0,3585.0,3600.0,3795.0,3695.0
3,All Upper Manhattan,Manhattan,submarket,1500.0,1516.0,1545.0,1516.0,1550.0,1575.0,1550.0,...,2495.0,2450.0,2475.0,2450.0,2400.0,2400.0,2400.0,2450.0,2495.0,2500.0
4,All Upper West Side,Manhattan,submarket,2600.0,2565.0,2495.0,2500.0,2500.0,2600.0,2600.0,...,4000.0,4100.0,4100.0,4124.0,4095.0,4000.0,3995.0,4000.0,4198.0,4200.0
5,Astoria,Queens,neighborhood,1395.0,1450.0,1450.0,1395.0,1400.0,1550.0,1475.0,...,2450.0,2450.0,2495.0,2495.0,2400.0,2400.0,2400.0,2463.0,2500.0,2500.0
6,Auburndale,Queens,neighborhood,,,,,,,,...,,,,,,,,,,
7,Bath Beach,Brooklyn,neighborhood,,,,,,,,...,,1750.0,,,1700.0,,,1850.0,1800.0,1995.0
8,Battery Park City,Manhattan,neighborhood,2875.0,2800.0,2900.0,2873.0,2800.0,2900.0,2950.0,...,4895.0,4895.0,4868.0,4730.0,4515.0,4425.0,4325.0,4320.0,4355.0,4545.0
9,Bay Ridge,Brooklyn,neighborhood,1350.0,1350.0,1350.0,1325.0,1300.0,1300.0,1300.0,...,1900.0,1950.0,1999.0,2000.0,1900.0,1900.0,1913.0,1950.0,2039.0,1985.0


### User Input - Choose Borough and Area

In [12]:
# Create a list of the distinct, non-null boroughs in the data
boroughs = df['Borough'].dropna().unique().tolist()
# Parallel lists: selected_areas[k] was picked from selected_boroughs[k]
selected_areas = []
selected_boroughs = []

# Loop until the user enters -1 at the borough prompt
while True:
    # Only ValueError (non-numeric text handed to int()) is treated as a retry.
    # A bare `except:` would also swallow KeyboardInterrupt and stdin errors,
    # which makes the cell impossible to interrupt and can loop forever.
    try:
        # Print the areas selected so far
        if len(selected_areas) > 0:
            print('You have selected the following areas:')
            print(*selected_areas,sep=', ')
            print()

        # Print the boroughs as a 1-based menu
        for i,borough in enumerate(boroughs):
            print(f'{i+1}. {borough}')

        # Ask the user to choose a borough
        borough_input = int(input('\nPlease select a Borough from the list (-1 to exit): '))

        # Check if the borough number is within the menu
        if len(boroughs) >= borough_input > 0:

            # Create a list of the areas that belong to the chosen borough
            areas = df.loc[df['Borough'] == boroughs[borough_input-1], 'areaName'].tolist()

            # Print the areas as a 1-based menu
            print()
            for i,area in enumerate(areas):
                print(f'{i+1}. {area}')

            # Ask the user to choose an area
            print()
            area_input = int(input('\nPlease select an Area from the list: '))

            # Check if the area number is within the menu
            if len(areas) >= area_input > 0:
                # Record the borough and area together so the two lists stay aligned
                selected_boroughs.append(boroughs[borough_input-1])
                selected_areas.append(areas[area_input-1])
                # Clear output area
                clear_output()
            else:
                # Clear output area
                clear_output()
                print('Invalid area choosen. Please try again!\n')

        # Stop asking once the user enters -1
        elif borough_input == -1:
            break

        # Any other number is not a valid borough choice
        else:
            # Clear output area
            clear_output()
            print('Invalid borough choosen. Please try again!\n')

    except ValueError:
        # The text entered could not be converted to an integer
        clear_output()
        print('Invalid input!\n')

You have selected the following areas:
Midtown West, Upper East Side, Queens, Bronx

1. Manhattan
2. Queens
3. Brooklyn
4. Bronx
5. Staten Island

Please select a Borough from the list (-1 to exit): -1


In [13]:
# Show every selection as <borough>-<area>, in the order it was chosen
for i, chosen_borough in enumerate(selected_boroughs):
    print(f'{chosen_borough}-{selected_areas[i]}')

Manhattan-Midtown West
Manhattan-Upper East Side
Queens-Queens
Bronx-Bronx


### Filter and clean data

In [None]:
# Collect the rows of df that match each selected (borough, area) pair,
# keeping the order in which the selections were made.
# The matching frames are gathered first and concatenated once: calling
# pd.concat inside the loop recopies the growing frame on every iteration
# and starts from an empty placeholder DataFrame.
selected_frames = [
    df.loc[(df['Borough'] == borough_name) & (df['areaName'] == area_name)]
    for borough_name, area_name in zip(selected_boroughs, selected_areas)
]
# pd.concat raises on an empty list, so fall back to an empty frame
# (the previous result) when nothing was selected
df_nyc = pd.concat(selected_frames) if selected_frames else pd.DataFrame()
# Display the dataframe (the row labels from df are kept; no index reset happens here)
df_nyc

In [None]:
# Remove the 'Borough' and 'areaType' columns, which are not needed from here on
df_nyc = df_nyc.drop(columns=['Borough', 'areaType'])
df_nyc

In [None]:
# Use the area name as the row label
df_nyc = df_nyc.set_index('areaName')
df_nyc

In [None]:
# Flip the table so the month labels become the index and each area becomes a column,
# then blank out the name of the columns axis (the row index itself is left unnamed)
df_nyc = df_nyc.T.rename_axis(columns='')
df_nyc.tail()

In [None]:
# Check the datatype of the index (the month labels moved there by the transpose)
# before it is converted to datetime
df_nyc.index.dtype

In [None]:
# Change the datatype of the index to datetime by parsing the month labels
# (e.g. '2010-01') with pd.to_datetime
df_nyc.index = pd.to_datetime(df_nyc.index)
# Show the new dtype to confirm the conversion
df_nyc.index.dtype

In [None]:
# Check null values: count the missing monthly entries in each area column
df_nyc.isnull().sum()

In [None]:
# Average the monthly values within each (year, quarter) of the datetime index
quarter_keys = [df_nyc.index.year, df_nyc.index.quarter]
df_year_quarter = df_nyc.groupby(quarter_keys).mean()
df_year_quarter.tail(10)

### Plot Data to visualize trends

In [None]:
# Plot the quarterly averages for every column of df_year_quarter
# (no filtering on column names is applied here)
df_year_quarter.plot(figsize=(12,10))

In [None]:
# Plot the data for the last one year.
# The window is taken as the last 12 rows instead of the hard-coded
# '2023-May':'2024' slice, so it keeps tracking the newest months when the
# source file is refreshed.
# NOTE(review): assumes one row per month in chronological order - confirm.
df_nyc.tail(12).plot(figsize=(12,10))

### Prepare the dataframes for Prophet

In [None]:
# Move the datetime index into an ordinary column so it can be passed to Prophet as data
df_nyc = df_nyc.reset_index()
df_nyc.head()

In [None]:
# Show (rows, columns); the column count drives the loop that builds the per-area frames
df_nyc.shape

In [None]:
# Build one two-column frame per area for Prophet:
# 'ds' comes from column 0 of df_nyc and 'y' from that area's column
list_df = []
for column_position in range(1, df_nyc.shape[1]):
    area_series = df_nyc.iloc[:, [0, column_position]].copy()
    area_series.columns = ['ds', 'y']
    list_df.append(area_series)

# Peek at the first frame as a sanity check
list_df[0].head()

### Use Prophet to predict values

In [None]:
# One fresh, default-configured Prophet model for every frame in list_df
m = [Prophet() for _unused in list_df]

In [None]:
# Train each model on the frame at the same position in list_df
for i, history in enumerate(list_df):
    m[i].fit(history)

In [None]:
# Ask user to enter duration of future prediction in months.
# Keeps prompting until a positive whole number is entered.
while True:
    try:
        duration = int(input('How many months into the future do you want forecast: '))
    except ValueError:
        # Only a failed int() conversion is retried; a bare `except:` would also
        # swallow KeyboardInterrupt and stdin errors and make the loop unstoppable
        print('Invalid entry! Please try again')
        continue

    if duration <= 0:
        print('Duration should be positive! Please try again')
    else:
        break

In [None]:
# Build the future dataframe for every model, extended by `duration` monthly periods.
# freq='MS' (month start) matches the training dates, which pd.to_datetime derived
# from labels such as '2010-01' (i.e. the first day of each month); the previous
# freq='M' generated month-end dates, so the forecast rows were out of step
# with the history.
future = [model.make_future_dataframe(periods=duration, freq='MS') for model in m]

# Check any one of the dataframes in the list (the last one); indexing with -1
# avoids depending on a loop variable `i` left over from an earlier cell
future[-1].tail()

In [None]:
# Make predictions for every model using the future dataframe at the same position
forecast = [model.predict(frame) for model, frame in zip(m, future)]

# Preview the last forecast; indexing with -1 avoids depending on a loop
# variable `i` left over from an earlier cell
forecast[-1].tail()

In [None]:
# Draw each model's forecast chart, preceded by its borough-area label
for idx, (model, prediction) in enumerate(zip(m, forecast)):
    print(f'{selected_boroughs[idx]}-{selected_areas[idx]}')
    display(model.plot(prediction))

In [None]:
# Draw each model's component charts, preceded by its borough-area label
for idx, (model, prediction) in enumerate(zip(m, forecast)):
    print(f'{selected_boroughs[idx]}-{selected_areas[idx]}')
    display(model.plot_components(prediction))

In [None]:
# Re-index every forecast by its 'ds' datetime column
forecast = [prediction.set_index('ds') for prediction in forecast]
# Confirm the new index on the last forecast
forecast[-1].tail()

In [None]:
# Plot yhat with its lower and upper bounds for the last `duration` rows of each forecast
for idx, prediction in enumerate(forecast):
    horizon = prediction[['yhat', 'yhat_lower', 'yhat_upper']].iloc[-duration:, :]
    horizon.plot(title=f'{selected_boroughs[idx]}-{selected_areas[idx]}')

### Additional Analysis

In [None]:
# Check if there are any correlations in the rents for the areas selected.
# 'index' is the date column created by the earlier reset_index; it is moved back
# to the index so that only the area columns take part in the correlation matrix.
df_nyc.set_index('index').corr()

In [None]:
# Calculate the expected percentage change in rent for each selected area, based on yhat
# Create an empty list to hold one percentage per area
df_change = []
# Create a for loop to calculate the percentages
for i in range(len(list_df)):
    # Start the window at the last date present in the input data rather than a
    # hard-coded '2024-03', so it stays correct when the source file gains new months
    last_observed = list_df[i]['ds'].max()
    # yhat from the last observed month through the end of the forecast
    # (forecast[i] is indexed by 'ds' at this point)
    df_temp = forecast[i].loc[last_observed:, 'yhat']
    # Percentage change between the first and last yhat in the window
    df_change.append((df_temp.iloc[-1]-df_temp.iloc[0])/df_temp.iloc[0]*100)

In [None]:
# Summarise the result: one row per selection with its borough, area and expected % change
percentage_columns = {
    'Borough': selected_boroughs,
    'Area': selected_areas,
    '% Change Expected': df_change,
}
df_percentage = pd.DataFrame(percentage_columns)
df_percentage