# Analysis of Historical NYC Sales Data 2003-2018

In [4]:
# Import required analysis libraries
import numpy as np
import pandas as pd
import csv

In [41]:
# Import required visualization libraries
%matplotlib inline
import hvplot.pandas
import plotly.express as px
from panel.interact import interact

### Importing Borough sales data, pre-processed using SQL

In [103]:
# Read the SQL pre-processed sales extract into a DataFrame and preview its first rows.
# NOTE(review): the variables are named "manhattan", but the file is
# 'four_boroughs.csv' — presumably it covers more than Manhattan; confirm and
# consider renaming.
manhattan_path = 'four_boroughs.csv'
manhattan_df = pd.read_csv(manhattan_path)
manhattan_df.head()

Unnamed: 0,NEIGHBORHOOD,BUILDING CLASS CATEGORY,BLOCK,LOT,ZIPCODE,SALE PRICE,SALE DATE
0,BATHGATE,01 ONE FAMILY HOMES,2907,23,10457.0,0.0,2003-03-24 00:00:00
1,BATHGATE,01 ONE FAMILY HOMES,2917,15,10457.0,130000.0,2003-05-27 00:00:00
2,BATHGATE,01 ONE FAMILY HOMES,3028,25,10457.0,204000.0,2003-04-07 00:00:00
3,BATHGATE,01 ONE FAMILY HOMES,3030,55,10457.0,235000.0,2003-07-24 00:00:00
4,BATHGATE,01 ONE FAMILY HOMES,3035,2,10457.0,125500.0,2003-05-02 00:00:00


In [104]:
# Derive the sale year, tidy neighborhood names, and drop the columns not used below.
# The guard keeps this cell re-runnable: once 'SALE DATE' has been consumed and
# dropped, a second run skips the transform instead of failing on the missing column.
if 'SALE DATE' in manhattan_df.columns:
    manhattan_df = manhattan_df.assign(
        # 'SALE DATE' is text of the form "YYYY-MM-DD hh:mm:ss"; the first four characters are the year.
        Year=manhattan_df['SALE DATE'].str[:4],
        # Strip surrounding whitespace so the same neighborhood is not split into several groups.
        NEIGHBORHOOD=manhattan_df['NEIGHBORHOOD'].str.strip(),
    ).drop(columns=['BLOCK', 'LOT', 'ZIPCODE', 'SALE DATE'])

# Records at or below this price are treated as likely transfers rather than real sales.
MIN_SALE_PRICE = 5000

# Keep only probable sales. .copy() makes the result an independent frame, so later
# edits to it cannot raise SettingWithCopyWarning or touch manhattan_df.
manhattan_sales_df = manhattan_df[manhattan_df['SALE PRICE'] > MIN_SALE_PRICE].copy()
print(manhattan_sales_df.describe())
manhattan_sales_df.head()


         SALE PRICE
count  9.461990e+05
mean   1.277961e+06
std    1.459841e+07
min    5.001000e+03
25%    2.850000e+05
50%    4.950000e+05
75%    8.170000e+05
max    4.111112e+09


Unnamed: 0,NEIGHBORHOOD,BUILDING CLASS CATEGORY,SALE PRICE,Year
1,BATHGATE,01 ONE FAMILY HOMES,130000.0,2003
2,BATHGATE,01 ONE FAMILY HOMES,204000.0,2003
3,BATHGATE,01 ONE FAMILY HOMES,235000.0,2003
4,BATHGATE,01 ONE FAMILY HOMES,125500.0,2003
5,BATHGATE,01 ONE FAMILY HOMES,215000.0,2003


In [105]:
# Total sale price per (neighborhood, year).
# 'SALE PRICE' is selected explicitly so the text column 'BUILDING CLASS CATEGORY'
# is never summed: recent pandas versions no longer drop non-numeric columns
# silently and would concatenate the strings instead.
mttn_neigh_sales = (
    manhattan_sales_df
    .groupby(['NEIGHBORHOOD', 'Year'])['SALE PRICE']
    .sum()
    .reset_index()
)
# Sorted unique names give the neighborhood selector a stable, alphabetical order
# on every run (a set has no guaranteed order).
manhattan_neighborhoods = sorted(mttn_neigh_sales['NEIGHBORHOOD'].unique())
mttn_neigh_sales.head()

Unnamed: 0,NEIGHBORHOOD,Year,SALE PRICE
0,3004,2006,681408.0
1,AIRPORT JFK,2006,12177408.0
2,AIRPORT JFK,2016,7800000.0
3,AIRPORT LA GUARDIA,2003,2556890.0
4,AIRPORT LA GUARDIA,2004,3634000.0


## Manhattan Real Estate Sales by Neighborhood (2003-2018)

In [106]:
def mttn_neigh_sales_plot(Neighborhood):
    """Return a line chart of yearly total sales for one neighborhood.

    Reads the notebook-level ``mttn_neigh_sales`` frame, keeps the rows whose
    'NEIGHBORHOOD' matches ``Neighborhood``, and plots 'SALE PRICE' against 'Year'.
    """
    in_neighborhood = mttn_neigh_sales['NEIGHBORHOOD'].isin([Neighborhood])
    selected_rows = mttn_neigh_sales[in_neighborhood]
    line_chart = selected_rows.hvplot.line(x='Year', y='SALE PRICE', title='Manhattan Real Estate Sales')
    # "%.0f" prints the y ticks as plain whole numbers.
    return line_chart.opts(xlabel='Year', ylabel='Total Sales', yformatter="%.0f")


# Hook the plot function up to a selector built from the neighborhood list.
interact(mttn_neigh_sales_plot, Neighborhood=manhattan_neighborhoods)

## Rate of Change and Standard Deviation of Rate of Change by Neighborhood

In [107]:
def neighborhood_stats(Neighborhood, sales=None):
    """Summarize the period-over-period change in total sales for one neighborhood.

    Parameters
    ----------
    Neighborhood : str
        Value to match in the 'NEIGHBORHOOD' column.
    sales : pandas.DataFrame, optional
        Frame with 'NEIGHBORHOOD' and 'SALE PRICE' columns, one row per period,
        already in chronological order. Defaults to the notebook-level
        ``mttn_neigh_sales`` so existing single-argument calls keep working.

    Returns
    -------
    list
        ``[avg_pct_change, stdev]``: the mean and the sample standard deviation
        of the row-to-row percentage change in 'SALE PRICE'. The mean is NaN when
        the neighborhood has fewer than two rows; the standard deviation is NaN
        when it has fewer than three.

    Notes
    -----
    Changes are measured between consecutive rows, not consecutive calendar
    years. A neighborhood with no sales in some year has no row for it, so the
    change spans the gap.
    """
    if sales is None:
        # Fall back to the notebook-level aggregate, as the original cell did.
        sales = mttn_neigh_sales
    mttn_slice = sales[sales['NEIGHBORHOOD'].isin([Neighborhood])]
    pct_change = mttn_slice['SALE PRICE'].pct_change()
    avg_pct_change = pct_change.mean()
    stdev = pct_change.std()
    return [avg_pct_change, stdev]
    

In [108]:
# Map each neighborhood name to the [avg_pct_change, stdev] pair computed for it.
mttn_stats = {
    name: neighborhood_stats(name)
    for name in manhattan_neighborhoods
}


In [110]:
# Number of neighborhoods that have a stats entry.
len(mttn_stats)

204

In [93]:
# One column per neighborhood. Row 0 is the average percentage change and row 1
# its standard deviation, following the [avg_pct_change, stdev] order that
# neighborhood_stats returns.
mttn_df = pd.DataFrame(mttn_stats)

In [96]:
# Preview the per-neighborhood stats table.
# NOTE(review): this cell's execution count (In [96]) is lower than that of the
# cell that builds mttn_stats (In [108]), so the saved output may predate the
# current data — re-run the notebook top to bottom to refresh it.
mttn_df.head()

Unnamed: 0,UPPER EAST SIDE (59-79),HARLEM-CENTRAL,SOHO,FASHION,GREENWICH VILLAGE-WEST,UPPER WEST SIDE (96-116),JAVITS CENTER,FLATIRON,HARLEM-EAST,MANHATTAN VALLEY,...,UPPER WEST SIDE (79-96),GREENWICH VILLAGE-CENTRAL,EAST VILLAGE,MANHATTAN-UNKNOWN,MIDTOWN WEST,UPPER WEST SIDE (59-79),UPPER EAST SIDE (79-96),CHINATOWN,HARLEM-WEST,LOWER EAST SIDE
0,0.083081,0.206696,0.278907,0.438053,0.119178,0.21095,1.390304,0.499744,0.251337,0.20744,...,0.088935,0.132296,0.181712,3.257392,0.303625,0.102497,0.064235,0.269633,0.381034,0.216407
1,0.357104,0.545362,0.770567,0.92699,0.321626,0.633346,3.36863,1.090057,0.660179,0.695971,...,0.297253,0.407622,0.436257,9.353586,0.78019,0.340533,0.268563,0.633072,1.389142,0.461658
