In [42]:
# Import dependencies

import pandas as pd
import numpy as np
import time
import plotly.express as px
import plotly.graph_objects as go
from datetime import date
from random import random

In [43]:
# Load the for-sale and the sold listing snapshots from their csv files
path_home_for_sale = "home_data/for_sale_data/home_for_sale_data_Jan-03-2021.csv"
path_home_sold = "home_data/sold_data/home_sold_data_Jan-03-2021.csv"

# The first csv column is used as the index of each frame
homes_for_sale, homes_sold = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (path_home_for_sale, path_home_sold)
)



In [44]:
# Preview the first 5 rows of the homes that are for sale (5 is head()'s default)
homes_for_sale.head()


Unnamed: 0,title,MLS ID,transaction_type,home_type,level,first day on market,last day on market,sold_price,list_price,bedrooms,bathrooms,den,sqft,exposure,parking,locker,maintanance fee,description,link,address
0,53 Havenbrook Blvd | House For Sale,C5072154,for sale,Residential House,,20201229.0,,,1388000,4.0,3.0,0.0,2000-2500,S,,,,Welcome To This Spectacular Gorgeous Home In A...,https://www.realmaster.com/en/toronto-on/53-ha...,"53 Havenbrook Blvd, Toronto, Ontario, M2J1A7"
1,30 Hedgewood Rd | House For Sale,C4963019,for sale,Residential House,,20201021.0,,,2988000,3.0,4.0,2.0,,W,,,,Stately &amp; Stunning. Extensively Reno'd On ...,https://www.realmaster.com/en/toronto-on/30-he...,"30 Hedgewood Rd, Toronto, Ontario, M2L1L5"
2,283 Adelaide Street West 04 | Condo Apt For Sale,,for sale,Residential Condo Apt,,20201122.0,,,809000,2.0,2.0,0.0,800-899,SW,,,,超值楼花转让，著名开发商Pinnacle International在市中心科技区地址283...,https://www.realmaster.com/en/toronto-on/283-a...,"04 283 Adelaide Street West, Toronto, Ontario,..."
3,206 Dunforest Ave | House For Sale,C5073052,for sale,Residential House,,20210101.0,,,3488000,4.0,6.0,2.0,3500-5000,N,,,,Spectacular Customized Home. Exceptional Elega...,https://www.realmaster.com/en/toronto-on/206-d...,"206 Dunforest Ave, Toronto, Ontario, M2N4J7"
4,60 Colborne St 1505 | Apartment For Sale,C5069867,for sale,Residential Apartment,15.0,20201222.0,,,1225000,2.0,2.0,0.0,1000-1199,Nw,OwnedE8,Owned,984.21,Prime Yonge/King/Church Location!Luxury Upgrad...,https://www.realmaster.com/en/toronto-on/60-co...,"1505 60 Colborne St, Toronto, Ontario, M5E0B7"


In [45]:
# Preview the first 5 rows of the homes that were sold (5 is head()'s default)
homes_sold.head()

Unnamed: 0,title,MLS ID,transaction_type,home_type,level,first day on market,last day on market,sold_price,list_price,bedrooms,bathrooms,den,sqft,exposure,parking,locker,maintanance fee,description,link,address
0,1080 Bay St 2308 | Apartment Sold Price,C4974749,sold,Residential Apartment,22.0,20201101,20201218,718000,749000,1.0,1.0,0.0,500-599,Nw,Owned84,Owned,410.53,Another Pemberton Built Signature @ Sw Bay/Blo...,https://www.realmaster.com/en/toronto-on/1080-...,"2308 1080 Bay St, Toronto, Ontario, M5S 0A5"
1,107 Kingslake Rd | House Sold Price,C4934370,sold,Residential House,,20200930,20201230,1110000,1270000,5.0,3.0,0.0,1500-2000,E,,,,A Prime Location In Sought After Neighbourhood...,https://www.realmaster.com/en/toronto-on/107-k...,"107 Kingslake Rd, Toronto, Ontario, M2J3E9"
2,365 Church St 2704 | Apartment Sold Price,C4899879,sold,Residential Apartment,27.0,20200905,20201216,406000,419900,0.0,1.0,0.0,0-499,N,,,270.21,Near New Luxury Open Concept Studio In The Hea...,https://www.realmaster.com/en/toronto-on/365-c...,"2704 365 Church St, Toronto, Ontario, M5B 0B5"
3,275 Yorkland Rd 2912 | Apartment Sold Price,C5071322,sold,Residential Apartment,28.0,20201228,20201230,474000,484900,1.0,1.0,0.0,500-599,Sw,Owned5623,Owned,439.54,***Wow*** Attention First Time Home Buyers And...,https://www.realmaster.com/en/toronto-on/275-y...,"2912 275 Yorkland Rd, Toronto, Ontario, M2J0A7"
4,30 Stadium Rd 259 | Townhouse Sold Price,C4959473,sold,Residential Townhouse,1.0,20201019,20210101,725000,740000,2.0,1.0,0.0,600-699,Ns,Owned105,,380.0,Amazing Property In The Heart Of The City. Sou...,https://www.realmaster.com/en/toronto-on/30-st...,"259 30 Stadium Rd, Toronto, Ontario, M5V 3P4"


In [46]:
# Keep only the condo listings
# NOTE(review): the for-sale filter accepts two home types while the sold filter
# accepts only 'Residential Apartment' - confirm this asymmetry is intended.
condition_lst_1 = homes_for_sale['home_type'].isin(['Residential Apartment', 'Residential Condo Apt'])
condition_lst_2 = homes_sold['home_type'].eq('Residential Apartment')

# Boolean masks select the matching rows of each frame
condos_for_sale = homes_for_sale.loc[condition_lst_1]
condos_sold = homes_sold.loc[condition_lst_2]

In [47]:
# Inspect the condos for sale: (rows, columns) followed by the first 5 rows
(condos_for_sale.shape, condos_for_sale.head())

((3384, 20),
                                               title    MLS ID  \
 2  283 Adelaide Street West 04 | Condo Apt For Sale       NaN   
 4          60 Colborne St 1505 | Apartment For Sale  C5069867   
 5       70 Town Centre Crt 404 | Apartment For Sale  E5073682   
 6         50 Charles St E 4008 | Apartment For Sale  C5073674   
 7          361 Front St W 2002 | Apartment For Sale  C5070499   
 
   transaction_type              home_type level  first day on market  \
 2         for sale  Residential Condo Apt   NaN           20201122.0   
 4         for sale  Residential Apartment    15           20201222.0   
 5         for sale  Residential Apartment     4           20210103.0   
 6         for sale  Residential Apartment    40           20210103.0   
 7         for sale  Residential Apartment    17           20201222.0   
 
    last day on market sold_price list_price  bedrooms  bathrooms  den  \
 2                 NaN        NaN    809,000       2.0        2.0  0.0   
 

In [48]:
# Inspect the sold condos: (rows, columns) followed by the first 5 rows
(condos_sold.shape, condos_sold.head())

((1424, 20),
                                              title    MLS ID transaction_type  \
 0          1080 Bay St 2308 | Apartment Sold Price  C4974749             sold   
 2        365 Church St 2704 | Apartment Sold Price  C4899879             sold   
 3      275 Yorkland Rd 2912 | Apartment Sold Price  C5071322             sold   
 6  4091 Sheppard Ave E 1008 | Apartment Sold Price  E5061202             sold   
 7  151 Village Green Sq 907 | Apartment Sold Price  E4988153             sold   
 
                home_type level  first day on market  last day on market  \
 0  Residential Apartment    22             20201101            20201218   
 2  Residential Apartment    27             20200905            20201216   
 3  Residential Apartment    28             20201228            20201230   
 6  Residential Apartment    10             20201209            20201220   
 7  Residential Apartment     9             20201112            20201215   
 
   sold_price list_price  bedrooms 

In [110]:
# Find the trend of number of sold homes through time (daily)

# Collect the sold date of every condo, then report the earliest and latest one
last_day_on_markets_lst = list(condos_sold['last day on market'])
min_day, max_day = min(last_day_on_markets_lst), max(last_day_on_markets_lst)

(min_day, max_day)


(20200314, 20201231)

In [114]:
# Find the trend of number of new listing homes through time (daily)

# 'first day on market' is displayed as a float column, which suggests it can hold
# missing values. A NaN makes Python's min()/max() depend on the element order and
# cannot be converted with int() by the per-day counting further down, so listings
# without a first day are left out of the list.
first_day_on_markets_lst = list(condos_for_sale['first day on market'].dropna())
min_first_day = min(first_day_on_markets_lst)
max_first_day = max(first_day_on_markets_lst)

min_first_day, max_first_day

(20181203.0, 20210103.0)

In [203]:
# Given a cutoff day, plot new listing number or sold number through time (daily)
# if n_days_average is specified, plot the centered moving average of that number
def plot_single_day_data(date_lst, cutoff_day, n_days_average=0):
    """Plot how many entries of date_lst fall on each day on or after cutoff_day.

    Args:
        date_lst: iterable of dates encoded as YYYYMMDD numbers (int or float).
            Missing values (NaN / None) are ignored.
        cutoff_day: earliest day to plot, as a YYYYMMDD integer (inclusive).
        n_days_average: 0 plots the raw daily counts. Any other value plots a
            centered moving average whose window reaches (n_days_average - 1) // 2
            days to each side of a day, so an even value uses the same window as
            the next smaller odd value. Days too close to either end to have a
            full window are not plotted.

    Returns:
        None. The figure is displayed with fig.show().

    Raises:
        ValueError: if n_days_average is negative.

    Note:
        Only days that occur in date_lst are plotted, and the moving average runs
        over those observed days; a day without any entry is not counted as zero.
    """
    if n_days_average < 0:
        raise ValueError("n_days_average must be >= 0")

    # Count the entries per day; an entry without a date cannot be assigned to a
    # day (and int() cannot convert NaN), so it is skipped.
    num_per_day_dict = {}
    for d in date_lst:
        if pd.isna(d):
            continue
        day = int(d)
        num_per_day_dict[day] = num_per_day_dict.get(day, 0) + 1

    days = sorted(day for day in num_per_day_dict if day >= cutoff_day)
    num_lst = [num_per_day_dict[day] for day in days]

    # Plotly converts all-digit strings such as '20201123' to plain numbers, which
    # leaves large gaps at every month / year boundary. ISO 'YYYY-MM-DD' strings
    # are recognised as dates and give a proper time axis.
    x_axis = []
    for day in days:
        d_string = "{:08d}".format(day)
        x_axis.append("{}-{}-{}".format(d_string[:4], d_string[4:6], d_string[6:]))

    if n_days_average == 0:
        x_plot, y_plot = x_axis, num_lst
    else:
        half_boundary = (n_days_average - 1) // 2
        # Divide by the number of values that are actually summed; dividing by
        # n_days_average would under-estimate the average for even values.
        window_size = 2 * half_boundary + 1
        centers = range(half_boundary, len(num_lst) - half_boundary)

        x_plot = [x_axis[i] for i in centers]
        y_plot = [
            sum(num_lst[i - half_boundary : i + half_boundary + 1]) / window_size
            for i in centers
        ]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_plot, y=y_plot,
                             mode='lines+markers',
                             name='line + number',
                             marker_color='rgba(255, 102, 102, 1)'))
    fig.show()

In [204]:
# Daily number of homes sold, starting at the cutoff day
plot_single_day_data(date_lst=last_day_on_markets_lst, cutoff_day=20201123)

In [202]:
# 7-day moving average of the daily number of homes sold
plot_single_day_data(
    date_lst=last_day_on_markets_lst,
    cutoff_day=20201123,
    n_days_average=7,
)

In [197]:
# Daily number of new home listings, starting at the cutoff day
plot_single_day_data(date_lst=first_day_on_markets_lst, cutoff_day=20201123)

In [200]:
# 7-day moving average of the daily number of new home listings
plot_single_day_data(
    date_lst=first_day_on_markets_lst,
    cutoff_day=20201123,
    n_days_average=7,
)

In [None]:
# Find the trend of the ratio: number of new listings / number of homes sold

In [216]:
def helper_for_ratio(date_lst):
    """Count how many entries of date_lst fall on each day.

    Args:
        date_lst: iterable of dates encoded as YYYYMMDD numbers (int or float).

    Returns:
        dict mapping each day, written as the string of its integer value
        (e.g. '20201123'), to the number of entries on that day.

    Missing dates (NaN / None) are skipped: int() cannot convert them and an
    entry without a date cannot be attributed to any day.
    """
    num_per_day_dict = {}

    for d in date_lst:
        if pd.isna(d):
            continue

        d_string = str(int(d))
        num_per_day_dict[d_string] = num_per_day_dict.get(d_string, 0) + 1

    return num_per_day_dict

def helper_plot(x_lst, y_lst):
    """Display y_lst against x_lst as a single 'lines+markers' Scatter trace."""
    trace_options = dict(
        x=x_lst,
        y=y_lst,
        mode='lines+markers',
        name='line + number',
        marker_color='rgba(255, 102, 102, 1)',
    )

    figure = go.Figure()
    figure.add_trace(go.Scatter(**trace_options))
    figure.show()
    
# if n_days_average is not 0, plot the average n days ratio
def plot_the_sell_buy_ratio(new_listing_dates, sold_dates, cutoff_day, n_days_average=0):
    """Plot the daily ratio (number of new listings) / (number of sold homes).

    Only days on or after cutoff_day that have at least one new listing AND at
    least one sold home are plotted, so the ratio is always defined.

    Args:
        new_listing_dates: iterable of listing dates encoded as YYYYMMDD numbers.
        sold_dates: iterable of sold dates encoded as YYYYMMDD numbers.
        cutoff_day: earliest day to plot, as a YYYYMMDD integer (inclusive).
        n_days_average: 0 plots the raw daily ratio. Any other value plots a
            centered moving average of the daily ratios whose window reaches
            (n_days_average - 1) // 2 plotted days to each side of a day (the
            same windowing as plot_single_day_data). Days too close to either
            end to have a full window are not plotted.

    Returns:
        None. The figure is displayed through helper_plot.

    Raises:
        ValueError: if n_days_average is negative.
    """
    if n_days_average < 0:
        raise ValueError("n_days_average must be >= 0")

    # for new listing, get the dictionary {listing date : num of listing}
    new_listing_dict = helper_for_ratio(new_listing_dates)

    # for sold homes, get the dictionary {sold date : num of sold homes}
    sold_dict = helper_for_ratio(sold_dates)

    # Days (as ints, so they sort chronologically) present in both dictionaries
    ratio_dates = sorted(
        day
        for day in (int(d) for d in new_listing_dict)
        if day >= cutoff_day and str(day) in sold_dict
    )

    ratio_lst = [new_listing_dict[str(day)] / sold_dict[str(day)] for day in ratio_dates]

    # Plotly converts all-digit strings such as '20201123' to plain numbers, which
    # leaves large gaps at every month / year boundary. ISO 'YYYY-MM-DD' strings
    # are recognised as dates and give a proper time axis.
    x_axis = []
    for day in ratio_dates:
        d_string = "{:08d}".format(day)
        x_axis.append("{}-{}-{}".format(d_string[:4], d_string[4:6], d_string[6:]))

    if n_days_average == 0:
        helper_plot(x_lst=x_axis, y_lst=ratio_lst)
    else:
        half_boundary = (n_days_average - 1) // 2
        # Divide by the number of ratios that are actually summed
        window_size = 2 * half_boundary + 1
        centers = range(half_boundary, len(ratio_lst) - half_boundary)

        x_average = [x_axis[i] for i in centers]
        ratio_average = [
            sum(ratio_lst[i - half_boundary : i + half_boundary + 1]) / window_size
            for i in centers
        ]
        helper_plot(x_lst=x_average, y_lst=ratio_average)

In [219]:
# Daily ratio of new listings to sold homes, starting at the cutoff day
plot_the_sell_buy_ratio(
    first_day_on_markets_lst,
    last_day_on_markets_lst,
    cutoff_day=20201123,
)