# Toronto Dwellings Analysis

In this assignment, you will perform fundamental analysis for the Toronto dwellings market to allow potential real estate investors to choose rental investment properties.

In [16]:
# initial imports
import os
import pandas as pd
import matplotlib.pyplot as plt

import plotly.express as px
from pathlib import Path
from dotenv import load_dotenv
import matplotlib

%matplotlib inline

In [17]:
import hvplot.pandas

In [18]:
# Read the Mapbox API key
load_dotenv('example.env')
map_box_api = os.getenv("mapbox")
px.set_mapbox_access_token(map_box_api)

## Load Data

In [19]:
# Read the census data into a Pandas DataFrame
file_path = Path("toronto_neighbourhoods_census_data.csv")
to_data = pd.read_csv(file_path, index_col="year")
to_data.head()

Unnamed: 0_level_0,neighbourhood,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2001,Agincourt North,3715,1480,0,1055,1295,195,185,5,200388,810,870
2001,Agincourt South-Malvern West,3250,1835,0,545,455,105,425,0,203047,806,892
2001,Alderwood,3175,315,0,470,50,185,370,0,259998,817,924
2001,Annex,1060,6090,5,1980,605,275,3710,165,453850,1027,1378
2001,Banbury-Don Mills,3615,4465,0,240,380,15,1360,0,371864,1007,1163


- - - 

## Dwelling Types Per Year

In this section, you will calculate the number of dwelling types per year. Visualize the results using bar charts and the Pandas plot function. 

**Hint:** Use the Pandas `groupby` function.

**Optional challenge:** Plot each bar chart in a different color.

In [20]:
# Calculate the number of dwelling types units per year (hint: use groupby and sum)
to_data_types = to_data.drop(columns=["neighbourhood","average_house_value", "shelter_costs_owned", "shelter_costs_rented"])
number_dwelling_types = to_data_types.groupby([to_data_types.index]).sum()
number_dwelling_types

Unnamed: 0_level_0,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2001,300930,355015,75,90995,52355,23785,116900,3040
2006,266860,379400,165,69430,54690,44095,162850,1335
2011,274940,429220,100,72480,60355,44750,163895,2165
2016,269680,493270,95,71200,61565,48585,165575,2845


In [21]:
# Save the dataframe as a csv file
number_dwelling_types.to_csv("number_dwelling_types.csv")

In [22]:
# # Helper create_bar_chart function
def create_bar_chart(data_p, title_p, xlabel_p, ylabel_p, color_p):
    bar_chart = data_p.hvplot.bar(
    title = title_p, 
    xlabel = xlabel_p, 
    ylabel = ylabel_p,
    color= color_p,
    rot=90,
    height=500,
    width= 500
    ).opts(
    yformatter="%.0f")
    return bar_chart




In [23]:
# Create a bar chart per year to show the number of dwelling types

# Bar chart for 2001
n_2001=number_dwelling_types.loc[2001,:]
create_bar_chart(n_2001, "Dwelling Types in Toronto in 2001", "2001", " Dwelling Type Units", "red")

In [24]:
# Bar chart for 2006
n_2006=number_dwelling_types.loc[2006,:]
create_bar_chart(n_2006, "Dwelling Types in Toronto in 2006", "2006", " Dwelling Type Units", "blue")

In [25]:
# Bar chart for 2011
n_2011=number_dwelling_types.loc[2011,:]
create_bar_chart(n_2011, "Dwelling Types in Toronto in 2011", "2011", " Dwelling Type Units", "yellow")

In [26]:
# Bar chart for 2016
n_2016=number_dwelling_types.loc[2016,:]
create_bar_chart(n_2016, "Dwelling Types in Toronto in 2016", "2016", " Dwelling Type Units", "magenta")

- - - 

## Average Monthly Shelter Costs in Toronto Per Year

In this section, you will calculate the average monthly shelter costs for owned and rented dwellings and the average house value for each year. Plot the results as a line chart.

**Optional challenge:** Plot each line chart in a different color.

In [32]:
# Calculate the average monthly shelter costs for owned and rented dwellings
avg_monthly_shelter_costs= to_data[["shelter_costs_owned", "shelter_costs_rented" ]]
average_cost_dwellings= owned_rented_dwellings.groupby([to_data.index]).mean()
average_cost_dwellings

NameError: name 'owned_rented_dwellings' is not defined

In [33]:
# Helper create_line_chart function

def create_line_chart(data_p, title_p, xlabel_p, ylabel_p, color_p):
    line_chart = data_p.hvplot(
    title = title_p, 
    xlabel = xlabel_p, 
    ylabel = ylabel_p,
    color= color_p,
    width = 800,
    ).opts(
    yformatter="%.0f")
    return line_chart    

In [34]:
# Create two line charts, one to plot the monthly shelter costs for owned dwelleing and other for rented dwellings per year

# Line chart for owned dwellings
owned_dwellings =average_cost_dwellings.loc[:, "shelter_costs_owned"]
create_line_chart(owned_dwellings, "Average Monthly Shelter Cost for Owned Dwellings in Toronto", "Year", " Avg Monthly Shelter Costs", "blue")

In [35]:
# Line chart for rented dwellings
rented_dwellings =average_cost_dwellings.loc[:, "shelter_costs_rented"]
create_line_chart(rented_dwellings, "Average Monthly Shelter Cost for Rented Dwellings in Toronto", "Year", " Avg Monthly Shelter Costs", "orange")

## Average House Value per Year

In [36]:
# Calculate the average house value per year
avg_yearly_house_value = to_data["average_house_value" ].groupby([to_data.index]).mean()
avg_yearly_house_value

year
2001    289882.885714
2006    424059.664286
2011    530424.721429
2016    664068.328571
Name: average_house_value, dtype: float64

In [37]:
# Plot the average house value per year as a line chart
create_line_chart(avg_yearly_house_value, "Average House Value in Toronto", "Year", " Avg House Value", "blue")

- - - 

## Average House Value by Neighbourhood

In this section, you will use `hvplot` to create an interactive visualization of the average house value with a dropdown selector for the neighbourhood.

**Hint:** It will be easier to create a new DataFrame from grouping the data and calculating the mean house values for each year and neighbourhood.

In [38]:
# Create a new DataFrame with the mean house values by neighbourhood per year
mean_house_values = to_data[["neighbourhood","average_house_value"]].reset_index()
mean_house_values.head()

Unnamed: 0,year,neighbourhood,average_house_value
0,2001,Agincourt North,200388
1,2001,Agincourt South-Malvern West,203047
2,2001,Alderwood,259998
3,2001,Annex,453850
4,2001,Banbury-Don Mills,371864


In [39]:
# Use hvplot to create an interactive line chart of the average house value per neighbourhood
# The plot should have a dropdown selector for the neighbourhood
from panel.interact import interact
from panel import widgets


def choose_neighbourhood(neighbour_hood):
    
    return mean_house_values.loc[mean_house_values['neighbourhood']==neighbour_hood].hvplot.line(
        x = 'year',
        y='average_house_value',
        title= f"neighbourhood: {neighbour_hood}",
    ).opts(
    yformatter="%.0f")

neighbourhood_choice= mean_house_values['neighbourhood'].drop_duplicates()

interact(choose_neighbourhood, neighbour_hood= neighbourhood_choice )

## Number of Dwelling Types per Year

In this section, you will use `hvplot` to create an interactive visualization of the average number of dwelling types per year with a dropdown selector for the neighbourhood.

**Hint:** It will be easier to create a new DataFrame from grouping the data and calculating the mean number of dwelling types for each year and neighbourhood.

In [40]:
# Fetch the data of all dwelling types per year
all_dwelling_types = to_data.reset_index()
all_dwelling_types.head()

Unnamed: 0,year,neighbourhood,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
0,2001,Agincourt North,3715,1480,0,1055,1295,195,185,5,200388,810,870
1,2001,Agincourt South-Malvern West,3250,1835,0,545,455,105,425,0,203047,806,892
2,2001,Alderwood,3175,315,0,470,50,185,370,0,259998,817,924
3,2001,Annex,1060,6090,5,1980,605,275,3710,165,453850,1027,1378
4,2001,Banbury-Don Mills,3615,4465,0,240,380,15,1360,0,371864,1007,1163


In [41]:
# Use hvplot to create an interactive bar chart of the number of dwelling types per neighbourhood
# The plot should have a dropdown selector for the neighbourhood


def choose_neighbourhood(neighbour_hood):
    
    return all_dwelling_types.loc[all_dwelling_types['neighbourhood']==neighbour_hood].hvplot.bar(
        x = 'year',
        y=["single_detached_house","apartment_five_storeys_plus","movable_dwelling", "semi_detached_house", "row_house","duplex", "apartment_five_storeys_less","other_house"],
        title= f"neighbourhood: {neighbour_hood}",
        rot= 90,
        height = 500
    ).opts(
    yformatter="%.0f")

neighbourhood_choice= mean_house_values['neighbourhood'].drop_duplicates()

interact(choose_neighbourhood, neighbour_hood= neighbourhood_choice )

- - - 

## The Top 10 Most Expensive Neighbourhoods

In this section, you will need to calculate the house value for each neighbourhood and then sort the values to obtain the top 10 most expensive neighbourhoods on average. Plot the results as a bar chart.

In [42]:
# Getting the data from the top 10 expensive neighbourhoods
avg_value_neighbourhood= to_data.groupby("neighbourhood").mean()
# nlargest(10, "average_house_value")
top_10_neighbourhoods= avg_value_neighbourhood.nlargest(10, "average_house_value")
top_10_neighbourhoods

Unnamed: 0_level_0,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Bridle Path-Sunnybrook-York Mills,2260.0,331.25,0.0,36.25,90.0,25.0,40.0,0.0,1526485.75,2360.75,2321.75
Forest Hill South,1742.5,2031.25,1.25,61.25,45.0,75.0,1027.5,3.75,1195992.5,1781.0,1313.75
Lawrence Park South,3472.5,773.75,0.0,126.25,38.75,225.0,966.25,16.25,1094027.75,1954.0,1372.75
Rosedale-Moore Park,2498.75,4641.25,0.0,486.25,245.0,327.5,1618.75,2.5,1093640.0,1909.75,1537.25
St.Andrew-Windfields,3225.0,1670.0,0.0,185.0,552.5,97.5,586.25,5.0,999107.0,1880.25,1384.5
Casa Loma,916.25,2310.0,0.0,288.75,201.25,162.5,1192.5,2.5,981064.25,1873.75,1547.75
Bedford Park-Nortown,4865.0,1981.25,0.0,43.75,57.5,287.5,1275.0,88.75,930415.25,1786.75,1255.0
Forest Hill North,1488.75,3392.5,0.0,12.5,16.25,82.5,402.5,1.25,851680.5,1722.75,1245.5
Kingsway South,2326.25,576.25,0.0,66.25,48.75,20.0,336.25,2.5,843234.25,1736.75,1622.0
Yonge-St.Clair,565.0,3948.75,0.0,425.0,212.5,172.5,1308.75,6.25,813220.25,1680.75,1369.0


In [45]:
# Plotting the data from the top 10 expensive neighbourhoods
title = "Top 10 Expensive Neighbourhoods in Toronto"
top_10_neighbourhoods["average_house_value"].hvplot.bar(rot=90, 
                                                        height =600, 
                                                        title= title, 
                                                        xlabel= "Neighbourhood", ylabel= "Avg. House Value").opts(
    yformatter="%.0f")

- - - 

## Cost Analysis

In this section, you will use Plotly express to a couple of plots that investors can interactively filter and explore various factors related to the house value of the Toronto's neighbourhoods. 

### Create a bar chart row facet to plot the average house values for all Toronto's neighbourhoods per year

In [46]:
df =to_data.reset_index()
fig = px.bar(df, 
             x="neighbourhood", 
             y="average_house_value",
             color="average_house_value", 
             facet_row="year",
             labels={
                 'neighbourhood':'Neighbourhoods',
                 'average_house_value': 'Avg. House Value',
                 'year': 'Facet_row'
             },
             height =1000
            )
fig.show()

### Create a sunburst chart to conduct a costs analysis of most expensive neighbourhoods in Toronto per year

In [47]:
# Fetch the data from all expensive neighbourhoods per year.
expensive_neighbourhood = to_data.reset_index().sort_values(['year','shelter_costs_owned'], ascending=[True, False] )
expensive_neighbourhood_final=expensive_neighbourhood.reset_index()
expensive_neighbourhood_final.drop(columns=['index']).head()


Unnamed: 0,year,neighbourhood,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
0,2001,Bridle Path-Sunnybrook-York Mills,2275,110,0,25,15,10,20,0,927466,1983,1790
1,2001,Kingsway South,2370,260,0,65,45,0,340,10,512334,1362,1340
2,2001,Casa Loma,1035,1700,0,415,190,185,1090,5,596077,1241,1500
3,2001,Rosedale-Moore Park,2610,3880,0,520,225,290,1735,0,664476,1219,1540
4,2001,Bay Street Corridor,10,7575,0,0,15,0,240,0,257404,1218,1142


In [48]:
# Calculate the top 10 most expensive neighbourhoods per year and concat the data into one DF.
exp_2001=expensive_neighbourhood_final.loc[expensive_neighbourhood_final['year']==2001].nlargest(10,['shelter_costs_owned'])
exp_2006=expensive_neighbourhood_final.loc[expensive_neighbourhood_final['year']==2006].nlargest(10,['shelter_costs_owned'])
exp_2011=expensive_neighbourhood_final.loc[expensive_neighbourhood_final['year']==2011].nlargest(10,['shelter_costs_owned'])
exp_2016=expensive_neighbourhood_final.loc[expensive_neighbourhood_final['year']==2016].nlargest(10,['shelter_costs_owned'])
exp_nbhood_sunburst=pd.concat([exp_2001,exp_2006,exp_2011,exp_2016],axis=0)
exp_nbhood_sunburst=exp_nbhood_sunburst.reset_index()
exp_nbhood_sunburst=exp_nbhood_sunburst.drop(columns=['level_0','index'])
exp_nbhood_sunburst.head()

Unnamed: 0,year,neighbourhood,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
0,2001,Bridle Path-Sunnybrook-York Mills,2275,110,0,25,15,10,20,0,927466,1983,1790
1,2001,Kingsway South,2370,260,0,65,45,0,340,10,512334,1362,1340
2,2001,Casa Loma,1035,1700,0,415,190,185,1090,5,596077,1241,1500
3,2001,Rosedale-Moore Park,2610,3880,0,520,225,290,1735,0,664476,1219,1540
4,2001,Bay Street Corridor,10,7575,0,0,15,0,240,0,257404,1218,1142


In [49]:
# Create the sunburst chart
sunburst_plot = px.sunburst(exp_nbhood_sunburst, path=['year', 'neighbourhood'], 
                  values='average_house_value',  color='shelter_costs_owned',
                  hover_data=['shelter_costs_owned', 'shelter_costs_rented'],
                  color_continuous_scale='Blues',
                  title = "Cost Analysis of Most Expensive Neighbourhoods in Toronto per Year", width=700, height=500
                     )
sunburst_plot.show()

- - - 

## Neighbourhood Map

In this section, you will read in neighbourhoods location data and build an interactive map with the average house value per neighbourhood. Use a `scatter_mapbox` from Plotly express to create the visualization. Remember, you will need your Mapbox API key for this.

### Load Location Data

In [50]:
# Load neighbourhoods coordinates data
file_path = Path("toronto_neighbourhoods_coordinates.csv")
df_neighbourhood_locations = pd.read_csv(file_path)
df_neighbourhood_locations.head()

Unnamed: 0,neighbourhood,lat,lon
0,Agincourt North,43.805441,-79.266712
1,Agincourt South-Malvern West,43.788658,-79.265612
2,Alderwood,43.604937,-79.541611
3,Annex,43.671585,-79.404001
4,Banbury-Don Mills,43.737657,-79.349718


### Data Preparation

You will need to join the location data with the mean values per neighbourhood.

1. Calculate the mean values for each neighbourhood.

2. Join the average values with the neighbourhood locations.

In [51]:
# Calculate the mean values for each neighborhood
mean_neighbourhood = to_data.groupby('neighbourhood').mean().reset_index()
mean_neighbourhood.head()


Unnamed: 0,neighbourhood,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
0,Agincourt North,3435.0,1947.5,2.5,863.75,1406.25,512.5,547.5,10.0,329811.5,1109.0,983.5
1,Agincourt South-Malvern West,2897.5,2180.0,1.25,375.0,456.25,523.75,628.75,32.5,334189.0,1131.25,985.0
2,Alderwood,2903.75,302.5,1.25,503.75,76.25,302.5,502.5,1.25,427922.5,1166.75,1003.25
3,Annex,751.25,7235.0,1.25,1375.0,613.75,355.0,4605.0,83.75,746977.0,1692.75,1315.25
4,Banbury-Don Mills,3572.5,5388.75,1.25,273.75,626.25,32.5,1340.0,0.0,612039.0,1463.5,1242.75


In [52]:
# Join the average values with the neighbourhood locations
neighbourhood_combined_df = pd.merge(df_neighbourhood_locations,mean_neighbourhood, on='neighbourhood')
neighbourhood_combined_df.head()

Unnamed: 0,neighbourhood,lat,lon,single_detached_house,apartment_five_storeys_plus,movable_dwelling,semi_detached_house,row_house,duplex,apartment_five_storeys_less,other_house,average_house_value,shelter_costs_owned,shelter_costs_rented
0,Agincourt North,43.805441,-79.266712,3435.0,1947.5,2.5,863.75,1406.25,512.5,547.5,10.0,329811.5,1109.0,983.5
1,Agincourt South-Malvern West,43.788658,-79.265612,2897.5,2180.0,1.25,375.0,456.25,523.75,628.75,32.5,334189.0,1131.25,985.0
2,Alderwood,43.604937,-79.541611,2903.75,302.5,1.25,503.75,76.25,302.5,502.5,1.25,427922.5,1166.75,1003.25
3,Annex,43.671585,-79.404001,751.25,7235.0,1.25,1375.0,613.75,355.0,4605.0,83.75,746977.0,1692.75,1315.25
4,Banbury-Don Mills,43.737657,-79.349718,3572.5,5388.75,1.25,273.75,626.25,32.5,1340.0,0.0,612039.0,1463.5,1242.75


### Mapbox Visualization

Plot the average values per neighbourhood using a Plotly express `scatter_mapbox` visualization.

In [53]:
# Create a scatter mapbox to analyze neighbourhood info
map_1 = px.scatter_mapbox(
    neighbourhood_combined_df,
    lat="lat",
    lon="lon",
    color='average_house_value'
)
map_1.show()