In [None]:
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [None]:
# Load the Canadian rent table straight from the Callysto data-files repository.
# The address has to begin with the scheme ("https://"): with a leading "/" pandas
# treats the string as a local file path and the read fails.
dataset = pd.read_csv('https://raw.githubusercontent.com/callysto/data-files/main/data-viz-of-the-week/canada-rent/canada-rent.csv')

dataset

In [None]:
#dictionaries that we'll use to create columns which can be used for machine learning
# prov_codes maps each province label to a numeric code (the values look like
# telephone area codes -- TODO confirm). Combined labels such as 'Ontario/Quebec'
# reuse a single province's code, and 'Prince Edward Island' and 'Nova Scotia'
# both map to 902, so those pairs cannot be told apart by PROVINCE CODE alone.
prov_codes = {'Alberta': 403,'Saskatchewan':639,'Saskatchewan/Alberta':403,'Quebec':514,'Ontario':249,'Ontario/Quebec':249,'Manitoba':204,'New Brunswick':506,'New Brunswick/Quebec':506,'Newfoundland and Labrador':709,'Northwest Territories':867,'Prince Edward Island':902,'Nova Scotia':902,'British Columbia':778}
# Rooms maps the unit-type label to its number of bedrooms.
Rooms = {'Bachelor units':0,'One bedroom units':1,'Two bedroom units':2,'Three bedroom units':3}

# Keep only rows that have a rent value. The explicit .copy() makes the result an
# independent frame, so the column assignments below never write into a slice of
# the raw table (no SettingWithCopyWarning).
dataset = dataset[dataset['VALUE'].notna()].copy()
dataset = dataset.rename(columns={'REF_DATE':'YEAR','VALUE':'MONTHLY RENT'})
# The province is whatever follows the last ', ' in the GEO label.
dataset['PROVINCE'] = dataset['GEO'].map(lambda x: x.split(', ')[-1])
# Repair the misspelled label. The result is assigned back on purpose: an inplace
# replace on a selected column is chained assignment and is not guaranteed to
# update `dataset`, which would make the prov_codes lookup below raise KeyError.
dataset['PROVINCE'] = dataset['PROVINCE'].replace('Saskachewan/Alberta','Saskatchewan/Alberta')
# The lookups index the dictionaries directly so an unexpected label fails loudly
# with a KeyError instead of silently producing a missing value.
dataset['PROVINCE CODE'] = dataset['PROVINCE'].map(lambda x: prov_codes[x])
dataset['# of Rooms'] = dataset['Type of unit'].map(lambda x:Rooms[x])

dataset

In [None]:
# Average the numeric columns for every (province, year, unit type) combination
# and turn the group keys back into ordinary columns.
group_keys = ['PROVINCE','YEAR','Type of unit']
averaged_columns = ['MONTHLY RENT','PROVINCE CODE','# of Rooms']
grouped = (
    dataset
    .groupby(group_keys)[averaged_columns]
    .mean()
    .reset_index()
)
grouped

In [None]:
province = 'Alberta' #change this to any province you'd like

# Rows of the averaged table that belong to the chosen province,
# drawn as one line per unit type.
is_selected_province = grouped['PROVINCE'] == province
province_grouped = grouped.loc[is_selected_province]
px.line(
    province_grouped,
    x='YEAR',
    y='MONTHLY RENT',
    color='Type of unit',
    title='Average Monthly Rent of ' + province + ' through the Years',
)

In [None]:
# Collapse the unit types: one mean rent per province and year,
# shown as bars coloured by province.
province_year_keys = ['PROVINCE','YEAR']
average_per_province = (
    grouped
    .groupby(province_year_keys)['MONTHLY RENT']
    .mean()
    .reset_index()
)
px.bar(
    average_per_province,
    x='YEAR',
    y='MONTHLY RENT',
    color='PROVINCE',
    title='Average Monthly Rent by Year',
)

In [None]:
# Animated grouped bar chart: one frame per year, one bar per unit type and province.
# The y-axis range is pinned to the largest rent in the whole table (plus 5% headroom);
# otherwise the axis is sized from the first frame only and bars in later,
# more expensive years are cut off.
rent_axis_max = grouped['MONTHLY RENT'].max() * 1.05
px.bar(grouped,x='PROVINCE',y='MONTHLY RENT',color='Type of unit',barmode='group',animation_frame='YEAR',range_y=[0, rent_axis_max],title='Average Monthly Rent Animation')

In [None]:
specific_year = 2022 # you can change this number to look at a specific year

# Rows for the chosen year, most expensive first. The filter and the sort are
# chained so a new frame is produced; sorting the filtered slice in place
# triggers pandas' SettingWithCopyWarning.
year_info = grouped.loc[grouped['YEAR'] == specific_year].sort_values('MONTHLY RENT',ascending=False)
px.bar(year_info,x='PROVINCE',y='MONTHLY RENT',color='Type of unit',barmode='group',title='Average Monthly Rent in ' + str(specific_year) + ' for each Canadian Province')

In [None]:
# Predict the average monthly rent from the year, the province code and the number of rooms.
target = grouped['MONTHLY RENT']
features = grouped[['YEAR','PROVINCE CODE','# of Rooms']]

# Hold out 33% of the rows for evaluation; the fixed random_state keeps the split reproducible.
X_train,X_test,Y_train,Y_test = train_test_split(features,target,test_size=0.33,random_state=42)

model = Ridge(alpha=0.5).fit(X_train,Y_train)
y_pred = model.predict(X_test)

# Put the actual and predicted rents next to the held-out features. assign() builds
# a new frame rather than writing columns into the slice handed back by
# train_test_split, which would raise SettingWithCopyWarning.
X_test = X_test.assign(Y_test=Y_test, Y_pred=y_pred)

province_of_interest = 'Ontario' #change this to any province you're interested in
province_code = prov_codes[province_of_interest]

# Keep only the held-out rows carrying that province's code; .copy() makes the
# result safe to modify in later cells.
# NOTE(review): the filter is on the numeric code, so rows whose label shares the
# code (e.g. 'Ontario/Quebec' also maps to 249) are kept as well -- confirm this is intended.
X_test = X_test[X_test['PROVINCE CODE'] == province_code].copy()

X_test


In [None]:
# Order the rows chronologically so every line is drawn left to right. The sorted
# frame is assigned back instead of sorting in place, which avoids
# SettingWithCopyWarning when X_test is a filtered slice.
X_test = X_test.sort_values(['YEAR','# of Rooms'])
fig = make_subplots(rows=4,cols=1,shared_xaxes=True,subplot_titles=('Bachelor Units','One Bedroom Units','Two Bedroom Units','Three Bedroom Units'))

# One subplot per unit size (0 = bachelor ... 3 = three bedrooms): solid blue is the
# actual rent, dashed red is the model's prediction.
# go.Scatter / add_trace replace the deprecated go.Line / append_trace.
for i in range(4):
    unit_rows = X_test[X_test['# of Rooms'] == i]  # filter once, reuse for both traces
    fig.add_trace(go.Scatter(x=unit_rows['YEAR'],y=unit_rows['Y_test'],name='Actual',line=dict(color='royalblue', width= 3)),row=i+1,col=1)
    fig.add_trace(go.Scatter(x=unit_rows['YEAR'],y=unit_rows['Y_pred'],name='Predicted',line=dict(color='firebrick', width= 4,dash='dash')),row=i+1,col=1)

# The legend stays hidden; the trace names still show up in the hover labels.
fig.update_layout(height=1000,width=1000,title_text='Prediction vs Actual Rent values in ' + province_of_interest,showlegend=False)
fig.show()
