In [44]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [45]:
# Load the cleaned listings file, then keep only the rows whose Province is
# 'BC', ordered by city name.
data = pd.read_csv('cleaned_canada.csv')
dataBC = data.loc[data['Province'].eq('BC')].sort_values(by='City')

In [46]:
# Narrow the BC listings to the columns used in the rest of the notebook.
# DataFrame.filter(items=...) keeps the listed labels that exist in the frame.
dataBC = dataBC.filter(
    items=[
        'City',
        'Province',
        'Price',
        'Bedrooms',
        'Bathrooms',
        'Acreage',
        'Property Type',
        'Square Footage',
        'Garage',
        'Parking',
    ]
)

# Only single-family listings are analysed from here on.
data_bc_single_family = dataBC.loc[dataBC['Property Type'].eq('Single Family')]

In [47]:
# Cities retained for the analysis; listings in any other city are dropped.
# NOTE(review): this may not cover every Metro Vancouver municipality
# (e.g. Maple Ridge, White Rock) — confirm the omission is intentional.
metro_vancouver_cities = [
    "Vancouver",
    "Burnaby",
    "Richmond",
    "Surrey",
    "Coquitlam",
    "North Vancouver",
    "West Vancouver",
    "New Westminster",
    "Delta",
    "Port Coquitlam",
    "Port Moody",
    "Langley",
]
data_bc_single_family = data_bc_single_family.loc[
    lambda listings: listings['City'].isin(metro_vancouver_cities)
]

In [48]:
# Preview the first five rows of data_bc_single_family.
data_bc_single_family.head(n=5)

Unnamed: 0,City,Province,Price,Bedrooms,Bathrooms,Acreage,Property Type,Square Footage,Garage,Parking
12202,Burnaby,BC,2499000.0,6.0,4.0,0.27,Single Family,2990.0,Yes,Yes
3888,Burnaby,BC,2428800.0,6.0,3.0,0.17,Single Family,2845.0,Yes,Yes
12174,Burnaby,BC,2490000.0,8.0,4.0,0.14,Single Family,2526.0,Yes,No
12167,Burnaby,BC,2190000.0,4.0,1.0,0.14,Single Family,1938.0,Yes,No
3936,Burnaby,BC,2298000.0,3.0,2.0,0.15,Single Family,1830.0,No,No


In [49]:
# Numeric columns rescaled to the [0, 1] range so they can be combined later.
features = ['Price', 'Bedrooms', 'Bathrooms', 'Acreage', 'Square Footage']
scaler = MinMaxScaler()

# Work on a copy so the unscaled listings stay available in
# data_bc_single_family.
data_scaled = data_bc_single_family.copy()
data_scaled[features] = scaler.fit_transform(data_bc_single_family[features])
# NOTE(review): min-max scaling is driven by each column's extremes. The
# scaled Acreage values in this cell's output are ~1e-5, which suggests a
# single very large acreage dominates the range — confirm whether outliers
# should be handled before scaling.

# Invert the scaled price so that cheaper listings get values closer to 1.
data_scaled["Price"] = 1 - data_scaled['Price']

# Encode the Yes/No flags as 1/0. Series.map turns every value that is not a
# key of the mapping into NaN, which would silently flow into any arithmetic
# done on these columns later, so verify the encoding instead of trusting it.
yes_no_encoding = {'Yes': 1, 'No': 0}
for flag_column in ['Garage', 'Parking']:
    encoded_flag = data_scaled[flag_column].map(yes_no_encoding)
    assert encoded_flag.notna().all(), (
        f"{flag_column!r} contains values other than 'Yes'/'No'"
    )
    data_scaled[flag_column] = encoded_flag
data_scaled.head()

Unnamed: 0,City,Province,Price,Bedrooms,Bathrooms,Acreage,Property Type,Square Footage,Garage,Parking
12202,Burnaby,BC,0.961999,0.428571,0.333333,3.8e-05,Single Family,0.136571,1,1
3888,Burnaby,BC,0.963199,0.428571,0.25,2.4e-05,Single Family,0.129473,1,1
12174,Burnaby,BC,0.962153,0.571429,0.333333,2e-05,Single Family,0.113858,1,0
12167,Burnaby,BC,0.967279,0.285714,0.083333,2e-05,Single Family,0.085075,1,0
3936,Burnaby,BC,0.965434,0.214286,0.166667,2.1e-05,Single Family,0.079789,0,0


In [61]:
# Columns that feed the overall score; the weights below apply in this order.
score_features = [
    'Price', 'Bedrooms', 'Bathrooms', 'Square Footage',
    'Acreage', 'Garage', 'Parking',
]

# Equal weighting: each feature gets 1 / len(score_features).
weights = np.full(len(score_features), 1 / len(score_features))

# Weighted sum of the score features per listing, stored as `Score`, with the
# highest-scoring listings first.
data_scored = (
    data_scaled
    .assign(Score=data_scaled[score_features].dot(weights))
    .sort_values(by='Score', ascending=False)
)
data_scored

Unnamed: 0,City,Province,Price,Bedrooms,Bathrooms,Acreage,Property Type,Square Footage,Garage,Parking,Score
24918,Surrey,BC,0.868005,0.785714,1.000000,0.000245,Single Family,0.792892,1,1,0.778122
21250,Surrey,BC,0.955352,0.857143,0.916667,0.000028,Single Family,0.315826,1,1,0.720716
14576,North Vancouver,BC,0.765485,0.642857,1.000000,0.000083,Single Family,0.624798,1,1,0.719032
24797,Langley,BC,0.893977,0.571429,0.916667,0.000280,Single Family,0.640707,1,1,0.717580
12735,Surrey,BC,0.946794,0.857143,0.916667,0.000070,Single Family,0.302217,1,1,0.717556
...,...,...,...,...,...,...,...,...,...,...,...
15704,Vancouver,BC,0.953439,0.000000,0.000000,0.000013,Single Family,0.074649,0,0,0.146871
24767,Langley,BC,0.786177,0.142857,0.083333,0.000332,Single Family,0.013706,0,0,0.146629
15648,Coquitlam,BC,0.934705,0.000000,0.083333,0.000024,Single Family,0.002448,0,0,0.145787
10859,Richmond,BC,0.666382,0.142857,0.083333,0.000172,Single Family,0.048950,0,0,0.134528
