In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import time
import ast

In [2]:
# Load the scraped rental listings and preview the first rows. The file name
# matches the CSV that the commented-out cell In [9] writes.
# NOTE(review): the name `property` shadows Python's built-in `property`
# for the rest of the notebook.
property = pd.read_csv("keez_rentals_with_features_corrected.csv")
property.head()

Unnamed: 0,Currency,Price,Address,State,Parish,Square Footage,Link,Bedrooms,Bathrooms,Interior_Features,Parking,Lot_Features
0,USD,1100.0,OXFORD ROAD,"Kingston 5, Kingston & St. Andrew",Kingston & St. Andrew,750.0,https://www.getkeez.com/properties/55225,2.0,1.0,"['Fully Walled', '24 Hour Security', 'Flooring...",['Open'],"['View - Mountain', 'Central location', '', ''..."
1,USD,1100.0,OXFORD ROAD,"Kingston 5, Kingston & St. Andrew",Kingston & St. Andrew,750.0,https://www.getkeez.com/properties/55225,2.0,1.0,"['Fully Walled', '24 Hour Security', 'Flooring...",['Open'],"['View - Mountain', 'Central location', '', ''..."
2,JMD,150000.0,OXFORD ROAD,"Kingston 10, Kingston & St. Andrew",Kingston & St. Andrew,720.0,https://www.getkeez.com/properties/58402,2.0,1.0,"['Water Tank', 'Ceiling Fans', 'Fully Walled',...",['Multiple C/P'],"['', '', '', '', '', '']"
3,JMD,220000.0,KENSINGTON CRESCENT,"Kingston 5, Kingston & St. Andrew",Kingston & St. Andrew,900.0,https://www.getkeez.com/properties/54274,1.0,2.0,"['Furnished', 'Flooring: Ceramic Tile']",['Multiple C/P'],"['View - Mountain', 'Central location', 'Easy ..."
4,JMD,172000.0,LORDS ROAD,"Kingston 5, Kingston & St. Andrew",Kingston & St. Andrew,43560.0,https://www.getkeez.com/properties/56675,2.0,1.0,"['Garden Area', 'Water Tank', 'Water Heater', ...",[''],"['', '', '', '', '', '', '', '', '']"


In [3]:
# options = Options()
# options.add_argument('--headless')
# options.add_argument('--disable-gpu')
# driver = webdriver.Chrome(options=options)

# def scrape_property_details(url):
#     driver.get(url)
#     time.sleep(1)

#     bedrooms, bathrooms = None, None
#     try:
#         facts = driver.find_elements(By.CLASS_NAME, 'col-fact')
#         for fact in facts:
#             parts = fact.text.split(':')
#             if len(parts) == 2:
#                 label = parts[0].strip().lower()
#                 value = parts[1].strip()
#                 if 'bedroom' in label:
#                     bedrooms = value
#                 elif 'bathroom' in label:
#                     bathrooms = value
#     except:
#         pass

#     try:
#         interior = [li.text.strip() for li in driver.find_element(By.XPATH, "//h5[text()='Interior Features']/following-sibling::ul").find_elements(By.TAG_NAME, "li")]
#     except:
#         interior = None

#     try:
#         parking = [li.text.strip() for li in driver.find_element(By.XPATH, "//h5[text()='Parking']/following-sibling::ul").find_elements(By.TAG_NAME, "li")]
#     except:
#         parking = None

#     try:
#         lot = [li.text.strip() for li in driver.find_element(By.XPATH, "//h5[text()='Lot Features']/following-sibling::ul").find_elements(By.TAG_NAME, "li")]
#     except:
#         lot = None

#     return {
#         'Bedrooms': bedrooms,
#         'Bathrooms': bathrooms,
#         'Interior_Features': interior,
#         'Parking': parking,
#         'Lot_Features': lot
#     }

In [4]:
# def extract_details(details):
#     return (
#         details.get('Bedrooms'),
#         details.get('Bathrooms'),
#         details.get('Interior_Features'),
#         details.get('Parking'),
#         details.get('Lot_Features')
#     ) if details else (None, None, None, None, None)

# property[['Bedrooms', 'Bathrooms', 'Interior_Features', 'Parking', 'Lot_Features']] = property['Link'].apply(
#     lambda x: pd.Series(extract_details(scrape_property_details(x)))
# )

In [5]:
# property.to_csv("keez_rentals_with_features.csv", index=False)
# print(property[['Link', 'Bedrooms', 'Bathrooms', 'Interior_Features', 'Parking', 'Lot_Features']])

# driver.quit()

In [6]:
# property['Price'] = property['Price'].replace('[\$,]', '', regex=True).replace(',', '', regex=True).astype(str).str.replace(',', '').astype(float)

In [7]:
# property['Square Footage'] = property['Square Footage'].str.extract(r'([\d,]+)').replace(',', '', regex=True).astype(float)

In [8]:
# property['Bedrooms'] = pd.to_numeric(property['Bedrooms'], errors='coerce')
# property['Bathrooms'] = pd.to_numeric(property['Bathrooms'], errors='coerce')

In [9]:
# property.to_csv("keez_rentals_with_features_corrected.csv", index=False)

In [10]:
def safe_parse_list(val):
    """Turn a raw feature cell into a list of clean, title-cased names.

    Parameters
    ----------
    val : str, list, float or None
        Either the string repr of a Python list (as stored in the CSV, e.g.
        "['Water Tank', '']"), an already-parsed list, or a missing value
        (``None`` / NaN).

    Returns
    -------
    list of str
        Each entry stripped of surrounding whitespace and title-cased. Falsy
        entries and entries that are empty after stripping are dropped.
        Anything that is missing, unparseable, or not a list yields ``[]``.
    """
    if val is None or (isinstance(val, float) and pd.isna(val)):
        return []

    if isinstance(val, str):
        try:
            val = ast.literal_eval(val)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval documents all of these for malformed input; parsing
            # is best-effort, so treat any of them as "no features".
            return []

    if not isinstance(val, list):
        return []

    # Strip first, then filter: entries such as ' ' must not survive as ''.
    normalized = (str(item).strip().title() for item in val if item)
    return [name for name in normalized if name]

# Parse every raw 'Interior_Features' cell into a list of clean feature names.
property['Interior_Features'] = property['Interior_Features'].apply(safe_parse_list)

# Collect the distinct feature names seen across all listings.
all_features = set()
for listing_features in property['Interior_Features']:
    all_features.update(listing_features)

# Column-name sanitiser: drop ':' and ',', turn spaces into underscores.
_INTERIOR_NAME_TABLE = str.maketrans({":": None, ",": None, " ": "_"})

# Add one 0/1 indicator column per distinct interior feature.
for feature in sorted(all_features):
    col_name = feature.translate(_INTERIOR_NAME_TABLE)
    property[col_name] = property['Interior_Features'].apply(lambda x: int(feature in x))

In [11]:
# Inspect the column index after adding the interior-feature indicator columns.
property.columns

Index(['Currency', 'Price', 'Address', 'State', 'Parish', 'Square Footage',
       'Link', 'Bedrooms', 'Bathrooms', 'Interior_Features', 'Parking',
       'Lot_Features', '24_Hour_Security', 'Additional_Accom.',
       'Appliances_Only', 'Built_In_Vacuum', 'Cable', 'Ceiling_Fans',
       'Flooring_Ceramic_Tile', 'Flooring_Ceramic_TileLaminate',
       'Flooring_Ceramic_TileMarble', 'Flooring_Ceramic_TileOther',
       'Flooring_Ceramic_TilePorcelain', 'Flooring_Ceramic_TileTerrazzo',
       'Flooring_Ceramic_TileWood', 'Flooring_Laminate', 'Flooring_Marble',
       'Flooring_Mixed', 'Flooring_MixedPorcelain', 'Flooring_OtherPorcelain',
       'Flooring_Porcelain', 'Flooring_Terrazzo',
       'Flooring_Wall_To_Wall_CarpetCeramic_Tile', 'Flooring_Wood',
       'Fully_Fenced', 'Fully_Walled', 'Furnished', 'Garden_Area', 'Grilled',
       'Guest_House', 'Hot_Tub', 'Hurricane_Shutters', 'Jacuzzi/Whirlpool',
       'Jetted_Bathtub', 'Kitchen_Built-In(S)', 'Main_Level_Entry',
       'Night_Se

In [12]:
# Parse the raw 'Parking' cells into lists of clean parking descriptors.
property['Parking'] = property['Parking'].apply(safe_parse_list)

# Distinct parking descriptors across all listings.
all_parking_features = set().union(*property['Parking'])

# One 0/1 indicator column per descriptor. In the column name ':' and ','
# are removed and every space becomes an underscore.
for feature in sorted(all_parking_features):
    without_punctuation = feature.replace(":", "").replace(",", "")
    col_name = "_".join(without_punctuation.split(" "))
    property[col_name] = property['Parking'].apply(lambda x: int(feature in x))

In [13]:
# Parse the raw 'Lot_Features' cells into lists of clean lot descriptors.
property['Lot_Features'] = property['Lot_Features'].apply(safe_parse_list)

# Distinct lot descriptors across all listings. Kept in its own set so the
# parking set built in the previous cell (`all_parking_features`) is not
# overwritten with unrelated values.
all_lot_features = set()
for lot_features in property['Lot_Features']:
    all_lot_features.update(lot_features)

# One 0/1 indicator column per lot descriptor.
for feature in sorted(all_lot_features):
    col_name = feature.replace(":", "").replace(",", "").replace(" ", "_")  # Sanitize column names
    property[col_name] = property['Lot_Features'].apply(lambda x: int(feature in x))

In [14]:
# Fixed exchange rate used to express USD-denominated listings in JMD.
USD_TO_JMD = 159

# Locate the USD rows once, convert their prices, then relabel the currency
# so every remaining price is in JMD.
is_usd = property['Currency'].eq('USD')
property.loc[is_usd, 'Price'] = property.loc[is_usd, 'Price'] * USD_TO_JMD
property.loc[is_usd, 'Currency'] = 'JMD'

In [15]:
# Remove the raw list columns (now represented by indicator columns) together
# with the link, state and currency fields.
columns_to_drop = ["Interior_Features", "Parking", "Lot_Features", "Link", "State", "Currency"]
property = property.drop(columns=columns_to_drop)

In [16]:
# Preview only the listings whose Parish is 'Kingston & St. Andrew'.
# Display only: the result is not assigned, so `property` is unchanged here.
property.query("Parish == 'Kingston & St. Andrew'")

Unnamed: 0,Price,Address,Parish,Square Footage,Bedrooms,Bathrooms,24_Hour_Security,Additional_Accom.,Appliances_Only,Built_In_Vacuum,...,Landscaped,Low_Maint_Yard,Private_Setting,Quiet_Area,Recreation_Nearby,Road_-_Paved,View_-_City,View_-_Mountain,View_-_Ocean,Waterfront_-_Ocean
0,174900.0,OXFORD ROAD,Kingston & St. Andrew,750.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,174900.0,OXFORD ROAD,Kingston & St. Andrew,750.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,150000.0,OXFORD ROAD,Kingston & St. Andrew,720.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,220000.0,KENSINGTON CRESCENT,Kingston & St. Andrew,900.0,1.0,2.0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,172000.0,LORDS ROAD,Kingston & St. Andrew,43560.0,2.0,1.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,75000.0,SWAIN SPRING ROAD,Kingston & St. Andrew,800.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
305,75000.0,SWAIN SPRING ROAD,Kingston & St. Andrew,800.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
306,65000.0,"13 Lakeside Drive, Camrose, Kingston 17",Kingston & St. Andrew,,,,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
309,397500.0,4 LIPSCOMBE DR KINGSTON 9,Kingston & St. Andrew,1500.0,,,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Fill missing bedroom/bathroom counts with the median of the respective
# column, computed over every row currently in `property`.
for room_col in ('Bedrooms', 'Bathrooms'):
    property[room_col] = property[room_col].fillna(property[room_col].median())

In [18]:
# NOTE(review): this expression selects the rows with a missing
# 'Square Footage', but the result is neither assigned nor the last expression
# of the cell, so it is discarded and nothing is displayed.
property[property['Square Footage'].isna()]
# Fill missing square footage with the column median.
property['Square Footage'] = property['Square Footage'].fillna(property['Square Footage'].median())

In [19]:
# Inspect the remaining columns after dropping the raw fields and imputing
# missing values.
property.columns

Index(['Price', 'Address', 'Parish', 'Square Footage', 'Bedrooms', 'Bathrooms',
       '24_Hour_Security', 'Additional_Accom.', 'Appliances_Only',
       'Built_In_Vacuum', 'Cable', 'Ceiling_Fans', 'Flooring_Ceramic_Tile',
       'Flooring_Ceramic_TileLaminate', 'Flooring_Ceramic_TileMarble',
       'Flooring_Ceramic_TileOther', 'Flooring_Ceramic_TilePorcelain',
       'Flooring_Ceramic_TileTerrazzo', 'Flooring_Ceramic_TileWood',
       'Flooring_Laminate', 'Flooring_Marble', 'Flooring_Mixed',
       'Flooring_MixedPorcelain', 'Flooring_OtherPorcelain',
       'Flooring_Porcelain', 'Flooring_Terrazzo',
       'Flooring_Wall_To_Wall_CarpetCeramic_Tile', 'Flooring_Wood',
       'Fully_Fenced', 'Fully_Walled', 'Furnished', 'Garden_Area', 'Grilled',
       'Guest_House', 'Hot_Tub', 'Hurricane_Shutters', 'Jacuzzi/Whirlpool',
       'Jetted_Bathtub', 'Kitchen_Built-In(S)', 'Main_Level_Entry',
       'Night_Security', 'Partial_Furnished', 'Roll_Down_Shutters',
       'Security_System', 'Sprin

In [20]:
# Keep only the listings located in the 'Kingston & St. Andrew' parish.
in_kingston = property['Parish'] == 'Kingston & St. Andrew'
property = property[in_kingston]

In [21]:
# Display the frame after restricting it to 'Kingston & St. Andrew'.
property

Unnamed: 0,Price,Address,Parish,Square Footage,Bedrooms,Bathrooms,24_Hour_Security,Additional_Accom.,Appliances_Only,Built_In_Vacuum,...,Landscaped,Low_Maint_Yard,Private_Setting,Quiet_Area,Recreation_Nearby,Road_-_Paved,View_-_City,View_-_Mountain,View_-_Ocean,Waterfront_-_Ocean
0,174900.0,OXFORD ROAD,Kingston & St. Andrew,750.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,174900.0,OXFORD ROAD,Kingston & St. Andrew,750.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,150000.0,OXFORD ROAD,Kingston & St. Andrew,720.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,220000.0,KENSINGTON CRESCENT,Kingston & St. Andrew,900.0,1.0,2.0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,172000.0,LORDS ROAD,Kingston & St. Andrew,43560.0,2.0,1.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,75000.0,SWAIN SPRING ROAD,Kingston & St. Andrew,800.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
305,75000.0,SWAIN SPRING ROAD,Kingston & St. Andrew,800.0,2.0,1.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
306,65000.0,"13 Lakeside Drive, Camrose, Kingston 17",Kingston & St. Andrew,1465.0,2.0,2.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
309,397500.0,4 LIPSCOMBE DR KINGSTON 9,Kingston & St. Andrew,1500.0,2.0,2.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# Model input columns, in the order they are fed to the preprocessing pipeline
# and prompted for in get_user_input().
FEATURES = [
    # Numeric size attributes
    'Square Footage',
    'Bedrooms',
    'Bathrooms',
    # Indicator columns
    '24_Hour_Security',
    'Furnished',
    'Garden_Area',
    'Swimming_Pool',
    'Central_Location',
    'Gated_Community',
    'View_-_Ocean',
    'Waterfront_-_Ocean',
]

def train_model(data, target='Price'):
    """Fit the preprocessing pipeline and a random-forest regressor.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column named in ``FEATURES`` plus ``target``.
    target : str, default 'Price'
        Name of the column to predict.

    Returns
    -------
    tuple
        ``(model, pipeline, X_test, y_test)`` where ``model`` is the fitted
        ``RandomForestRegressor``, ``pipeline`` is the fitted imputer+scaler,
        ``X_test`` is the held-out feature matrix already transformed by
        ``pipeline``, and ``y_test`` holds the matching target values.
    """
    X = data[FEATURES]
    y = data[target]

    # Split BEFORE fitting any preprocessing, so the imputer medians and the
    # scaler statistics are learned from the training rows only. Fitting them
    # on the full dataset leaks test-set information into the evaluation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])
    X_train = pipeline.fit_transform(X_train)
    X_test = pipeline.transform(X_test)

    model = RandomForestRegressor(n_estimators=100, max_depth=12, min_samples_leaf=2, random_state=42)
    model.fit(X_train, y_train)
    return model, pipeline, X_test, y_test

In [23]:
def evaluate(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Returns a ``(mse, r2)`` tuple: the mean squared error and the R² score of
    ``model.predict(X_test)`` against ``y_test``.
    """
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    return mse, r2

In [24]:
def _prompt_number(prompt, cast):
    """Ask with `prompt` until `cast` (e.g. ``float`` or ``int``) accepts the reply."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            # Only conversion failures trigger a re-prompt. KeyboardInterrupt
            # and other errors propagate, so the loop can always be interrupted.
            print("Invalid input.")


def get_user_input():
    """Interactively collect one property's features plus price and rent.

    Prompts once per entry in ``FEATURES``. 'Square Footage', 'Bedrooms' and
    'Bathrooms' are read as floats; every other feature is read as an int.
    The asking price and estimated rent are read as floats. Every prompt is
    repeated until the reply can be converted.

    Returns
    -------
    tuple
        ``(values, price, rent)`` where ``values`` maps feature name to the
        number entered, in ``FEATURES`` order.
    """
    float_features = ('Square Footage', 'Bedrooms', 'Bathrooms')
    values = {}
    for f in FEATURES:
        values[f] = _prompt_number(f"{f}: ", float if f in float_features else int)

    price = _prompt_number("Asking price: ", float)
    rent = _prompt_number("Estimated rent: ", float)
    return values, price, rent

In [25]:
def is_good_investment(model, pipeline, property_features, rent, price):
    """Classify a property using the model's prediction and a rent/price check.

    Parameters
    ----------
    model : object with ``predict(X)``
        Fitted estimator; the first element of its prediction is used.
    pipeline : object with ``transform(df)``
        Fitted preprocessing applied to the single-row frame.
    property_features : dict
        Feature name -> value for one property.
    rent : float
        Estimated rent entered by the user.
    price : float
        Asking price entered by the user.

    Returns
    -------
    str
        "Likely a good investment." when the prediction exceeds ``price`` by
        more than 10% and ``rent`` exceeds 1% of ``price``;
        "Might be reasonable." when the prediction merely exceeds ``price``;
        otherwise "May not be a good investment."
    """
    df = pd.DataFrame([property_features])

    # The frame's column order follows the dict's key order. If the pipeline
    # recorded the column names it was fitted with, present the same columns
    # in that order. Only reorder when the name sets match exactly, so a
    # missing or extra feature is still reported by the pipeline itself.
    fitted_names = getattr(pipeline, "feature_names_in_", None)
    if fitted_names is not None and set(fitted_names) == set(df.columns):
        df = df[list(fitted_names)]

    predicted = model.predict(pipeline.transform(df))[0]

    # Applying the 1% rule for rent-to-price ratio
    if predicted > price * 1.1 and rent > price * 0.01:
        return "Likely a good investment."
    if predicted > price:
        return "Might be reasonable."
    return "May not be a good investment."

In [26]:
def main(path='property_rentals.csv', data=None):
    """Train the model, report its test metrics, then assess one property.

    Parameters
    ----------
    path : str, default 'property_rentals.csv'
        CSV file to load when ``data`` is not supplied.
    data : pandas.DataFrame, optional
        Already-loaded training data. When given, ``path`` is ignored, which
        lets a frame prepared in memory be used without writing it to disk.

    The function prints the test-set MSE and R², prompts for a property's
    features, asking price and estimated rent, and prints the verdict from
    ``is_good_investment``.
    """
    if data is None:
        data = pd.read_csv(path)

    model, pipeline, X_test, y_test = train_model(data)
    mse, r2 = evaluate(model, X_test, y_test)
    print(f"MSE: {mse:.2f}, R²: {r2:.2f}")

    features, price, rent = get_user_input()
    result = is_good_investment(model, pipeline, features, rent, price)
    print(result)

In [27]:
# Run the interactive workflow with main()'s default CSV path
# ('property_rentals.csv'); the `property` frame prepared in the cells above
# is not passed in.
main()

MSE: 10684482600.10, R²: 0.46


Square Footage:  1500
Bedrooms:  3
Bathrooms:  2
24_Hour_Security:  1
Furnished:  1
Garden_Area:  0
Swimming_Pool:  0
Central_Location:  0
Gated_Community:  1
View_-_Ocean:  0
Waterfront_-_Ocean:  0
Asking price:  260000
Estimated rent:  250000


Likely a good investment.
