In [2]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, StandardScaler
from fuzzywuzzy import process  

In [3]:

property_data = pd.read_json('properties.json')

In [4]:

def clean_rooms_info(rooms):
    if pd.isna(rooms) or not isinstance(rooms, str):
        return [np.nan, np.nan, np.nan, np.nan]
    
    
    rooms = rooms.replace(' Save', '').strip()
    
    
    bedrooms = bathrooms = toilets = parking_spaces = np.nan

    if 'Bedrooms' in rooms:
        bedrooms_match = re.search(r'(\d+)\s*Bedrooms?', rooms)
        if bedrooms_match:
            bedrooms = int(bedrooms_match.group(1))
    
    if 'Bathrooms' in rooms:
        bathrooms_match = re.search(r'(\d+)\s*Bathrooms?', rooms)
        if bathrooms_match:
            bathrooms = int(bathrooms_match.group(1))

    if 'Toilets' in rooms:
        toilets_match = re.search(r'(\d+)\s*Toilets?', rooms)
        if toilets_match:
            toilets = int(toilets_match.group(1))

    if 'Parking Spaces' in rooms:
        parking_spaces_match = re.search(r'(\d+)\s*Parking Spaces?', rooms)
        if parking_spaces_match:
            parking_spaces = int(parking_spaces_match.group(1))
    
    return [bedrooms, bathrooms, toilets, parking_spaces]


property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']] = property_data['rooms'].apply(
    lambda x: pd.Series(clean_rooms_info(x))
)

In [5]:

property_data['Bedrooms'] = pd.to_numeric(property_data['Bedrooms'], errors='coerce')
property_data['Bathrooms'] = pd.to_numeric(property_data['Bathrooms'], errors='coerce')
property_data['Toilets'] = pd.to_numeric(property_data['Toilets'], errors='coerce')
property_data['Parking Spaces'] = pd.to_numeric(property_data['Parking Spaces'], errors='coerce')


property_data['price'] = property_data['price'].replace('[\₦\$,]', '', regex=True).astype(float)


label_encoder = LabelEncoder()
property_data['location_encoded'] = label_encoder.fit_transform(property_data['location'])


property_data['Bedrooms'] = property_data['Bedrooms'].fillna(property_data['Bedrooms'].median())
property_data['Bathrooms'] = property_data['Bathrooms'].fillna(property_data['Bathrooms'].median())
property_data['Toilets'] = property_data['Toilets'].fillna(property_data['Toilets'].median())
property_data['Parking Spaces'] = property_data['Parking Spaces'].fillna(property_data['Parking Spaces'].median())


property_data.dropna(subset=['price'], inplace=True)


X = property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces', 'location_encoded']]
y = property_data['price']


valid_index = X.dropna().index
X_clean = X.loc[valid_index].reset_index(drop=True)
y_clean = y.loc[valid_index].reset_index(drop=True)


In [6]:

scaler = StandardScaler()
X_clean_scaled = pd.DataFrame(scaler.fit_transform(X_clean[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                              columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])


X_clean_scaled['location_encoded'] = X_clean['location_encoded']


if X_clean_scaled.empty or y_clean.empty:
    print("\nError: The dataset is empty after preprocessing. Please check data integrity.")
else:
    
    X_train, X_test, y_train, y_test = train_test_split(X_clean_scaled, y_clean, test_size=0.2, random_state=42)

    
    price_model = RandomForestRegressor(n_estimators=100, random_state=42)
    price_model.fit(X_train, y_train)

    
    y_pred = price_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f"Root Mean Squared Error for Price Prediction: {rmse}")

Root Mean Squared Error for Price Prediction: 12390017276.838114


In [7]:

    
def get_closest_location(input_location, location_list):
        best_match, score = process.extractOne(input_location, location_list)
        if score >= 80:  
            return best_match
        return None

    
nn_model = NearestNeighbors(n_neighbors=5, algorithm='auto')
nn_model.fit(X_clean_scaled)


In [8]:
 
def recommend_properties_with_fuzzy_matching(example_property, tolerance=0.1):
        example_encoded = example_property.copy()
        
        
        example_features = {
            'Bedrooms': example_encoded.get('Bedrooms', 0),
            'Bathrooms': example_encoded.get('Bathrooms', 0),
            'Toilets': example_encoded.get('Toilets', 0),
            'Parking Spaces': example_encoded.get('Parking Spaces', 0)
        }

        
        example_features_scaled = scaler.transform(pd.DataFrame([example_features]))
        example_features_scaled = pd.DataFrame(example_features_scaled, columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])

        
        closest_location = get_closest_location(example_encoded['location'], property_data['location'].unique())
        if closest_location:
            location_encoded = label_encoder.transform([closest_location])[0]
            example_features_scaled['location_encoded'] = location_encoded
        else:
            print("Location not found in dataset. Recommending properties based on other features.")
            example_features_scaled['location_encoded'] = -1  

        
        if example_features_scaled['location_encoded'].iloc[0] == -1:
            X_search = X_clean_scaled.drop(columns=['location_encoded'])  
            example_features_scaled = example_features_scaled.drop(columns=['location_encoded'])
        else:
            X_search = X_clean_scaled  

        
        nn_model_adjusted = NearestNeighbors(n_neighbors=5, algorithm='auto')
        nn_model_adjusted.fit(X_search)

        
        example_features_array = example_features_scaled.to_numpy()

        
        distances, indices = nn_model_adjusted.kneighbors(example_features_array)
        valid_indices = [idx for idx in indices[0] if idx < len(property_data)]

        if not valid_indices:
            print("No valid recommendations found.")
            return pd.DataFrame()

        
        recommended_properties = property_data.iloc[valid_indices]
        return recommended_properties[['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price']]

In [9]:
 
example_property = {
        "location": "Ikeja",  
        "Bedrooms": 4,
        "Bathrooms": 4,
        "Toilets": 4,
        "Parking Spaces": 0
    }

    
recommended_properties = recommend_properties_with_fuzzy_matching(example_property)
if not recommended_properties.empty:
        print("\nRecommended Properties with Fuzzy Matching:")
        print(recommended_properties[['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price']])
else:
        print("No matching property found.")



Recommended Properties with Fuzzy Matching:
                location  Bedrooms  Bathrooms  Toilets        price
384   Ogba, Ikeja, Lagos       4.0        4.0      4.0  110000000.0
1411  Ogba, Ikeja, Lagos       4.0        4.0      4.0  160000000.0
3115  Ogba, Ikeja, Lagos       4.0        4.0      4.0  285000000.0
162   Ogba, Ikeja, Lagos       4.0        4.0      5.0  180000000.0
434   Ogba, Ikeja, Lagos       4.0        4.0      5.0   90000000.0




In [21]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, StandardScaler
from fuzzywuzzy import process
import joblib

# Load data
property_data = pd.read_json('properties.json')

# Function to clean room info
def clean_rooms_info(rooms):
    if pd.isna(rooms) or not isinstance(rooms, str):
        return [np.nan, np.nan, np.nan, np.nan]
    
    rooms = rooms.replace(' Save', '').strip()
    bedrooms = bathrooms = toilets = parking_spaces = np.nan

    bedrooms_match = re.search(r'(\d+)\s*Bedrooms?', rooms)
    bathrooms_match = re.search(r'(\d+)\s*Bathrooms?', rooms)
    toilets_match = re.search(r'(\d+)\s*Toilets?', rooms)
    parking_spaces_match = re.search(r'(\d+)\s*Parking Spaces?', rooms)

    if bedrooms_match:
        bedrooms = int(bedrooms_match.group(1))
    if bathrooms_match:
        bathrooms = int(bathrooms_match.group(1))
    if toilets_match:
        toilets = int(toilets_match.group(1))
    if parking_spaces_match:
        parking_spaces = int(parking_spaces_match.group(1))

    return [bedrooms, bathrooms, toilets, parking_spaces]

# Apply clean_rooms_info to extract room info
property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']] = property_data['rooms'].apply(
    lambda x: pd.Series(clean_rooms_info(x))
)

# Clean and convert price column
property_data['price'] = property_data['price'].replace('[\₦\$,]', '', regex=True).astype(float)

# Encode location
label_encoder = LabelEncoder()
property_data['location_encoded'] = label_encoder.fit_transform(property_data['location'])

# Fill missing values in room columns
for column in ['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']:
    property_data[column] = property_data[column].fillna(property_data[column].median())

# Drop rows where price is missing
property_data.dropna(subset=['price'], inplace=True)

# Define features and target variable
X = property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces', 'location_encoded']]
y = property_data['price']

# Handle missing values in X and y
valid_index = X.dropna().index
X_clean = X.loc[valid_index].reset_index(drop=True)
y_clean = y.loc[valid_index].reset_index(drop=True)

# Train-test split and scale features
X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                              columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
X_test_scaled = pd.DataFrame(scaler.transform(X_test[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                             columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
X_train_scaled['location_encoded'] = X_train['location_encoded'].values
X_test_scaled['location_encoded'] = X_test['location_encoded'].values

# Train the recommendation model
recommendation_model = RandomForestRegressor(n_estimators=100, random_state=42)
recommendation_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = recommendation_model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error for Price Prediction: {rmse}")

# Function to get user preferences based on favorites and viewed properties
def extract_user_preferences(user_data, property_data):
    # Extract properties that user favorited or viewed
    behavior_set = set(user_data['favorites'] + user_data['viewed_properties'])
    behavior_properties = property_data[property_data.index.isin(behavior_set)]
    
    # Calculate median values for numeric fields as user preferences
    user_profile = {
        'Bedrooms': behavior_properties['Bedrooms'].median(),
        'Bathrooms': behavior_properties['Bathrooms'].median(),
        'Toilets': behavior_properties['Toilets'].median(),
        'Parking Spaces': behavior_properties['Parking Spaces'].median(),
        'location': behavior_properties['location'].mode()[0]
    } if not behavior_properties.empty else {
        # If no data, use median values of the whole dataset
        'Bedrooms': property_data['Bedrooms'].median(),
        'Bathrooms': property_data['Bathrooms'].median(),
        'Toilets': property_data['Toilets'].median(),
        'Parking Spaces': property_data['Parking Spaces'].median(),
        'location': property_data['location'].mode()[0]
    }
    
    return user_profile

# Fuzzy matching and recommendations based on user preferences
def recommend_properties_based_on_user_behavior(user_data, property_data, tolerance=0.1, n_recommendations=5):
    user_preferences = extract_user_preferences(user_data, property_data)
    example_property = {
        "location": user_preferences['location'],
        "Bedrooms": user_preferences['Bedrooms'],
        "Bathrooms": user_preferences['Bathrooms'],
        "Toilets": user_preferences['Toilets'],
        "Parking Spaces": user_preferences['Parking Spaces']
    }
    
    # Match location using the closest match in existing locations
    closest_location = get_closest_location(user_preferences['location'], property_data['location'].unique())
    if closest_location:
        location_encoded = label_encoder.transform([closest_location])[0]
    else:
        print("Location not found in dataset. Recommending properties based on other features.")
        location_encoded = -1

    # Prepare feature data for nearest neighbor search
    example_features = pd.DataFrame([{
        "Bedrooms": example_property['Bedrooms'],
        "Bathrooms": example_property['Bathrooms'],
        "Toilets": example_property['Toilets'],
        "Parking Spaces": example_property['Parking Spaces'],
        "location_encoded": location_encoded
    }])

    # Scale the features
    example_features_scaled = pd.DataFrame(scaler.transform(example_features[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                                           columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
    example_features_scaled['location_encoded'] = example_features['location_encoded']

    # Nearest Neighbors model for recommendations
    nn_model = NearestNeighbors(n_neighbors=n_recommendations, algorithm='auto').fit(X_clean)
    distances, indices = nn_model.kneighbors(example_features_scaled)
    
    valid_indices = [idx for idx in indices[0] if idx < len(property_data)]
    recommendations = property_data.iloc[valid_indices][['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price']]
    
    # If not enough recommendations, add random properties to reach n_recommendations
    if len(recommendations) < n_recommendations:
        remaining_count = n_recommendations - len(recommendations)
        random_properties = property_data.sample(remaining_count)
        recommendations = pd.concat([recommendations, random_properties])

    return recommendations

# Example user data
user_data = {
    "user_id": 1,
    "favorites": [384, 1411, 200],
    "viewed_properties": [162, 434, 300]
}

# Get personalized recommendations
personalized_recommendations = recommend_properties_based_on_user_behavior(user_data, property_data)

if not personalized_recommendations.empty:
    print("\nPersonalized Recommended Properties:")
    print(personalized_recommendations[['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price']])
else:
    print("No personalized recommendations found.")

# Save models and encoders
joblib.dump(recommendation_model, "recommendation_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(nn_model, "nearest_neighbors_model.pkl")

print("Model and supporting objects saved as .pkl files.")


Root Mean Squared Error for Price Prediction: 12569525215.946709

Personalized Recommended Properties:
                                     location  Bedrooms  Bathrooms  Toilets  \
5148                       Ogba, Ikeja, Lagos       1.0        1.0      2.0   
2901                       Ogba, Ikeja, Lagos       2.0        2.0      3.0   
7571                       Ogba, Ikeja, Lagos       3.0        3.0      3.0   
8099  Ogidan Bus Stop, Sangotedo, Ajah, Lagos       1.0        4.0      2.0   
8633           Ogidan, Sangotedo, Ajah, Lagos       2.0        2.0      3.0   

           price  
5148  65000000.0  
2901  70000000.0  
7571  75000000.0  
8099  25000000.0  
8633  55000000.0  
Model and supporting objects saved as .pkl files.


In [10]:
# Import libraries
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, StandardScaler
from fuzzywuzzy import process
import joblib

# Load data
property_data = pd.read_json('properties.json')

# Function to clean room info
def clean_rooms_info(rooms):
    if pd.isna(rooms) or not isinstance(rooms, str):
        return [np.nan, np.nan, np.nan, np.nan]
    
    rooms = rooms.replace(' Save', '').strip()
    bedrooms = bathrooms = toilets = parking_spaces = np.nan

    bedrooms_match = re.search(r'(\d+)\s*Bedrooms?', rooms)
    bathrooms_match = re.search(r'(\d+)\s*Bathrooms?', rooms)
    toilets_match = re.search(r'(\d+)\s*Toilets?', rooms)
    parking_spaces_match = re.search(r'(\d+)\s*Parking Spaces?', rooms)

    if bedrooms_match:
        bedrooms = int(bedrooms_match.group(1))
    if bathrooms_match:
        bathrooms = int(bathrooms_match.group(1))
    if toilets_match:
        toilets = int(toilets_match.group(1))
    if parking_spaces_match:
        parking_spaces = int(parking_spaces_match.group(1))

    return [bedrooms, bathrooms, toilets, parking_spaces]

# Apply clean_rooms_info to extract room info
property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']] = property_data['rooms'].apply(
    lambda x: pd.Series(clean_rooms_info(x))
)

# Clean and convert price column
property_data['price'] = property_data['price'].replace('[\₦\$,]', '', regex=True).astype(float)

# Encode location
label_encoder = LabelEncoder()
property_data['location_encoded'] = label_encoder.fit_transform(property_data['location'])

# Fill missing values in room columns
for column in ['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']:
    property_data[column] = property_data[column].fillna(property_data[column].median())

# Drop rows where price is missing
property_data.dropna(subset=['price'], inplace=True)

# Define features and target variable
X = property_data[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces', 'location_encoded']]
y = property_data['price']

# Handle missing values in X and y
valid_index = X.dropna().index
X_clean = X.loc[valid_index].reset_index(drop=True)
y_clean = y.loc[valid_index].reset_index(drop=True)

# Train-test split and scale features
X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                              columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
X_test_scaled = pd.DataFrame(scaler.transform(X_test[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                             columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
X_train_scaled['location_encoded'] = X_train['location_encoded'].values
X_test_scaled['location_encoded'] = X_test['location_encoded'].values

# Train the recommendation model
recommendation_model = RandomForestRegressor(n_estimators=100, random_state=42)
recommendation_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = recommendation_model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error for Price Prediction: {rmse}")

# Train the Nearest Neighbors model for recommendations
nn_model = NearestNeighbors(n_neighbors=5, algorithm='auto')
nn_model.fit(X_clean)

# Function to get user preferences based on favorites and viewed properties
def extract_user_preferences(user_data, property_data):
    # Extract properties that user favorited or viewed
    behavior_set = set(user_data['favorites'] + user_data['viewed_properties'])
    behavior_properties = property_data[property_data.index.isin(behavior_set)]
    
    # Calculate median values for numeric fields as user preferences
    user_profile = {
        'Bedrooms': behavior_properties['Bedrooms'].median(),
        'Bathrooms': behavior_properties['Bathrooms'].median(),
        'Toilets': behavior_properties['Toilets'].median(),
        'Parking Spaces': behavior_properties['Parking Spaces'].median(),
        'location': behavior_properties['location'].mode()[0]
    } if not behavior_properties.empty else {
        # If no data, use median values of the whole dataset
        'Bedrooms': property_data['Bedrooms'].median(),
        'Bathrooms': property_data['Bathrooms'].median(),
        'Toilets': property_data['Toilets'].median(),
        'Parking Spaces': property_data['Parking Spaces'].median(),
        'location': property_data['location'].mode()[0]
    }
    
    return user_profile

# Function to get the closest location match
def get_closest_location(user_location, available_locations):
    """
    Finds the closest location to the user preference using fuzzy matching.
    Args:
    - user_location (str): Location preference of the user.
    - available_locations (list): List of available locations in the dataset.
    
    Returns:
    - str: The closest matching location.
    """
    closest_match = process.extractOne(user_location, available_locations)
    if closest_match and closest_match[1] > 75:  # Minimum threshold for a match score
        return closest_match[0]
    return None

# Fuzzy matching and recommendations based on user preferences
def recommend_properties_based_on_user_behavior(user_data, property_data, tolerance=0.1, n_recommendations=5):
    user_preferences = extract_user_preferences(user_data, property_data)
    example_property = {
        "location": user_preferences['location'],
        "Bedrooms": user_preferences['Bedrooms'],
        "Bathrooms": user_preferences['Bathrooms'],
        "Toilets": user_preferences['Toilets'],
        "Parking Spaces": user_preferences['Parking Spaces']
    }
    
    # Match location using the closest match in existing locations
    closest_location = get_closest_location(user_preferences['location'], property_data['location'].unique())
    if closest_location:
        location_encoded = label_encoder.transform([closest_location])[0]
    else:
        print("Location not found in dataset. Recommending properties based on other features.")
        location_encoded = -1

    # Prepare feature data for recommendation model prediction
    example_features = pd.DataFrame([{
        "Bedrooms": example_property['Bedrooms'],
        "Bathrooms": example_property['Bathrooms'],
        "Toilets": example_property['Toilets'],
        "Parking Spaces": example_property['Parking Spaces'],
        "location_encoded": location_encoded
    }])

    # Scale the features for recommendation model
    example_features_scaled = pd.DataFrame(scaler.transform(example_features[['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces']]), 
                                           columns=['Bedrooms', 'Bathrooms', 'Toilets', 'Parking Spaces'])
    example_features_scaled['location_encoded'] = example_features['location_encoded']

    # Get model predictions
    property_data['predicted_price'] = recommendation_model.predict(X_clean)

    # Filter properties based on user's preference
    recommended_properties = property_data[(property_data['Bedrooms'] == example_property['Bedrooms']) &
                                           (property_data['Bathrooms'] == example_property['Bathrooms']) &
                                           (property_data['Toilets'] == example_property['Toilets'])]

    # Ensure unique locations in the recommendations
    recommended_properties = recommended_properties.drop_duplicates(subset=['location'])

    # Sort by predicted price and select top recommendations
    recommended_properties = recommended_properties.sort_values(by='predicted_price', ascending=True).head(n_recommendations)
    
    # If fewer than desired recommendations, add random unique-location properties
    if len(recommended_properties) < n_recommendations:
        remaining_count = n_recommendations - len(recommended_properties)
        additional_properties = property_data[~property_data['location'].isin(recommended_properties['location'])].sample(remaining_count)
        recommended_properties = pd.concat([recommended_properties, additional_properties])

    return recommended_properties[['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price', 'predicted_price']].head(n_recommendations)


# Example user data
user_data = {
    "user_id": 1,
    "favorites": [384, 1411, 200],
    "viewed_properties": [162, 434, 300]
}

# Get personalized recommendations
personalized_recommendations = recommend_properties_based_on_user_behavior(user_data, property_data)

if not personalized_recommendations.empty:
    print("\nPersonalized Recommended Properties:")
    print(personalized_recommendations[['location', 'Bedrooms', 'Bathrooms', 'Toilets', 'price']])
else:
    print("No personalized recommendations found.")

# Save models and encoders
joblib.dump(recommendation_model, "recommendation_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(nn_model, "nearest_neighbors_model.pkl")

print("Model and supporting objects saved as .pkl files.")


Root Mean Squared Error for Price Prediction: 12569525215.946709

Personalized Recommended Properties:
                                             location  Bedrooms  Bathrooms  \
7345  Akora Villa Estate, Adeniyi Jones, Ikeja, Lagos       5.0        5.0   
7591                     Ikate Elegushi, Lekki, Lagos       2.0        2.0   
2367                                     Ikoyi, Lagos       4.0        4.0   
6991                        Ogba, Ifako-Ijaiye, Lagos       4.0        4.0   
6728                      Orchid, Ikota, Lekki, Lagos       3.0        3.0   

      Toilets        price  
7345      6.0  300000000.0  
7591      3.0  120000000.0  
2367      5.0    1500000.0  
6991      5.0   60000000.0  
6728      4.0   67000000.0  
Model and supporting objects saved as .pkl files.
