In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
np.set_printoptions(threshold=np.inf)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [2]:
dataset = pd.read_csv('propertiesUpdated4.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
categorical_data = X[:, [0, 1]]
encoder = OneHotEncoder()
onehot_encoded = encoder.fit_transform(categorical_data)
onehot_encoded_array = onehot_encoded.toarray()
preprocessed_X = np.concatenate([onehot_encoded_array, X[:, 2:]], axis=1)


In [4]:

print(y)

['6,850,000' '10,000,000' '5,700,000' '7,510,000' '8,511,300' '3,150,000'
 '3,500,000' '6,100,000' '2,278,506' '4,112,700' '5,500,000' '7,346,875'
 '4,200,000' '4,500,000' '16,000,000' '2,400,000' '3,200,000' '2,800,000'
 '9,200,000' '14,500,000' '1,286,000' '1,950,000' '10,130,000' '3,500,000'
 '3,440,000' '3,250,000' '3,500,000' '3,000,000' '3,300,000' '2,600,000'
 '1,000,000' '4,850,000' '2,400,000' '4,450,000' '3,328,000' '4,000,000'
 '14,250,000' '2,545,000' '2,375,000' '15,000,000' '2,900,000' '5,250,000'
 '5,750,000' '5,300,000' '3,120,000' '3,892,200' '4,434,000' '3,000,000'
 '4,000,000' '1,600,000' '6,600,000' '1,933,000' '1,600,000' '3,845,000'
 '4,150,000' '1,000,000' '3,150,000' '2,600,000' '1,950,000' '1,480,000'
 '14,900,000' '9,500,000' '3,417,000' '2,340,000' '5,600,000' '4,000,000'
 '3,700,000' '2,200,000' '5,268,000' '2,650,000' '2,700,000' '2,147,280'
 '7,200,000' '2,250,000' '4,500,000' '5,250,000' '6,500,000' '6,000,000'
 '13,133,316' '6,000,000' '7,600,000' '4,350

In [5]:
print(X)

[['Duplex' ' North Investors Area' 4 4 '345']
 ['Villa' ' 5th Settlement Compounds' 3 3 '285']
 ['Apartment' ' 5th Settlement Compounds' 3 3 '210']
 ['Townhouse' ' New Capital Compounds' 4 4 '230']
 ['Penthouse' ' 5th Settlement Compounds' 5 6 '284']
 ['Apartment' ' Ring Road' 3 2 '172']
 ['Villa' ' North Investors Area' 5 4 '250']
 ['Townhouse' ' 5th Settlement Compounds' 4 4 '220']
 ['Apartment' ' Ring Road' 2 2 '114']
 ['Apartment' ' 5th Settlement Compounds' 3 3 '204']
 ['Apartment' ' 5th Settlement Compounds' 4 3 '172']
 ['Villa' ' 5th Settlement Compounds' 3 3 '327']
 ['Villa' ' New Capital Compounds' 4 3 '330']
 ['Apartment' ' Al Rehab' 3 3 '235']
 ['Villa' ' 5th Settlement Compounds' 4 3 '777']
 ['Apartment' ' Ring Road' 1 2 '124']
 ['Apartment' ' 5th Settlement Compounds' 3 3 '185']
 ['Penthouse' ' 5th Settlement Compounds' 3 2 '165']
 ['Villa' ' South Investors Area' 5 4 '465']
 ['Villa' ' 5th Settlement Compounds' 4 6 '320']
 ['Apartment' ' Mostakbal City Compounds' 2 2 '125

In [6]:
num_onehot_columns = onehot_encoded_array.shape[1]
numerical_features = preprocessed_X[:, num_onehot_columns:]
onehot_encoded_features = preprocessed_X[:, :num_onehot_columns]
for col_idx in range(numerical_features.shape[1]):
    col_values = numerical_features[:, col_idx]
    for row_idx, value in enumerate(col_values):
        try:
            numerical_features[row_idx, col_idx] = float(value.replace(',', '')) if isinstance(value, str) else value
        except AttributeError:
            # If the value is not a string, just keep it as is
            pass

standard_scaler = StandardScaler()
standardized_numerical_features = standard_scaler.fit_transform(numerical_features)

scaled_X_standard = np.concatenate([onehot_encoded_features, standardized_numerical_features], axis=1)

X = scaled_X_standard

In [7]:
import numpy as np

def remove_commas_from_array(arr):
    """
    Removes commas from the elements in a NumPy array with nested lists.

    Parameters:
    arr (numpy.ndarray): The input NumPy array.

    Returns:
    numpy.ndarray: A new NumPy array with commas removed from the elements.
    """
    # Flatten the nested lists in the array
    

    # Remove commas from values and convert to float
    new_arr = np.array([float(''.join(str(value).split(','))) for value in arr])

    return new_arr

In [8]:
# Assuming 'Y' contains the prices you want to remove commas from
Y = np.array(y)

# Call the function to remove commas from 'Y'
new_Y = remove_commas_from_array(Y)
print(new_Y)

[6.85000000e+06 1.00000000e+07 5.70000000e+06 7.51000000e+06
 8.51130000e+06 3.15000000e+06 3.50000000e+06 6.10000000e+06
 2.27850600e+06 4.11270000e+06 5.50000000e+06 7.34687500e+06
 4.20000000e+06 4.50000000e+06 1.60000000e+07 2.40000000e+06
 3.20000000e+06 2.80000000e+06 9.20000000e+06 1.45000000e+07
 1.28600000e+06 1.95000000e+06 1.01300000e+07 3.50000000e+06
 3.44000000e+06 3.25000000e+06 3.50000000e+06 3.00000000e+06
 3.30000000e+06 2.60000000e+06 1.00000000e+06 4.85000000e+06
 2.40000000e+06 4.45000000e+06 3.32800000e+06 4.00000000e+06
 1.42500000e+07 2.54500000e+06 2.37500000e+06 1.50000000e+07
 2.90000000e+06 5.25000000e+06 5.75000000e+06 5.30000000e+06
 3.12000000e+06 3.89220000e+06 4.43400000e+06 3.00000000e+06
 4.00000000e+06 1.60000000e+06 6.60000000e+06 1.93300000e+06
 1.60000000e+06 3.84500000e+06 4.15000000e+06 1.00000000e+06
 3.15000000e+06 2.60000000e+06 1.95000000e+06 1.48000000e+06
 1.49000000e+07 9.50000000e+06 3.41700000e+06 2.34000000e+06
 5.60000000e+06 4.000000

In [9]:
Y = np.array(new_Y).reshape(-1, 1)

# Apply Standardization (Z-score scaling) to the prices in Y
price_scaler = StandardScaler()
scaled_Y = price_scaler.fit_transform(Y)

# Convert the scaled Y back to a 1D array if needed
scaled_Y = scaled_Y.flatten()
y = scaled_Y

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [11]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [12]:
y_pred = regressor.predict(X_test)


# Calculate evaluation metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R2) Score:", r2)

Mean Absolute Error (MAE): 0.3092028030917424
Mean Squared Error (MSE): 1.240769390007197
Root Mean Squared Error (RMSE): 1.113898285305798
R-squared (R2) Score: 0.3066547526851201


In [13]:
def predict_house_price(type_of_house, location, bedrooms, bathrooms, sqm, regressor, encoder, scaler):
    """
    Predict the price of a house using the trained regressor.

    Parameters:
    type_of_house (str): Type of house from the list of types.
    location (str): Location from the list of locations.
    bedrooms (int): Number of bedrooms.
    bathrooms (int): Number of bathrooms.
    sqm (float): Square meters of the house.
    regressor: Trained regression model (e.g., LinearRegression).
    encoder: OneHotEncoder used for encoding the type_of_house and location.
    scaler: StandardScaler used for scaling numerical features.

    Returns:
    float: Predicted price of the house.
    """

    # Create a list of features using the input values
    feature_values = [[type_of_house, location]]

    # Encode the categorical features (type_of_house and location)
    encoded_features = encoder.transform(feature_values)

    # Scale the numerical features (bedrooms, bathrooms, sqm)
    numerical_features = np.array([[bedrooms, bathrooms, sqm]])
    scaled_features = scaler.transform(numerical_features)

    # Combine the encoded and scaled features
    processed_features = np.hstack((encoded_features.toarray(), scaled_features))

    # Predict the price using the regressor
    predicted_price = regressor.predict(processed_features)

    return predicted_price[0]  # Return the first (and only) prediction as a single float

In [14]:
def unscale_price(scaled_price, scaler):
    """
    Unscales a scaled price using the provided scaler.

    Parameters:
    scaled_price (float): The scaled price value to be unscaled.
    scaler: The StandardScaler used for scaling the target variable.

    Returns:
    float: The unscaled price value.
    """
    unscaled_price = scaler.inverse_transform([[scaled_price]])
    return unscaled_price[0, 0]

In [15]:
# Assuming you have trained a regressor 'regressor', encoded the categorical features with 'encoder',
# and scaled the numerical features with 'scaler'

type_of_house = "Villa"  # Replace with the actual type from the list of types
location = " 5th Settlement Compounds"  # Replace with the actual location from the list of locations
bedrooms = 6
bathrooms = 4
sqm = 650.0

predicted_price = predict_house_price(type_of_house, location, bedrooms, bathrooms, sqm, regressor, encoder, standard_scaler)
print("Predicted Price:", unscale_price(predicted_price , price_scaler))

Predicted Price: 17519756.905076645


In [23]:
import pandas as pd

# Read the Excel file (replace 'your_file.xlsx' with the actual file path)
data = pd.read_csv('propertiesUpdated4.csv')

# Access the specific column (replace 'Column_Name' with the actual column name)
column_data = data['type']

# Get unique values as an array
unique_values = column_data.unique()

# Display the array
print(unique_values)
print(unique_values[0])


['Duplex' 'Villa' 'Apartment' 'Townhouse' 'Penthouse' 'iVilla'
 'Twin House' 'Hotel Apartment' 'Chalet' 'Compound']
Duplex


In [27]:
import tkinter as tk
from tkinter import ttk
import numpy as np
from sklearn.preprocessing import StandardScaler

# Define your trained regressor, encoder, and price scaler here
# regressor = ...
# encoder = ...
# standard_scaler = ...

def predict_house_price(type_of_house, location, bedrooms, bathrooms, sqm, regressor, encoder, scaler):
    """
    Predict the price of a house using the trained regressor.

    Parameters:
    type_of_house (str): Type of house from the list of types.
    location (str): Location from the list of locations.
    bedrooms (int): Number of bedrooms.
    bathrooms (int): Number of bathrooms.
    sqm (float): Square meters of the house.
    regressor: Trained regression model (e.g., LinearRegression).
    encoder: OneHotEncoder used for encoding the type_of_house and location.
    scaler: StandardScaler used for scaling numerical features.

    Returns:
    float: Predicted price of the house.
    """

    # Create a list of features using the input values
    feature_values = [[type_of_house, location]]

    # Encode the categorical features (type_of_house and location)
    encoded_features = encoder.transform(feature_values)

    # Scale the numerical features (bedrooms, bathrooms, sqm)
    numerical_features = np.array([[bedrooms, bathrooms, sqm]])
    scaled_features = scaler.transform(numerical_features)

    # Combine the encoded and scaled features
    processed_features = np.hstack((encoded_features.toarray(), scaled_features))

    # Predict the price using the regressor
    predicted_price = regressor.predict(processed_features)

    return predicted_price[0]  # Return the first (and only) prediction as a single float
def unscale_price(scaled_price, scaler):
    """
    Unscales a scaled price using the provided scaler.

    Parameters:
    scaled_price (float): The scaled price value to be unscaled.
    scaler: The StandardScaler used for scaling the target variable.

    Returns:
    float: The unscaled price value.
    """
    unscaled_price = scaler.inverse_transform([[scaled_price]])
    return unscaled_price[0, 0]
def on_predict_click():
    type_of_house = house_type_var.get()
    location = location_var.get()
    bedrooms = int(bedrooms_var.get())
    bathrooms = int(bathrooms_var.get())
    sqm = float(sqm_var.get())

    scaled_price = predict_house_price(type_of_house, location, bedrooms, bathrooms, sqm ,regressor, encoder, standard_scaler)
    unscaled_price = unscale_price(scaled_price, price_scaler)

    predicted_label.config(text=f"Predicted Price: {unscaled_price:.2f} $")

# Replace this with your actual list of types and locations
types_of_house = ['Duplex' ,'Villa' ,'Apartment', 'Townhouse', 'Penthouse' ,'iVilla',
 'Twin House', 'Hotel Apartment', 'Chalet' ,'Compound']
locations = array = [' North Investors Area', ' 5th Settlement Compounds', ' New Capital Compounds',
 ' Ring Road', ' Al Rehab', ' South Investors Area', ' Mostakbal City Compounds',
 ' El Shorouk Compounds', ' El Ismailia Square', ' El Katameya Compounds',
 ' Al Andalus District', ' El Banafseg', ' El Korba', ' Maadi', ' 90 Street',
 ' The 1st Settlement', ' El Patio', ' Downtown Area', ' Uptown Cairo',
 ' Al Narges', ' District 1', ' 6th District', ' Cairo', ' El Banafseg 9',
 ' Hay El Maadi', ' The 5th Settlement', ' Ext North Inves Area',
 ' New Capital City', ' New Fustat', ' 3rd District West', ' Ard El Golf',
 ' Shorouk City', ' North Teseen St.', ' South Teseen St.', ' 1st Area',
 ' Area C', ' Degla', ' The 3rd Settlement', ' Rehab City Third Phase',
 ' 5th District', ' Zamalek', ' Manteqet Al Cinema', ' Al Hadaba Al Wosta',
 ' Mokattam', ' New Cairo City', ' 6th Zone', ' Mostakbal City - Future City',
 ' Almazah', ' Garden City', ' 9th District', ' 2nd Area', ' 1st Neighborhood',
 ' El Yasmeen', ' Zahraa El Maadi', ' Sheraton Al Matar',
 ' Rehab City Second Phase', ' 3rd Area', ' Al Hadaba Al Olya',
 ' El Tawfik City', ' El Banafseg 11', ' New Maadi Extension', ' 8th Zone',
 ' Al Hadiqah Al Dawliyah', ' Al Narges 7', ' 9th Area', ' Al Waha City',
 ' El Yasmeen 3', ' Ganoob El Acadimia', ' Rehab City First Phase',
 ' El Hegaz Square', ' New Maadi', ' District 2', ' Group 11', ' El Yasmeen 4',
 ' El Banafseg 1', ' El Lotus', ' District 5', ' Al Narges 1', ' Al Narges 4',
 ' Al Narges 8', ' Nasr City Compounds', ' El Yasmeen 7',
 ' Cairo - Ismailia Desert Road', ' Al Gamea Square', ' El Nozha', ' Roxy',
 ' Rehab City Fifth Phase', ' District 4', ' El Banafseg 5', ' El Manial',
 ' Bulaq Abo El Ela', ' Area A', ' New Heliopolis', ' Al Narges 5',
 ' Al Narges 6', ' 8th Area', ' El Nozha El Gadida', ' Al Sefarat District',
 ' Rehab City Sixth Phase', ' El Banafseg 10', ' 10th District',
 ' El Yasmeen 5', ' El Banafseg 8', ' 7th District', ' Al Nadi Al Ahly',
 ' El Mearag City', ' Hadayek El Maadi', ' El Mahkama Square',
 ' Al Wafa Wa Al Amal', ' Abou El Houl', ' 1st Zone', ' Area F',
 ' Rehab City Forth Phase', ' Hadayek El Zaytoun', ' 10th Zone',
 ' Mohamed Naguib Axis', ' Berket an Nasr', ' District 3', ' 3rd District',
 ' El Banafseg 6', ' 3rd District East', ' Heliopolis Square', ' El Laselky',
 ' Neighborhood K', ' 2nd Neighborhood', ' 4th Neighborhood',
 ' 2nd District East', ' 1st District East', ' Hadayek El Kobba',
 ' Heliopolis - Masr El Gedida', ' 9th Zone', ' Nasr City', ' Area E',
 ' 1st District West', ' Hay El Manial', ' El Banafseg 7',
 ' Zahraa Madinat Nasr', ' El Estad', ' Badr City', ' Mansheya El Bakry',
 ' Al Tagned Square', ' Shobra', ' 4th District West', ' Triumph Square',
 ' Abbasia', ' Masaken Al Mohandesin', ' 2nd District West', ' 5th Area',
 ' Rabaa Al Adaweyah', ' Abdeen']



# Create the main application window
app = tk.Tk()
app.title("House Price Predictor")

# Create and set up the input widgets
house_type_var = tk.StringVar(value=types_of_house[0])
location_var = tk.StringVar(value=locations[0])
bedrooms_var = tk.StringVar(value="3")
bathrooms_var = tk.StringVar(value="3")
sqm_var = tk.StringVar(value="175.0")

house_type_label = ttk.Label(app, text="Type of House:")
house_type_combobox = ttk.Combobox(app, textvariable=house_type_var, values=types_of_house)

location_label = ttk.Label(app, text="Location:")
location_combobox = ttk.Combobox(app, textvariable=location_var, values=locations)

bedrooms_label = ttk.Label(app, text="Number of Bedrooms:")
bedrooms_entry = ttk.Entry(app, textvariable=bedrooms_var)

bathrooms_label = ttk.Label(app, text="Number of Bathrooms:")
bathrooms_entry = ttk.Entry(app, textvariable=bathrooms_var)

sqm_label = ttk.Label(app, text="Square Meters:")
sqm_entry = ttk.Entry(app, textvariable=sqm_var)

predict_button = ttk.Button(app, text="Predict Price", command=on_predict_click)
predicted_label = ttk.Label(app, text="Predicted Price: ")

# Layout the widgets using the grid geometry manager
house_type_label.grid(row=0, column=0, padx=5, pady=5)
house_type_combobox.grid(row=0, column=1, padx=5, pady=5)

location_label.grid(row=1, column=0, padx=5, pady=5)
location_combobox.grid(row=1, column=1, padx=5, pady=5)

bedrooms_label.grid(row=2, column=0, padx=5, pady=5)
bedrooms_entry.grid(row=2, column=1, padx=5, pady=5)

bathrooms_label.grid(row=3, column=0, padx=5, pady=5)
bathrooms_entry.grid(row=3, column=1, padx=5, pady=5)

sqm_label.grid(row=4, column=0, padx=5, pady=5)
sqm_entry.grid(row=4, column=1, padx=5, pady=5)

predict_button.grid(row=5, column=0, columnspan=2, padx=5, pady=5)
predicted_label.grid(row=6, column=0, columnspan=2, padx=5, pady=5)

# Start the main event loop
app.mainloop()
