In [12]:
import numbers
import re

import numpy as np
import pandas as pd
import seaborn as sns

In [13]:
# Load the raw property listings; the file name is resolved relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='property_listing_data_in_Bangladesh.csv')

In [14]:
# 4. Clean Price & Area
# Scale factor for every unit keyword the parser recognises. Keys are matched
# against lower-cased text, so they must stay lower-case.
_PRICE_UNIT_MULTIPLIERS = {
    'crore': 10000000,
    'lakh': 100000,
    'lac': 100000,  # alternative spelling of "lakh"
    'million': 1000000,
    'thousand': 1000,
    'k': 1000,
}

# One "<number> <optional unit>" token.
#   Group 1: an integer/decimal ("25", "1.5") or a leading-dot decimal (".5").
#            The leading-dot form is rejected when the dot follows a word
#            character or another dot, so "tk.5000" still reads as 5000.
#   Group 2: an optional unit keyword. Long keywords match as prefixes so
#            plurals ("lakhs", "crores") work; the bare "k" must not be
#            followed by another letter, otherwise words such as "katha" or
#            "km" would be mistaken for "thousand".
_PRICE_TOKEN_PATTERN = re.compile(
    r'(\d+(?:\.\d+)?|(?<![\w.])\.\d+)\s*'
    r'(crore|lakh|lac|million|thousand|k(?![a-z]))?'
)


def convert_price_to_number(price):
    """
    Convert a price given as a number or as free-form text into one float.

    Text may combine several "<number> <unit>" parts (for example
    "1 crore 20 lakh"); each number is multiplied by its unit and all parts
    are summed. A number without a recognised unit is taken at face value.
    Matching is case-insensitive and commas are stripped before parsing.

    Note that *every* number found in the text contributes to the sum, so
    digits unrelated to the price (e.g. "20000 for 2 months") are included.

    Parameters
    ----------
    price : object
        - Any real number (int, float, bool, NumPy scalar, ...): returned as
          ``float(price)``. A float NaN is therefore returned as NaN.
        - str: parsed as described above.
        - Anything else (e.g. None): treated as unknown.

    Returns
    -------
    float
        The parsed amount, or 0.0 when the input is neither a number nor a
        string, or when the string contains no digits.
    """
    # numbers.Real (rather than int/float) also covers NumPy integer scalars,
    # which would otherwise fall through to the "unknown" branch and become 0.
    if isinstance(price, numbers.Real):
        return float(price)

    # Non-numeric, non-text values carry no price information.
    if not isinstance(price, str):
        return 0.0

    # Lower-case for unit matching and drop thousands separators ("1,50,000").
    normalized = price.lower().replace(',', '')

    # Unit is '' when a number has no keyword; such numbers are used as-is.
    return sum(
        (
            float(number_text) * _PRICE_UNIT_MULTIPLIERS.get(unit, 1)
            for number_text, unit in _PRICE_TOKEN_PATTERN.findall(normalized)
        ),
        0.0,
    )

# Replace each raw price entry with its numeric value, element by element.
df['price'] = df['price'].map(convert_price_to_number)

In [15]:
# Ten highest-priced listings, shown with their location and size.
print("\n--- Top 10 Most Expensive Properties ---")
top_expensive = df.nlargest(n=10, columns='price')
print(top_expensive.loc[:, ['price', 'address', 'area']])


--- Top 10 Most Expensive Properties ---
          price                             address         area
6936  2000000.0           Block K, Baridhara, Dhaka  31,500 sqft
5615  1660000.0           Block K, Baridhara, Dhaka  19,360 sqft
372   1000000.0  Road No 2, Sector 6, Uttara, Dhaka  16,100 sqft
861   1000000.0                    Baridhara, Dhaka   8,000 sqft
6925  1000000.0           Gulshan 2, Gulshan, Dhaka   5,200 sqft
7118  1000000.0           Block K, Baridhara, Dhaka  15,000 sqft
7548  1000000.0           Block K, Baridhara, Dhaka  12,000 sqft
3481   950000.0           Block K, Baridhara, Dhaka   6,500 sqft
7067   924000.0                    Dhanmondi, Dhaka   1,700 sqft
7381   800000.0           Block K, Baridhara, Dhaka  12,000 sqft


In [16]:
# Ten lowest-priced listings, shown with their location and size.
print("\n--- Bottom 10 Cheapest Properties ---")
bottom_cheap = df.nsmallest(n=10, columns='price')
print(bottom_cheap.loc[:, ['price', 'address', 'area']])


--- Bottom 10 Cheapest Properties ---
       price                                            address      area
605   5500.0  Aman Bazar, 1 No. South Pahartali Ward, Hathaz...  800 sqft
491   6000.0  Fakirhat, Bandar, 36 Goshail Danga Ward, Chatt...  650 sqft
4790  6200.0  Shantibag Residential Area, 24 No. North Agrab...  400 sqft
2930  6500.0                    4 No Chandgaon Ward, Chattogram  950 sqft
4772  6500.0  Ramna Residential Area, Rongipara, 24 No. Nort...  680 sqft
6685  6500.0                New Mooring, Halishahar, Chattogram  520 sqft
4702  7000.0                      Bamoil, Sarulia, Demra, Dhaka  600 sqft
4787  7000.0  24 No. North Agrabad Ward, Double Mooring, Cha...  650 sqft
6933  7000.0                New Mooring, Halishahar, Chattogram  560 sqft
146   7300.0                      Faydabad, Dakshin Khan, Dhaka  450 sqft


In [17]:
# Summary statistics (count, mean, spread, quartiles) of the numeric price column.
print(df.loc[:, 'price'].describe())

count    7.557000e+03
mean     3.407903e+04
std      6.471161e+04
min      5.500000e+03
25%      1.500000e+04
50%      1.900000e+04
75%      3.000000e+04
max      2.000000e+06
Name: price, dtype: float64
