In [127]:
# ======================================
# Required Libraries
# ======================================

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import widgets, interact
import re
from datetime import datetime

# ======================================
# Step 1: Define Helper Functions
# ======================================

def safe_divide(row):
    """
    Safely divide 'Asking Price' by 'MyHome_Floor_Area_Value'.
    Returns None if either value is NaN or if division by zero occurs.
    
    Args:
        row (pd.Series): A row from the DataFrame.
    
    Returns:
        float or None: The price per square meter or None.
    """
    if pd.isna(row['Asking Price']) or pd.isna(row['MyHome_Floor_Area_Value']) or row['MyHome_Floor_Area_Value'] == 0:
        return None
    else:
        return row['Asking Price'] / row['MyHome_Floor_Area_Value']

def get_price_details(price_changes: str):
    """
    Extracts the asking price at sale, sold price, sold date, and first list date from the Price Changes column.

    Args:
        price_changes (str): The value from the Price Changes column.

    Returns:
        tuple: A tuple containing:
            - Asking Price at Sale (float or None)
            - Sold Price (float or None)
            - Sold Date (str or None)
            - First List Date (str or None)
    """
    if not isinstance(price_changes, str):
        return (None, None, None, None)
    
    # Initialize variables
    sold_price = None
    sold_date = None
    asking_price_at_sale = None
    all_dates = []
    
    # Split the price changes by ';' to process each event separately
    entries = price_changes.split(';')
    
    for i, entry in enumerate(entries):
        entry = entry.strip()
        
        # Extract the date from the entry
        date_match = re.search(r"([A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{4})", entry)
        if date_match:
            date_str = date_match.group(1)
            try:
                date_obj = datetime.strptime(date_str, "%a %b %d %Y")
                all_dates.append(date_obj)
            except ValueError:
                # Handle unexpected date formats
                pass
        
        # Match the 'Sold' event
        sold_match = re.match(
            r"Sold,\s*€([\d,]+),\s*([A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{4})",
            entry,
            re.IGNORECASE
        )
        if sold_match:
            sold_price = float(sold_match.group(1).replace(',', ''))
            sold_date = sold_match.group(2)
            
            # Look for the next relevant event to find the asking price at sale
            for next_entry in entries[i+1:]:
                next_entry = next_entry.strip()
                asking_match = re.match(
                    r"(Sale Agreed|Price Drop|Created),\s*€([\d,]+),\s*[A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{4}",
                    next_entry,
                    re.IGNORECASE
                )
                if asking_match:
                    asking_price_at_sale = float(asking_match.group(2).replace(',', ''))
                    break
            # Assuming only one 'Sold' event exists
            break
    
    # Determine the first list date (earliest date)
    if all_dates:
        first_list_date_obj = min(all_dates)
        first_list_date = first_list_date_obj.strftime("%a %b %d %Y")
    else:
        first_list_date = None
    
    return (asking_price_at_sale, sold_price, sold_date, first_list_date)

# ======================================
# Step 2: Load the Data
# ======================================

# Define the file path
file_path = '/Users/johnmcenroe/Documents/programming_misc/real_estate/data/processed/scraped_dublin/added_metadata/scraped_property_results_Dublin_final_with_metadata_deduped.csv'

# Read the CSV into a DataFrame
df = pd.read_csv(file_path)

# ======================================
# Step 3: Data Cleaning and Preprocessing
# ======================================

# 3.1 Clean the 'Asking Price' Column
# -----------------------------------
# Remove any non-numeric characters (e.g., currency symbols, commas)
df['Asking Price'] = df['Asking Price'].replace({'[^0-9.]': ''}, regex=True)

# 3.2 Convert Columns to Numeric Types
# ------------------------------------
# Convert 'Asking Price' to numeric, coercing errors to NaN
df['Asking Price'] = pd.to_numeric(df['Asking Price'], errors='coerce')

# Convert 'MyHome_Floor_Area_Value' to numeric, coercing errors to NaN
df['MyHome_Floor_Area_Value'] = pd.to_numeric(df['MyHome_Floor_Area_Value'], errors='coerce')

# 3.3 Calculate Price per Square Meter
# ------------------------------------
# Apply the safe_divide function to create 'price_per_square_meter'
df['price_per_square_meter'] = df.apply(safe_divide, axis=1)

# 3.4 Extract Numeric Values from 'Beds' and 'Baths'
# ---------------------------------------------------
# Convert 'Beds' to string, extract numeric part, and convert to float
df['Beds'] = df['Beds'].astype(str).str.extract('(\d+)').astype(float)

# Convert 'Baths' to string, extract numeric part, and convert to float
df['Baths'] = df['Baths'].astype(str).str.extract('(\d+)').astype(float)

# ======================================
# Step 4: Add Boolean Columns
# ======================================

# 4.1 Define a list of property types considered as houses
house_types = [
    'End of Terrace', 'Terrace', 'Semi-D', 'Detached',
    'Duplex', 'Bungalow', 'Townhouse', 'Houses'
]

# 4.2 Add 'is_house' column indicating if the property is a house type
df['is_house'] = df['Property Type'].isin(house_types)

# ======================================
# Step 5: Extract Sold Price Details
# ======================================

# Apply the get_price_details function to extract details from 'Price Changes' column
df[['Sold Asking Price', 'Sold Price', 'Sold Date', 'First List Date']] = df['Price Changes'].apply(get_price_details).apply(pd.Series)

# 5.1 Convert 'Sold Asking Price' and 'Sold Price' to numeric types
df['Sold Asking Price'] = pd.to_numeric(df['Sold Asking Price'], errors='coerce')
df['Sold Price'] = pd.to_numeric(df['Sold Price'], errors='coerce')

# 5.2 Convert 'First List Date' to datetime format for better handling
df['First List Date'] = pd.to_datetime(df['First List Date'], format="%a %b %d %Y", errors='coerce')

# ======================================
# Step 6: Add 'my_home_listing' Boolean Column
# ======================================

# 6.1 Add 'my_home_listing' column indicating if 'price_per_square_meter' is greater than 0
df['my_home_listing'] = df['price_per_square_meter'] > 0

# ======================================
# Step 7: Final DataFrame Preview
# ======================================

# Display the first five rows of the updated DataFrame
print("Final DataFrame Preview:")
df.head(5)

# ======================================
# Step 8: (Optional) Save the Processed Data
# ======================================

# If you wish to save the processed DataFrame to a new CSV file
# df.to_csv('/path/to/save/processed_data.csv', index=False)


Final DataFrame Preview:


Unnamed: 0,Address,Asking Price,Beds,Baths,Property Type,Energy Rating,Eircode,Local Property Tax,Agency Name,Agency Contact,Price Changes,URL,MyHome_Address,MyHome_Asking_Price,MyHome_Beds,MyHome_Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,MyHome_Link,price_per_square_meter,is_house,Sold Asking Price,Sold Price,Sold Date,First List Date,my_home_listing
0,"Taramar, Middle Third, Dublin 5, D05X8N9",750000,4.0,1.0,End of Terrace,E2,D05 X8N9,€765,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,"Sold, €950,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/126051/Dublin/tarama...,"Taramar, Middle Third, Killester, Dublin 5",Sold,4 beds,,142.0,,53.373857,-6.203371,https://www.myhome.ie/residential/brochure/tar...,5281.690141,True,750000.0,950000.0,Fri Sep 13 2024,2024-09-13,True
1,"87 Haddington Road, Dublin 4, D04WP23",990000,5.0,3.0,Terrace,SI_666,D04 WP23,"€1,035",Turley Property Advisors,Susan Turley,"Sold, €1,010,000, Fri Sep 13 2024; Sale Agreed...",,"87 Haddington Road, Ballsbridge, Dublin 4",Sold,5 beds,3 baths,175.0,,53.335385,-6.239842,https://www.myhome.ie/residential/brochure/87-...,5657.142857,True,990000.0,1010000.0,Fri Sep 13 2024,2024-09-13,True
2,"Apartment 79, The Northumberlands, Love Lane E...",410000,2.0,1.0,Apartment,C1,D02 X068,€405,Owen Reilly,Owen Reilly Sales,"Sold, €480,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/134218/Dublin/apartm...,"79 The Northumberlands, Love Lane East, Mount ...",Sold,2 beds,,64.0,,53.349805,-6.26031,https://www.myhome.ie/residential/brochure/79-...,6406.25,False,410000.0,480000.0,Fri Sep 13 2024,2024-09-13,True
3,"7 Parkside Heath, Clongriffin, Dublin 13, Dubl...",535000,3.0,3.0,Terrace,A3,D13 WN3C,€585,Sherry FitzGerald Sutton,Madeleine O'Connor,"Sold, €569,000, Fri Sep 13 2024; Unlisted, €53...",https://mynest.ie/listing/128619/Dublin/7-park...,"7 Parkside Heath, Balgriffin, Dublin 13",Sold,3 beds,3 baths,113.0,,53.407653,-6.163418,https://www.myhome.ie/residential/brochure/7-p...,4734.513274,True,535000.0,569000.0,Fri Sep 13 2024,2024-09-13,True
4,"5 Herbert Road, Blanchardstown, Blanchardstown...",400000,2.0,1.0,Bungalow,D2,D15 A3TN,€405,Lloyd Daly & Associates Ltd.,Lloyd Daly & Associates - Sales,"Sold, €432,200, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/117502/Dublin/5-herb...,"5 Herbert Road, Blanchardstown, Dublin 15",Sold,2 beds,,116.0,,53.387605,-6.374562,https://www.myhome.ie/residential/brochure/5-h...,3448.275862,True,400000.0,432200.0,Fri Sep 13 2024,2024-09-13,True


In [128]:
df.head(10)

Unnamed: 0,Address,Asking Price,Beds,Baths,Property Type,Energy Rating,Eircode,Local Property Tax,Agency Name,Agency Contact,Price Changes,URL,MyHome_Address,MyHome_Asking_Price,MyHome_Beds,MyHome_Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,MyHome_Link,price_per_square_meter,is_house,Sold Asking Price,Sold Price,Sold Date,First List Date,my_home_listing
0,"Taramar, Middle Third, Dublin 5, D05X8N9",750000,4.0,1.0,End of Terrace,E2,D05 X8N9,€765,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,"Sold, €950,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/126051/Dublin/tarama...,"Taramar, Middle Third, Killester, Dublin 5",Sold,4 beds,,142.0,,53.373857,-6.203371,https://www.myhome.ie/residential/brochure/tar...,5281.690141,True,750000.0,950000.0,Fri Sep 13 2024,2024-09-13,True
1,"87 Haddington Road, Dublin 4, D04WP23",990000,5.0,3.0,Terrace,SI_666,D04 WP23,"€1,035",Turley Property Advisors,Susan Turley,"Sold, €1,010,000, Fri Sep 13 2024; Sale Agreed...",,"87 Haddington Road, Ballsbridge, Dublin 4",Sold,5 beds,3 baths,175.0,,53.335385,-6.239842,https://www.myhome.ie/residential/brochure/87-...,5657.142857,True,990000.0,1010000.0,Fri Sep 13 2024,2024-09-13,True
2,"Apartment 79, The Northumberlands, Love Lane E...",410000,2.0,1.0,Apartment,C1,D02 X068,€405,Owen Reilly,Owen Reilly Sales,"Sold, €480,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/134218/Dublin/apartm...,"79 The Northumberlands, Love Lane East, Mount ...",Sold,2 beds,,64.0,,53.349805,-6.26031,https://www.myhome.ie/residential/brochure/79-...,6406.25,False,410000.0,480000.0,Fri Sep 13 2024,2024-09-13,True
3,"7 Parkside Heath, Clongriffin, Dublin 13, Dubl...",535000,3.0,3.0,Terrace,A3,D13 WN3C,€585,Sherry FitzGerald Sutton,Madeleine O'Connor,"Sold, €569,000, Fri Sep 13 2024; Unlisted, €53...",https://mynest.ie/listing/128619/Dublin/7-park...,"7 Parkside Heath, Balgriffin, Dublin 13",Sold,3 beds,3 baths,113.0,,53.407653,-6.163418,https://www.myhome.ie/residential/brochure/7-p...,4734.513274,True,535000.0,569000.0,Fri Sep 13 2024,2024-09-13,True
4,"5 Herbert Road, Blanchardstown, Blanchardstown...",400000,2.0,1.0,Bungalow,D2,D15 A3TN,€405,Lloyd Daly & Associates Ltd.,Lloyd Daly & Associates - Sales,"Sold, €432,200, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/117502/Dublin/5-herb...,"5 Herbert Road, Blanchardstown, Dublin 15",Sold,2 beds,,116.0,,53.387605,-6.374562,https://www.myhome.ie/residential/brochure/5-h...,3448.275862,True,400000.0,432200.0,Fri Sep 13 2024,2024-09-13,True
5,"Apartment 40, The Swift, Tassagard Greens, Sag...",290000,2.0,2.0,Apartment,B3,D24 KF58,€315,Smith & Butler Estates,Danny Butler,"Sold, €340,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/106324/Dublin/apartm...,,,,,,,53.281393,-6.440669,https://www.myhome.ie/priceregister/40-the-swi...,,False,290000.0,340000.0,Fri Sep 13 2024,2024-09-13,False
6,"31 Tibradden Grove, Dublin 12, D12P2X4",355000,3.0,1.0,Terrace,D1,D12 P2X4,€405,Byrne and Moore Property Consultants Limited,Bryne & Moore Property,"Sold, €390,000, Fri Sep 13 2024; Sale Agreed, ...",,"14, Tibradden Grove, Greenpark, Walkinstown, D...",Sold,3 beds,,106.0,,53.308228,-6.34192,https://www.myhome.ie/residential/brochure/14-...,3349.056604,True,355000.0,390000.0,Fri Sep 13 2024,2024-09-13,True
7,"Apartment 207, The Edges 1, Beacon South Quart...",375000,2.0,2.0,Apartment,B3,D18 KX68,€405,Herbert & Lansdowne Estate Agents,Sales Department,"Sold, €387,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/109892/Dublin/apartm...,"207 The Edges 1, Beacon South Quarter, Sandyfo...",Sold,2 beds,2 baths,64.0,,53.27797,-6.216868,https://www.myhome.ie/residential/brochure/207...,5859.375,False,375000.0,387000.0,Fri Sep 13 2024,2024-09-13,True
8,"80 Moatfield Road, Coolock, Coolock, Dublin 5,...",395000,3.0,1.0,Terrace,E1,D05 X9C0,€405,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,"Sold, €435,000, Thu Sep 12 2024; Sale Agreed, ...",https://mynest.ie/listing/116212/Dublin/80-moa...,"80 Moatfield Road, Coolock, Dublin 5",Sold,3 beds,,95.0,,53.386624,-6.192724,https://www.myhome.ie/residential/brochure/80-...,4157.894737,True,395000.0,435000.0,Thu Sep 12 2024,2024-09-12,True
9,"79 South Circular Road, Dublin 8, D08HR77",1350000,4.0,3.0,Terrace,SI_666,D08 HR77,"€1,846",Leonard Wilson Keenan Estates & Letting Agents,Patrick Leonard,"Sold, €1,490,000, Thu Sep 12 2024; Sale Agreed...",https://mynest.ie/listing/133579/Dublin/79-sou...,,,,3 baths,173.0,,53.335004,-6.293548,https://www.myhome.ie/residential/dublin-8/per...,7803.468208,True,1350000.0,1490000.0,Thu Sep 12 2024,2024-09-12,True


In [129]:
row_count = pd.read_csv(file_path).shape[0]
print(f"Number of rows: {row_count}")


Number of rows: 1821


In [130]:
df[df['price_per_square_meter'].isna()].head(5)

Unnamed: 0,Address,Asking Price,Beds,Baths,Property Type,Energy Rating,Eircode,Local Property Tax,Agency Name,Agency Contact,Price Changes,URL,MyHome_Address,MyHome_Asking_Price,MyHome_Beds,MyHome_Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,MyHome_Link,price_per_square_meter,is_house,Sold Asking Price,Sold Price,Sold Date,First List Date,my_home_listing
5,"Apartment 40, The Swift, Tassagard Greens, Sag...",290000,2.0,2.0,Apartment,B3,D24 KF58,€315,Smith & Butler Estates,Danny Butler,"Sold, €340,000, Fri Sep 13 2024; Sale Agreed, ...",https://mynest.ie/listing/106324/Dublin/apartm...,,,,,,,53.281393,-6.440669,https://www.myhome.ie/priceregister/40-the-swi...,,False,290000.0,340000.0,Fri Sep 13 2024,2024-09-13,False
18,"43 Hayestown, Rush, Co. Dublin",319950,2.0,1.0,Bungalow,D1,K56 Y206,€315,Leonard Wilson Keenan Estates & Letting Agents,Multi Award Winning Team Richard Todd,"Sold, €453,000, Thu Sep 12 2024; Unlisted, €31...",https://mynest.ie/listing/66930/Dublin/43-haye...,"43 Hayestown, Rush, Dublin",Sale Agreed,2 beds,,,,53.524124,-6.113056,https://www.myhome.ie/residential/brochure/43-...,,True,319950.0,453000.0,Thu Sep 12 2024,2024-09-12,False
19,"43 Hayestown, Rush, Co. Dublin, K56Y206",365000,2.0,1.0,Bungalow,D1,K56 Y206,€405,Leonard Wilson Keenan Estates & Letting Agents,Richard Todd,"Sold, €453,000, Thu Sep 12 2024; Unlisted, €36...",https://mynest.ie/listing/116836/Dublin/43-hay...,,,,,,,,,,,True,365000.0,453000.0,Thu Sep 12 2024,2024-09-12,False
25,"183 Charlemont, Griffith Avenue, Drumcondra, D...",525000,3.0,1.0,Terrace,C3,D09 E5W4,€495,KELLY BRADSHAW DALTON,Sharon Beckett,"Sold, €530,000, Thu Sep 12 2024; Sale Agreed, ...",https://mynest.ie/listing/131480/Dublin/183-ch...,"147 Charlemont, Griffith Avenue, Drumcondra, D...",Sale Agreed,4 beds,3 baths,,,53.376708,-6.233381,https://www.myhome.ie/residential/brochure/147...,,True,525000.0,530000.0,Thu Sep 12 2024,2024-09-12,False
31,"Apartment 6, 45 Saint Anthony's Road, Rialto, ...",350000,2.0,2.0,Apartment,SI_666,D08 XD73,€315,Keller Williams Ireland,Team Castles,"Sold, €390,000, Wed Sep 11 2024; Sale Agreed, ...",https://mynest.ie/listing/122564/Dublin/apartm...,"Apartment 6, 45 St Anthonys Road, Rialto, Dubl...",Sold,2 beds,2 baths,,,53.337832,-6.293655,https://www.myhome.ie/residential/brochure/apa...,,False,350000.0,390000.0,Wed Sep 11 2024,2024-09-11,False


In [131]:
# Get the statistical summary of the dataset
df_description = df.describe()

# Display the description
df_description

Unnamed: 0,Asking Price,Beds,Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,price_per_square_meter,Sold Asking Price,Sold Price,First List Date
count,1821.0,1814.0,1797.0,1080.0,0.0,1783.0,1783.0,1080.0,1819.0,1821.0,1821
mean,580046.0,2.880375,1.965498,105.739648,,53.306174,-6.172207,5354.982428,578834.9,622331.4,2024-08-11 22:39:20.461285120
min,150000.0,1.0,1.0,24.0,,-20.706854,-6.482501,994.318182,150000.0,80000.0,2024-03-28 00:00:00
25%,349950.0,2.0,1.0,71.0,,53.290903,-6.304371,4159.702692,349000.0,380000.0,2024-07-30 00:00:00
50%,449000.0,3.0,2.0,93.325,,53.338714,-6.247742,5111.455108,449000.0,485000.0,2024-08-14 00:00:00
75%,675000.0,3.0,3.0,125.0,,53.384024,-6.193442,6294.117647,675000.0,725000.0,2024-08-28 00:00:00
max,6750000.0,9.0,7.0,498.0,,53.617567,140.505325,22551.546392,6750000.0,7255000.0,2024-09-13 00:00:00
std,434697.0,1.070345,0.956708,53.140745,,1.755186,3.476683,1714.005612,433936.9,450159.4,


In [132]:
# Display column names as a list
def get_column_info(df):
    return pd.DataFrame({'Column Name': df.columns, 'Data Type': df.dtypes})

print(get_column_info(df))


                                     Column Name       Data Type
Address                                  Address          object
Asking Price                        Asking Price           int64
Beds                                        Beds         float64
Baths                                      Baths         float64
Property Type                      Property Type          object
Energy Rating                      Energy Rating          object
Eircode                                  Eircode          object
Local Property Tax            Local Property Tax          object
Agency Name                          Agency Name          object
Agency Contact                    Agency Contact          object
Price Changes                      Price Changes          object
URL                                          URL          object
MyHome_Address                    MyHome_Address          object
MyHome_Asking_Price          MyHome_Asking_Price          object
MyHome_Beds              

In [143]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import logging
from math import radians, sin, cos, sqrt, atan2

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculate the Haversine distance between two points on the Earth.
    """
    # Earth radius in kilometers
    R = 6371.0

    # Convert coordinates to radians
    lat1_rad, lon1_rad = radians(lat1), radians(lon1)
    lat2_rad, lon2_rad = radians(lat2), radians(lon2)

    # Differences
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    # Haversine formula
    a = sin(dlat / 2)**2 + cos(lat1_rad) * cos(lat2_rad) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return distance

def preprocess_dataframe(df):
    """
    Preprocess the dataframe by converting columns to appropriate data types and handling missing values.
    """
    try:
        # Define required columns with their expected data types
        required_columns = {
            'Sold Date': 'datetime',
            'Price Changes': 'numeric',
            'Local Property Tax': 'numeric',
            'MyHome_Asking_Price': 'numeric',
            'MyHome_Beds': 'numeric',
            'MyHome_Baths': 'numeric',
            'First List Date': 'datetime'
        }

        # Ensure column names are stripped of leading/trailing spaces
        df.columns = df.columns.str.strip()

        # Convert and handle each required column
        for col, dtype in required_columns.items():
            if col in df.columns:
                if dtype == 'datetime':
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                elif dtype == 'numeric':
                    # Remove non-numeric characters before conversion
                    df[col] = pd.to_numeric(
                        df[col].astype(str).str.replace('[^\d.]', '', regex=True), errors='coerce'
                    )
            else:
                logging.warning(f"'{col}' column is missing. Filling with NaN or default values.")
                if dtype == 'datetime':
                    df[col] = pd.NaT
                elif dtype == 'numeric':
                    df[col] = np.nan

        # Handle 'First List Date' missing values
        if 'First List Date' in df.columns:
            missing_first_list_date = df['First List Date'].isna().sum()
            if missing_first_list_date > 0:
                if 'Sold Date' in df.columns:
                    df['First List Date'] = df['First List Date'].fillna(df['Sold Date'] - timedelta(days=30))
                    logging.info("Filled missing 'First List Date' with 'Sold Date' minus 30 days.")
                else:
                    df['First List Date'] = df['First List Date'].fillna(datetime.now() - timedelta(days=30))
                    logging.info("Filled missing 'First List Date' with current date minus 30 days.")

        # Handle other numeric columns
        numeric_columns = [
            'Asking Price', 'Beds', 'Baths', 'MyHome_Asking_Price',
            'MyHome_Beds', 'MyHome_Baths', 'MyHome_Floor_Area_Value',
            'MyHome_BER_Rating', 'Local Property Tax', 'Sold Asking Price',
            'Sold Price', 'price_per_square_meter'
        ]

        # Clean and convert numeric columns
        for col in numeric_columns:
            if col in df.columns:
                # Remove non-numeric characters except decimal points
                df[col] = pd.to_numeric(
                    df[col].astype(str).str.replace('[^\d.]', '', regex=True), errors='coerce'
                )
                # Fill missing values with median or a default value if median is NaN
                median_value = df[col].median()
                if pd.isna(median_value):
                    median_value = 0  # Default value if median is NaN
                df[col] = df[col].fillna(median_value)
                logging.info(f"Filled missing values in '{col}' with median: {median_value}")
            else:
                df[col] = np.nan
                logging.warning(f"'{col}' column is missing. Filling with NaN.")

        # Ensure 'Geo Distance (km)' is removed as it will be calculated per row
        if 'Geo Distance (km)' in df.columns:
            df.drop(columns=['Geo Distance (km)'], inplace=True)
            logging.info("Removed precomputed 'Geo Distance (km)' to calculate it per row.")

        # Initialize 'Dynamic Geo Distance (km)' as 0.0 (placeholder)
        df['Dynamic Geo Distance (km)'] = 0.0

        # Ensure 'Energy Rating' is clean and consistent
        if 'Energy Rating' in df.columns:
            df['Energy Rating'] = df['Energy Rating'].astype(str).str.upper().str.strip()
            # Replace any unexpected values with a default category, e.g., 'UNKNOWN'
            valid_energy_ratings = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
            df['Energy Rating'] = df['Energy Rating'].where(df['Energy Rating'].isin(valid_energy_ratings), 'UNKNOWN')
        else:
            df['Energy Rating'] = 'UNKNOWN'
            logging.warning("'Energy Rating' column is missing. Filled with 'UNKNOWN'.")

        # Handle 'Property Type' consistency
        if 'Property Type' in df.columns:
            df['Property Type'] = df['Property Type'].astype(str).str.title().str.strip()
        else:
            df['Property Type'] = 'Unknown'
            logging.warning("'Property Type' column is missing. Filled with 'Unknown'.")

        return df

    except Exception as e:
        logging.error(f"Error in preprocessing dataframe: {e}")
        raise

def calculate_metrics_for_row(df, row):
    """
    Calculate all metrics for a single row and return a dictionary of new columns.
    """
    new_columns = {}

    # Extract the current property's latitude and longitude
    current_lat = row.get('MyHome_Latitude', np.nan)
    current_lon = row.get('MyHome_Longitude', np.nan)

    if pd.isna(current_lat) or pd.isna(current_lon):
        logging.warning(f"Missing latitude or longitude for row index {row.name}. Skipping metrics calculation.")
        return new_columns  # Return empty metrics if location data is missing

    # Calculate distances from the current property to all other properties
    df = df.copy()  # To avoid SettingWithCopyWarning
    df['Dynamic Geo Distance (km)'] = df.apply(
        lambda r: haversine_distance(current_lat, current_lon, r['MyHome_Latitude'], r['MyHome_Longitude'])
        if pd.notna(r['MyHome_Latitude']) and pd.notna(r['MyHome_Longitude']) else np.nan,
        axis=1
    )

    # Define metric categories with distances and time frames
    transaction_volume_categories = {
        '3km_90days': {'distance': 3, 'days': 90},
        '5km_180days': {'distance': 5, 'days': 180}
    }

    transaction_value_categories = {
        '3km_90days': {'distance': 3, 'days': 90},
        '5km_180days': {'distance': 5, 'days': 180}
    }

    price_dynamics_categories = {
        '3km_90days': {'distance': 3, 'days': 90},
        '5km_180days': {'distance': 5, 'days': 180}
    }

    time_based_categories = {
        '3km': {'distance': 3},
        '5km': {'distance': 5}
    }

    property_condition_categories = {
        '3km': {'distance': 3},
        '5km': {'distance': 5}
    }

    house_size_benchmark_categories = {
        '3km_90days': {'distance': 3, 'days': 90},
        '5km_180days': {'distance': 5, 'days': 180}
    }

    property_type_distribution_categories = {
        '3km': {'distance': 3},
        '5km': {'distance': 5}
    }

    listing_activity_categories = {
        '3km': {'distance': 3},
        '5km': {'distance': 5}
    }

    sales_velocity_categories = {
        '3km': {'distance': 3, 'months': 6},
        '5km': {'distance': 5, 'months': 6}
    }

    pricing_consistency_categories = {
        '3km': {'distance': 3},
        '5km': {'distance': 5, 'days': 180}
    }

    # Transaction Volume Metrics
    for key, params in transaction_volume_categories.items():
        distance = params['distance']
        days = params['days']
        mask = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
        filtered = df[mask]
        num_sold = filtered.shape[0]
        new_columns[f'transaction_volume_num_sold_within_{distance}km_{days}days'] = num_sold

        if days >= 30:
            months = days / 30
            avg_monthly = num_sold / months if months > 0 else np.nan
            new_columns[f'transaction_volume_avg_monthly_transactions_within_{distance}km'] = avg_monthly
        else:
            new_columns[f'transaction_volume_avg_monthly_transactions_within_{distance}km'] = np.nan

        if key == '3km_90days':
            total_listings = df[
                (df['Dynamic Geo Distance (km)'] <= distance) &
                (df['First List Date'] >= (row['Sold Date'] - timedelta(days=days)))
            ].shape[0]
            new_columns[f'transaction_volume_total_listings_last_{days}days_within_{distance}km'] = total_listings

    # Transaction Value Metrics
    for key, params in transaction_value_categories.items():
        distance = params['distance']
        days = params['days']
        mask = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
        filtered = df[mask]

        if 'Sold Price' in df.columns and not filtered.empty:
            median_sold_price = filtered['Sold Price'].median()
            new_columns[f'transaction_value_median_sold_price_within_{distance}km_{days}days'] = median_sold_price

            percentile_75_sold_price = filtered['Sold Price'].quantile(0.75)
            new_columns[f'transaction_value_p75_sold_price_within_{distance}km_{days}days'] = percentile_75_sold_price
        else:
            new_columns[f'transaction_value_median_sold_price_within_{distance}km_{days}days'] = np.nan
            new_columns[f'transaction_value_p75_sold_price_within_{distance}km_{days}days'] = np.nan

        if 'price_per_square_meter' in df.columns and not filtered.empty:
            avg_price_sqm = filtered['price_per_square_meter'].mean()
            new_columns[f'transaction_value_avg_price_per_sqm_within_{distance}km'] = avg_price_sqm
        else:
            new_columns[f'transaction_value_avg_price_per_sqm_within_{distance}km'] = np.nan

        if key == '5km_180days' and 'Sold Price' in df.columns and not filtered.empty:
            avg_sold_price = filtered['Sold Price'].mean()
            new_columns[f'transaction_value_avg_sold_price_within_{distance}km_{days}days'] = avg_sold_price

        if key == '3km_90days' and 'Asking Price' in df.columns and not filtered.empty:
            median_asking_price = filtered['Asking Price'].median()
            new_columns[f'transaction_value_median_asking_price_within_{distance}km_{days}days'] = median_asking_price

    # Price Dynamics Metrics
    for key, params in price_dynamics_categories.items():
        distance = params['distance']
        days = params['days']
        mask = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
        filtered = df[mask].copy()

        if 'Sold Price' in filtered.columns and 'Asking Price' in filtered.columns and not filtered.empty:
            filtered['Price_Difference'] = filtered['Sold Price'] - filtered['Asking Price']
            avg_price_diff = filtered['Price_Difference'].mean()
            new_columns[f'price_dynamics_avg_price_diff_within_{distance}km_{days}days'] = avg_price_diff

            filtered_non_zero = filtered[filtered['Asking Price'] != 0]
            if not filtered_non_zero.empty:
                filtered_non_zero['Price_Change_Pct'] = (filtered_non_zero['Price_Difference'] / filtered_non_zero['Asking Price']) * 100
                median_price_change_pct = filtered_non_zero['Price_Change_Pct'].median()
                new_columns[f'price_dynamics_median_price_change_pct_within_{distance}km_{days}days'] = median_price_change_pct

                percent_above = (filtered_non_zero['Sold Price'] > filtered_non_zero['Asking Price']).mean() * 100
                new_columns[f'price_dynamics_percent_sold_above_asking_within_{distance}km_{days}days'] = percent_above
            else:
                new_columns[f'price_dynamics_median_price_change_pct_within_{distance}km_{days}days'] = np.nan
                new_columns[f'price_dynamics_percent_sold_above_asking_within_{distance}km_{days}days'] = np.nan
        else:
            new_columns[f'price_dynamics_avg_price_diff_within_{distance}km_{days}days'] = np.nan
            new_columns[f'price_dynamics_median_price_change_pct_within_{distance}km_{days}days'] = np.nan
            new_columns[f'price_dynamics_percent_sold_above_asking_within_{distance}km_{days}days'] = np.nan

    # Time-Based Metrics
    for key, params in time_based_categories.items():
        distance = params['distance']
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask].copy()

        if 'Sold Date' in filtered.columns and 'First List Date' in filtered.columns:
            # Calculate Days_on_Market safely
            filtered['Days_on_Market'] = (filtered['Sold Date'] - filtered['First List Date']).dt.days
            avg_days = filtered['Days_on_Market'].mean() if not filtered['Days_on_Market'].empty else np.nan
            median_days = filtered['Days_on_Market'].median() if not filtered['Days_on_Market'].empty else np.nan
            new_columns[f'time_based_avg_days_on_market_within_{distance}km'] = avg_days
            new_columns[f'time_based_median_days_on_market_within_{distance}km'] = median_days
        else:
            new_columns[f'time_based_avg_days_on_market_within_{distance}km'] = np.nan
            new_columns[f'time_based_median_days_on_market_within_{distance}km'] = np.nan

        if key == '5km':
            if 'Days_on_Market' in filtered.columns and not filtered['Days_on_Market'].empty:
                avg_time_to_sell = filtered['Days_on_Market'].mean()
                new_columns['time_based_avg_time_to_sell_within_5km'] = avg_time_to_sell

                mask_days = (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=180)))
                filtered_days = df[mask & mask_days]
                if 'Days_on_Market' in filtered_days.columns and not filtered_days['Days_on_Market'].empty:
                    median_time_on_market = filtered_days['Days_on_Market'].median()
                    new_columns['time_based_median_time_on_market_last_180days_within_5km'] = median_time_on_market
                else:
                    new_columns['time_based_median_time_on_market_last_180days_within_5km'] = np.nan
            else:
                new_columns['time_based_avg_time_to_sell_within_5km'] = np.nan
                new_columns['time_based_median_time_on_market_last_180days_within_5km'] = np.nan

        if key == '3km':
            mask_listings = (df['Dynamic Geo Distance (km)'] <= distance) & \
                            (df['First List Date'] >= (row['Sold Date'] - timedelta(days=90)))
            total_listings = df[mask_listings].shape[0]
            new_columns['time_based_total_listings_last_90days_within_3km'] = total_listings

    # Property Condition Metrics
    # Energy Rating Distribution within 3 km
    distance = 3
    if 'Dynamic Geo Distance (km)' in df.columns and 'Energy Rating' in df.columns:
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask]
        if not filtered.empty:
            energy_distribution = filtered['Energy Rating'].value_counts(normalize=True) * 100
            for rating, percent in energy_distribution.items():
                new_columns[f'property_condition_energy_rating_dist_{rating}_within_{distance}km'] = percent
        else:
            logging.warning("No data available for energy rating distribution within 3 km.")
    else:
        logging.warning("Required columns for energy rating distribution are missing.")

    # Percentage of BER Rating A within 5 km
    distance = 5
    if 'Dynamic Geo Distance (km)' in df.columns and 'MyHome_BER_Rating' in df.columns:
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask]
        if not filtered.empty:
            ber_a_percent = (filtered['MyHome_BER_Rating'] == 7).mean() * 100
            new_columns[f'property_condition_percent_ber_A_within_{distance}km'] = ber_a_percent
        else:
            new_columns[f'property_condition_percent_ber_A_within_{distance}km'] = np.nan
    else:
        logging.warning("Required columns for BER rating percentage are missing.")

    # Average BER Rating within 3 km
    distance = 3
    if 'Dynamic Geo Distance (km)' in df.columns and 'MyHome_BER_Rating' in df.columns:
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask]
        if not filtered.empty:
            avg_ber = filtered['MyHome_BER_Rating'].mean()
            new_columns[f'property_condition_avg_ber_within_{distance}km'] = avg_ber
        else:
            new_columns[f'property_condition_avg_ber_within_{distance}km'] = np.nan
    else:
        logging.warning("Required columns for average BER rating are missing.")

    # Percentage of Energy Efficient Homes within 3 km (BER Rating 5-7)
    distance = 3
    if 'Dynamic Geo Distance (km)' in df.columns and 'MyHome_BER_Rating' in df.columns:
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask]
        if not filtered.empty:
            energy_efficient_percent = ((filtered['MyHome_BER_Rating'] >= 5) & (filtered['MyHome_BER_Rating'] <= 7)).mean() * 100
            new_columns[f'property_condition_percent_energy_efficient_within_{distance}km'] = energy_efficient_percent
        else:
            new_columns[f'property_condition_percent_energy_efficient_within_{distance}km'] = np.nan
    else:
        logging.warning("Required columns for energy efficiency percentage are missing.")

    # Median BER Rating within 5 km in Last 180 Days
    distance = 5
    days = 180
    if 'Dynamic Geo Distance (km)' in df.columns and 'Sold Date' in df.columns and 'MyHome_BER_Rating' in df.columns:
        mask = (df['Dynamic Geo Distance (km)'] <= distance) & \
               (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
        filtered = df[mask]
        if not filtered.empty:
            median_ber = filtered['MyHome_BER_Rating'].median()
            new_columns[f'property_condition_median_ber_within_{distance}km_{days}days'] = median_ber
        else:
            new_columns[f'property_condition_median_ber_within_{distance}km_{days}days'] = np.nan
    else:
        logging.warning("Required columns for median BER rating are missing.")

    # House Size Benchmark Metrics
    for key, params in house_size_benchmark_categories.items():
        distance = params['distance']
        days = params['days']
        mask = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
        filtered = df[mask]

        if 'MyHome_Floor_Area_Value' in filtered.columns and not filtered['MyHome_Floor_Area_Value'].empty:
            avg_floor_area = filtered['MyHome_Floor_Area_Value'].mean()
            new_columns[f'house_size_benchmark_avg_floor_area_within_{distance}km_{days}days'] = avg_floor_area
        else:
            avg_floor_area = np.nan
            new_columns[f'house_size_benchmark_avg_floor_area_within_{distance}km_{days}days'] = np.nan

        if 'MyHome_Beds' in filtered.columns and not filtered['MyHome_Beds'].empty:
            median_beds = filtered['MyHome_Beds'].median()
            new_columns[f'house_size_benchmark_median_beds_within_{distance}km_{days}days'] = median_beds
        else:
            median_beds = np.nan
            new_columns[f'house_size_benchmark_median_beds_within_{distance}km_{days}days'] = np.nan

        if 'MyHome_Baths' in filtered.columns and not filtered['MyHome_Baths'].empty:
            median_baths = filtered['MyHome_Baths'].median()
            new_columns[f'house_size_benchmark_median_baths_within_{distance}km_{days}days'] = median_baths
        else:
            median_baths = np.nan
            new_columns[f'house_size_benchmark_median_baths_within_{distance}km_{days}days'] = np.nan

        if 'MyHome_Floor_Area_Value' in filtered.columns and not filtered['MyHome_Floor_Area_Value'].empty:
            std_floor_area = filtered['MyHome_Floor_Area_Value'].std()
            new_columns[f'house_size_benchmark_std_floor_area_within_{distance}km'] = std_floor_area
        else:
            new_columns[f'house_size_benchmark_std_floor_area_within_{distance}km'] = np.nan

        if 'MyHome_Floor_Area_Value' in df.columns and not pd.isna(avg_floor_area):
            percent_larger = (df['MyHome_Floor_Area_Value'] > avg_floor_area).mean() * 100
            new_columns[f'house_size_benchmark_percent_larger_than_avg_within_{distance}km_{days}days'] = percent_larger
        else:
            new_columns[f'house_size_benchmark_percent_larger_than_avg_within_{distance}km_{days}days'] = np.nan

        # Comparison Indicators
        if 'MyHome_Floor_Area_Value' in df.columns and not pd.isna(avg_floor_area):
            new_columns[f'house_size_comparison_larger_than_avg_within_{distance}km_{days}days'] = (df['MyHome_Floor_Area_Value'] > avg_floor_area).astype(float)
        else:
            new_columns[f'house_size_comparison_larger_than_avg_within_{distance}km_{days}days'] = np.nan

        if 'MyHome_Beds' in df.columns and not pd.isna(median_beds):
            new_columns[f'house_size_comparison_beds_above_median_within_{distance}km_{days}days'] = (df['MyHome_Beds'] > median_beds).astype(float)
        else:
            new_columns[f'house_size_comparison_beds_above_median_within_{distance}km_{days}days'] = np.nan

        if 'MyHome_Baths' in df.columns and not pd.isna(median_baths):
            new_columns[f'house_size_comparison_baths_above_median_within_{distance}km_{days}days'] = (df['MyHome_Baths'] > median_baths).astype(float)
        else:
            new_columns[f'house_size_comparison_baths_above_median_within_{distance}km_{days}days'] = np.nan

    # Property Type Distribution Metrics
    for key, params in property_type_distribution_categories.items():
        distance = params['distance']
        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask]

        if 'Property Type' in filtered.columns and not filtered['Property Type'].empty:
            # Distribution of Property Types
            property_counts = filtered['Property Type'].value_counts(normalize=True) * 100
            for prop_type, percent in property_counts.items():
                new_columns[f'property_type_dist_{prop_type}_percent_within_{distance}km'] = percent

            # Percentage of Each Property Type
            for prop_type in df['Property Type'].unique():
                percent = (filtered['Property Type'] == prop_type).mean() * 100 if not filtered.empty else np.nan
                new_columns[f'property_type_percent_{prop_type}_within_{distance}km'] = percent

            # Median Property Type Count within 3 km in Last 90 Days
            if key == '3km' and 'First List Date' in df.columns:
                days = 90
                mask_days = (df['First List Date'] >= (row['Sold Date'] - timedelta(days=days)))
                filtered_days = df[mask & mask_days]
                if 'Property Type' in filtered_days.columns and not filtered_days['Property Type'].empty:
                    median_prop_count = filtered_days['Property Type'].value_counts().median()
                    new_columns[f'property_type_median_count_within_{distance}km_{days}days'] = median_prop_count
                else:
                    new_columns[f'property_type_median_count_within_{distance}km_{days}days'] = np.nan

            # Diversity Index of Property Types within 5 km
            if key == '5km':
                diversity = filtered['Property Type'].nunique()
                new_columns[f'property_type_diversity_within_{distance}km'] = diversity

            # Number of Unique Property Types within 3 km
            if key == '3km':
                unique_prop_types = filtered['Property Type'].nunique()
                new_columns[f'property_type_unique_count_within_{distance}km'] = unique_prop_types
        else:
            logging.warning("Required columns for property type distribution are missing or empty.")

    # Listing Activity Metrics
    for key, params in listing_activity_categories.items():
        distance = params['distance']

        if key == '3km':
            # Active Listings: Listings without a Sold Date
            mask_active = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'].isna())
            num_active = df[mask_active].shape[0]
            new_columns[f'listing_activity_num_active_within_{distance}km'] = num_active

            # Median Asking Price of Active Listings within 3 km
            if 'Asking Price' in df.columns and not df.loc[mask_active, 'Asking Price'].empty:
                median_asking = df.loc[mask_active, 'Asking Price'].median()
                new_columns[f'listing_activity_median_asking_price_active_within_{distance}km'] = median_asking
            else:
                new_columns[f'listing_activity_median_asking_price_active_within_{distance}km'] = np.nan

            # Average Number of Price Changes within 3 km
            if 'Price Changes' in df.columns and not df.loc[mask_active, 'Price Changes'].empty:
                avg_price_changes = df.loc[mask_active, 'Price Changes'].fillna(0).mean()
                new_columns[f'listing_activity_avg_price_changes_within_{distance}km'] = avg_price_changes
            else:
                new_columns[f'listing_activity_avg_price_changes_within_{distance}km'] = np.nan

        if key == '5km':
            # Active Listings: Listings without a Sold Date
            mask_active = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'].isna())
            active_listings = df[mask_active].copy()
            if 'First List Date' in active_listings.columns and not active_listings['First List Date'].isna().all():
                active_listings['Days_on_Market'] = (row['Sold Date'] - active_listings['First List Date']).dt.days
                avg_time_on_market = active_listings['Days_on_Market'].mean() if not active_listings['Days_on_Market'].empty else np.nan
                new_columns[f'listing_activity_avg_days_on_market_active_within_{distance}km'] = avg_time_on_market
            else:
                new_columns[f'listing_activity_avg_days_on_market_active_within_{distance}km'] = np.nan

            # Percentage of Listings with Price Changes within 5 km
            if 'Price Changes' in df.columns and not df[df['Dynamic Geo Distance (km)'] <= distance]['Price Changes'].empty:
                percent_price_changes = ((df['Dynamic Geo Distance (km)'] <= distance) & df['Price Changes'].notna()).mean() * 100
                new_columns[f'listing_activity_percent_price_changes_within_{distance}km'] = percent_price_changes
            else:
                new_columns[f'listing_activity_percent_price_changes_within_{distance}km'] = np.nan

    # Sales Velocity Metrics
    for key, params in sales_velocity_categories.items():
        distance = params['distance']
        months = params['months']
        start_date = row['Sold Date'] - pd.DateOffset(months=months)

        mask = (df['Dynamic Geo Distance (km)'] <= distance) & (df['Sold Date'] >= start_date)
        filtered = df[mask]

        # Average Sales Velocity (Properties Sold per Month)
        sales_velocity_avg = filtered.shape[0] / months if months > 0 else np.nan
        new_columns[f'sales_velocity_avg_properties_sold_per_month_within_{distance}km'] = sales_velocity_avg

        # Median Sales Velocity
        if not filtered.empty:
            monthly_sales = filtered.groupby(filtered['Sold Date'].dt.to_period('M')).size()
            sales_velocity_median = monthly_sales.median() if not monthly_sales.empty else np.nan
            new_columns[f'sales_velocity_median_properties_sold_per_month_within_{distance}km'] = sales_velocity_median
        else:
            new_columns[f'sales_velocity_median_properties_sold_per_month_within_{distance}km'] = np.nan

        # Sales Velocity Trend over Last 6 Months within 3 km
        if key == '3km' and not filtered.empty:
            monthly_sales = filtered.groupby(filtered['Sold Date'].dt.to_period('M')).size()
            for period, count in monthly_sales.items():
                new_columns[f'sales_velocity_count_{period}_within_{distance}km'] = count

        # Seasonal Sales Velocity Patterns within 5 km
        if key == '5km' and not filtered.empty:
            # Ensure that 'Month' is correctly set using .loc to avoid SettingWithCopyWarning
            filtered = filtered.copy()
            filtered['Month'] = filtered['Sold Date'].dt.month
            seasonal_sales = filtered.groupby('Month').size()
            for month, count in seasonal_sales.items():
                new_columns[f'sales_velocity_seasonal_month_{month}_within_{distance}km'] = count

        # Comparative Sales Velocity between 3 km and 5 km Radii
        if key == '5km':
            # Calculate sales velocity for 3 km if not already present
            mask_3km = (df['Dynamic Geo Distance (km)'] <= 3) & (df['Sold Date'] >= row['Sold Date'] - pd.DateOffset(months=6))
            filtered_3km = df[mask_3km]
            sales_velocity_avg_3km = filtered_3km.shape[0] / 6 if 6 > 0 else np.nan
            new_columns['sales_velocity_comparative_3km_vs_5km'] = sales_velocity_avg - sales_velocity_avg_3km

    # Pricing Consistency Metrics
    for key, params in pricing_consistency_categories.items():
        distance = params['distance']
        days = params.get('days', None)

        mask = (df['Dynamic Geo Distance (km)'] <= distance)
        filtered = df[mask].copy()

        # Average Asking Price vs. Sold Price Ratio within distance
        if 'Sold Price' in filtered.columns and 'Asking Price' in filtered.columns and not filtered.empty:
            filtered_non_zero = filtered[filtered['Asking Price'] != 0]
            if not filtered_non_zero.empty:
                asking_sold_ratio = filtered_non_zero['Sold Price'] / filtered_non_zero['Asking Price']
                avg_ratio = asking_sold_ratio.mean()
                new_columns[f'pricing_consistency_avg_asking_sold_ratio_within_{distance}km'] = avg_ratio
            else:
                new_columns[f'pricing_consistency_avg_asking_sold_ratio_within_{distance}km'] = np.nan
        else:
            new_columns[f'pricing_consistency_avg_asking_sold_ratio_within_{distance}km'] = np.nan

        # Median Price Adjustment Ratio within 5 km in Last 180 Days
        if key == '5km' and days:
            mask_days = (df['Sold Date'] >= (row['Sold Date'] - timedelta(days=days)))
            filtered_days = df[mask & mask_days]
            if 'Sold Price' in filtered_days.columns and 'Asking Price' in filtered_days.columns and not filtered_days.empty:
                filtered_days_non_zero = filtered_days[filtered_days['Asking Price'] != 0]
                if not filtered_days_non_zero.empty:
                    median_ratio = (filtered_days_non_zero['Sold Price'] / filtered_days_non_zero['Asking Price']).median()
                    new_columns[f'pricing_consistency_median_asking_sold_ratio_within_{distance}km_{days}days'] = median_ratio
                else:
                    new_columns[f'pricing_consistency_median_asking_sold_ratio_within_{distance}km_{days}days'] = np.nan
            else:
                new_columns[f'pricing_consistency_median_asking_sold_ratio_within_{distance}km_{days}days'] = np.nan

        # Percentage of Listings with Consistent Pricing (Minimal Changes) within 3 km
        if key == '3km':
            consistent_threshold = 5  # Define what constitutes minimal change, e.g., <=5%
            if 'Price Changes' in filtered.columns and 'Asking Price' in filtered.columns and not filtered.empty:
                filtered_non_zero = filtered[filtered['Asking Price'] != 0].copy()
                if not filtered_non_zero.empty:
                    filtered_non_zero['Price_Change_Pct'] = (filtered_non_zero['Price Changes'] / filtered_non_zero['Asking Price']) * 100
                    percent_consistent = (filtered_non_zero['Price_Change_Pct'].abs() <= consistent_threshold).mean() * 100
                    new_columns[f'pricing_consistency_percent_minimal_changes_within_{distance}km'] = percent_consistent
                else:
                    new_columns[f'pricing_consistency_percent_minimal_changes_within_{distance}km'] = np.nan
            else:
                new_columns[f'pricing_consistency_percent_minimal_changes_within_{distance}km'] = np.nan

        # Average Time Between Price Changes within 5 km
        if key == '5km':
            if 'Price Changes' in filtered.columns and not filtered['Price Changes'].empty:
                avg_time_between_changes = filtered['Price Changes'].fillna(0).mean()
                new_columns[f'pricing_consistency_avg_time_between_changes_within_{distance}km'] = avg_time_between_changes
            else:
                new_columns[f'pricing_consistency_avg_time_between_changes_within_{distance}km'] = np.nan

        # Price Change Frequency within 3 km
        if key == '3km':
            if 'Price Changes' in filtered.columns and not filtered['Price Changes'].empty:
                price_change_freq = (filtered['Price Changes'].fillna(0) > 0).mean() * 100
                new_columns[f'pricing_consistency_price_change_freq_within_{distance}km'] = price_change_freq
            else:
                new_columns[f'pricing_consistency_price_change_freq_within_{distance}km'] = np.nan

    return new_columns

def add_all_metrics(df):
    """
    Add all calculated metrics to the dataframe.
    """
    try:
        # Preprocess the dataframe
        df = preprocess_dataframe(df)

        # Initialize a list to store metrics dictionaries
        metrics_list = []

        # Iterate over each row to calculate metrics
        for idx, row in df.iterrows():
            logging.info(f"Calculating metrics for row {idx + 1}/{len(df)}")
            metrics = calculate_metrics_for_row(df, row)
            metrics_list.append(metrics)

        # Create a DataFrame from the list of metrics
        metrics_df = pd.DataFrame(metrics_list)

        # Concatenate metrics to the original DataFrame
        df = pd.concat([df.reset_index(drop=True), metrics_df.reset_index(drop=True)], axis=1)

        # Drop the 'Dynamic Geo Distance (km)' column as it's no longer needed
        if 'Dynamic Geo Distance (km)' in df.columns:
            df.drop(columns=['Dynamic Geo Distance (km)'], inplace=True)

        logging.info("All metrics have been successfully added to the dataframe.")
        return df

    except Exception as e:
        logging.error(f"Error in adding all metrics: {e}")
        raise

# Example usage:
if __name__ == "__main__":
    # Sample data creation for demonstration purposes
    # Replace this with your actual data loading mechanism (e.g., pd.read_csv, pd.read_excel)
    data = {
        'MyHome_Latitude': [53.3498, 53.3398, 53.3298],
        'MyHome_Longitude': [-6.2603, -6.2503, -6.2403],
        'Sold Date': ['2023-09-01', '2023-09-15', '2023-10-01'],
        'First List Date': ['2023-08-01', '2023-08-15', '2023-09-01'],
        'Sold Price': [300000, 350000, 400000],
        'Asking Price': [310000, 340000, 390000],
        'Price Changes': [2, 1, 3],
        'Property Type': ['Apartment', 'House', 'Condo'],
        'Energy Rating': ['B', 'A', 'C'],
        'Beds': [3, 4, 2],
        'Baths': [2, 3, 1],
        'MyHome_BER_Rating': [6, 7, 5],
        'MyHome_Floor_Area_Value': [120, 200, 80],
        'Local Property Tax': [2500, 3000, 1500],
        'price_per_square_meter': [2500, 1750, 5000]
    }
    data = df

    # Create DataFrame
    df_sample = pd.DataFrame(data)

    # Add all metrics
    df_with_metrics = add_all_metrics(df_sample)

    # Display the dataframe with new metrics
    pd.set_option('display.max_columns', None)  # Show all columns for clarity
    print(df_with_metrics.head())


INFO: Filled missing values in 'Asking Price' with median: 449000.0
INFO: Filled missing values in 'Beds' with median: 3.0
INFO: Filled missing values in 'Baths' with median: 2.0
INFO: Filled missing values in 'MyHome_Asking_Price' with median: 367500.0
INFO: Filled missing values in 'MyHome_Beds' with median: 3.0
INFO: Filled missing values in 'MyHome_Baths' with median: 2.0
INFO: Filled missing values in 'MyHome_Floor_Area_Value' with median: 93.325
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
INFO: Filled missing values in 'MyHome_BER_Rating' with median: 0
INFO: Filled missing values in 'Local Property Tax' with median: 495.0
INFO: Filled missing values in 'Sold Asking Price' with median: 449000.0
INFO: Filled missing values in 'Sold Price' with median: 485000.0
INFO: Filled missing values in 'price_per_square_meter' with median: 5111.455108359133
INFO: Calculating metrics for row 1/1821
INFO: Calculating metrics for row 2/1821
INFO: Calculating metrics for row 3/1821
I

                                             Address  Asking Price  Beds  \
0           Taramar, Middle Third, Dublin 5, D05X8N9        750000   4.0   
1              87 Haddington Road, Dublin 4, D04WP23        990000   5.0   
2  Apartment 79, The Northumberlands, Love Lane E...        410000   2.0   
3  7 Parkside Heath, Clongriffin, Dublin 13, Dubl...        535000   3.0   
4  5 Herbert Road, Blanchardstown, Blanchardstown...        400000   2.0   

   Baths   Property Type Energy Rating   Eircode  Local Property Tax  \
0    1.0  End Of Terrace       UNKNOWN  D05 X8N9                 765   
1    3.0         Terrace       UNKNOWN  D04 WP23                1035   
2    1.0       Apartment       UNKNOWN  D02 X068                 405   
3    3.0         Terrace       UNKNOWN  D13 WN3C                 585   
4    1.0        Bungalow       UNKNOWN  D15 A3TN                 405   

                      Agency Name                   Agency Contact  \
0  Hamill Estate Agents & Valuers   Hami

In [144]:
df_with_metrics.head(10)

Unnamed: 0,Address,Asking Price,Beds,Baths,Property Type,Energy Rating,Eircode,Local Property Tax,Agency Name,Agency Contact,Price Changes,URL,MyHome_Address,MyHome_Asking_Price,MyHome_Beds,MyHome_Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,MyHome_Link,price_per_square_meter,is_house,Sold Asking Price,Sold Price,Sold Date,First List Date,my_home_listing,transaction_volume_num_sold_within_3km_90days,transaction_volume_avg_monthly_transactions_within_3km,transaction_volume_total_listings_last_90days_within_3km,transaction_volume_num_sold_within_5km_180days,transaction_volume_avg_monthly_transactions_within_5km,transaction_value_median_sold_price_within_3km_90days,transaction_value_p75_sold_price_within_3km_90days,transaction_value_avg_price_per_sqm_within_3km,transaction_value_median_asking_price_within_3km_90days,transaction_value_median_sold_price_within_5km_180days,transaction_value_p75_sold_price_within_5km_180days,transaction_value_avg_price_per_sqm_within_5km,transaction_value_avg_sold_price_within_5km_180days,price_dynamics_avg_price_diff_within_3km_90days,price_dynamics_median_price_change_pct_within_3km_90days,price_dynamics_percent_sold_above_asking_within_3km_90days,price_dynamics_avg_price_diff_within_5km_180days,price_dynamics_median_price_change_pct_within_5km_180days,price_dynamics_percent_sold_above_asking_within_5km_180days,time_based_avg_days_on_market_within_3km,time_based_median_days_on_market_within_3km,time_based_total_listings_last_90days_within_3km,time_based_avg_days_on_market_within_5km,time_based_median_days_on_market_within_5km,time_based_avg_time_to_sell_within_5km,time_based_median_time_on_market_last_180days_within_5km,property_condition_energy_rating_dist_UNKNOWN_within_3km,property_condition_energy_rating_dist_F_within_3km,property_condition_energy_rating_dist_G_within_3km,property_condition_percent_ber_A_within_5km,property_condition_avg_ber_within_3km,property_condition_percent_energy_efficient_within_3km,property_condition_median_ber_within_5km_180days,house_size_benchmark_avg_floor_area_within_3km_90days,house_size_benchmark_median_beds_within_3km_90days,house_size_benchmark_median_baths_within_3km_90days,house_size_benchmark_std_floor_area_within_3km,house_size_benchmark_percent_larger_than_avg_within_3km_90days,house_size_comparison_larger_than_avg_within_3km_90days,house_size_comparison_beds_above_median_within_3km_90days,house_size_comparison_baths_above_median_within_3km_90days,house_size_benchmark_avg_floor_area_within_5km_180days,house_size_benchmark_median_beds_within_5km_180days,house_size_benchmark_median_baths_within_5km_180days,house_size_benchmark_std_floor_area_within_5km,house_size_benchmark_percent_larger_than_avg_within_5km_180days,house_size_comparison_larger_than_avg_within_5km_180days,house_size_comparison_beds_above_median_within_5km_180days,house_size_comparison_baths_above_median_within_5km_180days,property_type_dist_Terrace_percent_within_3km,property_type_dist_Semi-D_percent_within_3km,property_type_dist_End Of Terrace_percent_within_3km,property_type_dist_Apartment_percent_within_3km,property_type_dist_Detached_percent_within_3km,property_type_percent_End Of Terrace_within_3km,property_type_percent_Terrace_within_3km,property_type_percent_Apartment_within_3km,property_type_percent_Bungalow_within_3km,property_type_percent_Semi-D_within_3km,property_type_percent_Detached_within_3km,property_type_percent_Duplex_within_3km,property_type_percent_Studio_within_3km,property_type_percent_Townhouse_within_3km,property_type_percent_Houses_within_3km,property_type_percent_Site_within_3km,property_type_median_count_within_3km_90days,property_type_unique_count_within_3km,property_type_dist_Apartment_percent_within_5km,property_type_dist_Terrace_percent_within_5km,property_type_dist_Semi-D_percent_within_5km,property_type_dist_End Of Terrace_percent_within_5km,property_type_dist_Detached_percent_within_5km,property_type_dist_Duplex_percent_within_5km,property_type_dist_Bungalow_percent_within_5km,property_type_dist_Townhouse_percent_within_5km,property_type_percent_End Of Terrace_within_5km,property_type_percent_Terrace_within_5km,property_type_percent_Apartment_within_5km,property_type_percent_Bungalow_within_5km,property_type_percent_Semi-D_within_5km,property_type_percent_Detached_within_5km,property_type_percent_Duplex_within_5km,property_type_percent_Studio_within_5km,property_type_percent_Townhouse_within_5km,property_type_percent_Houses_within_5km,property_type_percent_Site_within_5km,property_type_diversity_within_5km,listing_activity_num_active_within_3km,listing_activity_median_asking_price_active_within_3km,listing_activity_avg_price_changes_within_3km,listing_activity_avg_days_on_market_active_within_5km,listing_activity_percent_price_changes_within_5km,sales_velocity_avg_properties_sold_per_month_within_3km,sales_velocity_median_properties_sold_per_month_within_3km,sales_velocity_count_2024-05_within_3km,sales_velocity_count_2024-06_within_3km,sales_velocity_count_2024-07_within_3km,sales_velocity_count_2024-08_within_3km,sales_velocity_count_2024-09_within_3km,sales_velocity_avg_properties_sold_per_month_within_5km,sales_velocity_median_properties_sold_per_month_within_5km,sales_velocity_seasonal_month_5_within_5km,sales_velocity_seasonal_month_6_within_5km,sales_velocity_seasonal_month_7_within_5km,sales_velocity_seasonal_month_8_within_5km,sales_velocity_seasonal_month_9_within_5km,sales_velocity_comparative_3km_vs_5km,pricing_consistency_avg_asking_sold_ratio_within_3km,pricing_consistency_percent_minimal_changes_within_3km,pricing_consistency_price_change_freq_within_3km,pricing_consistency_avg_asking_sold_ratio_within_5km,pricing_consistency_median_asking_sold_ratio_within_5km_180days,pricing_consistency_avg_time_between_changes_within_5km,property_type_dist_Duplex_percent_within_3km,property_type_dist_Bungalow_percent_within_3km,property_type_dist_Townhouse_percent_within_3km,property_type_dist_Studio_percent_within_3km,property_type_dist_Site_percent_within_3km,property_type_dist_Studio_percent_within_5km,property_type_dist_Houses_percent_within_5km,property_type_dist_Site_percent_within_5km,sales_velocity_seasonal_month_3_within_5km,sales_velocity_count_2024-03_within_3km,property_type_dist_Houses_percent_within_3km
0,"Taramar, Middle Third, Dublin 5, D05X8N9",750000,4.0,1.0,End Of Terrace,UNKNOWN,D05 X8N9,765,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,9.500001e+35,https://mynest.ie/listing/126051/Dublin/tarama...,"Taramar, Middle Third, Killester, Dublin 5",367500.0,4.0,2.0,142.0,0.0,53.373857,-6.203371,https://www.myhome.ie/residential/brochure/tar...,5281.690141,True,750000.0,950000.0,2024-09-13,2024-09-13,True,143.0,47.666667,143.0,379.0,63.166667,500000.0,702500.0,5203.786709,450000.0,465000.0,587000.0,5323.267638,538454.844327,47455.440559,8.62069,90.20979,37491.387863,7.522936,82.849604,0.0,0.0,143.0,0.0,0.0,0.0,,87.5,9.722222,2.777778,0.0,0.0,0.0,0.0,97.743916,3.0,2.0,34.9228,26.249314,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,94.562916,3.0,2.0,37.754059,28.995058,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,32.638889,32.638889,17.361111,15.277778,2.083333,17.361111,32.638889,15.277778,0.0,32.638889,2.083333,0.0,0.0,0.0,0.0,0.0,24.0,5.0,29.287599,28.23219,21.372032,13.456464,2.902375,2.110818,1.846966,0.791557,13.456464,28.23219,29.287599,1.846966,21.372032,2.902375,2.110818,0.0,0.791557,0.0,0.0,8.0,0.0,,,,20.81274,24.0,16.0,1.0,2.0,42.0,83.0,16.0,63.166667,63.0,2.0,3.0,103.0,208.0,63.0,39.166667,1.091455,0.0,100.0,1.081899,1.075229,1.14512e+129,,,,,,,,,,,
1,"87 Haddington Road, Dublin 4, D04WP23",990000,5.0,3.0,Terrace,UNKNOWN,D04 WP23,1035,Turley Property Advisors,Susan Turley,1.01e+73,,"87 Haddington Road, Ballsbridge, Dublin 4",367500.0,5.0,3.0,175.0,0.0,53.335385,-6.239842,https://www.myhome.ie/residential/brochure/87-...,5657.142857,True,990000.0,1010000.0,2024-09-13,2024-09-13,True,249.0,83.0,249.0,542.0,90.333333,485000.0,744000.0,6238.726317,450000.0,516250.0,781500.0,5894.116163,687743.212177,35446.196787,5.747126,74.698795,50539.52214,8.235294,81.365314,0.0,0.0,249.0,0.0,0.0,0.0,,85.375494,7.509881,7.114625,0.0,0.0,0.0,0.0,96.255341,3.0,2.0,52.348116,26.963207,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,98.749797,3.0,2.0,50.319482,25.590335,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,30.434783,7.114625,7.114625,42.687747,4.743083,7.114625,30.434783,42.687747,2.371542,7.114625,4.743083,3.162055,0.395257,1.581028,0.0,0.395257,10.0,10.0,31.549815,33.948339,13.837638,9.778598,6.088561,1.660517,1.660517,0.922509,9.778598,33.948339,31.549815,1.660517,13.837638,6.088561,1.660517,0.184502,0.922509,0.184502,0.184502,11.0,0.0,,,,29.763866,42.166667,52.0,3.0,3.0,64.0,131.0,52.0,90.333333,93.0,4.0,5.0,147.0,293.0,93.0,48.166667,1.068193,0.0,100.0,1.092531,1.082353,4.7048020000000005e+142,3.162055,2.371542,1.581028,0.395257,0.395257,0.184502,0.184502,0.184502,,,
2,"Apartment 79, The Northumberlands, Love Lane E...",410000,2.0,1.0,Apartment,UNKNOWN,D02 X068,405,Owen Reilly,Owen Reilly Sales,4.800001e+35,https://mynest.ie/listing/134218/Dublin/apartm...,"79 The Northumberlands, Love Lane East, Mount ...",367500.0,2.0,2.0,64.0,0.0,53.349805,-6.26031,https://www.myhome.ie/residential/brochure/79-...,6406.25,False,410000.0,480000.0,2024-09-13,2024-09-13,True,286.0,95.333333,286.0,569.0,94.833333,463500.0,600000.0,5862.549143,425000.0,484000.0,675000.0,5671.616934,624116.750439,35161.888112,7.328814,77.972028,49248.033392,8.474576,82.776801,0.0,0.0,286.0,0.0,0.0,0.0,,79.790941,11.84669,8.362369,0.0,0.0,0.0,0.0,88.014336,3.0,2.0,40.132969,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,95.744745,3.0,2.0,43.837107,28.00659,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,40.418118,5.923345,9.756098,35.888502,2.090592,9.756098,40.418118,35.888502,2.439024,5.923345,2.090592,1.74216,0.348432,1.045296,0.0,0.348432,6.5,10.0,29.525483,34.622144,15.817223,11.950791,3.690685,1.757469,1.405975,0.878735,11.950791,34.622144,29.525483,1.405975,15.817223,3.690685,1.757469,0.175747,0.878735,0.0,0.175747,10.0,0.0,,,,31.246568,47.833333,64.0,,4.0,72.0,155.0,56.0,94.833333,93.0,4.0,6.0,156.0,310.0,93.0,47.0,1.085347,0.0,100.0,1.104922,1.084746,4.481552e+142,1.74216,2.439024,1.045296,0.348432,0.348432,0.175747,,0.175747,,,
3,"7 Parkside Heath, Clongriffin, Dublin 13, Dubl...",535000,3.0,3.0,Terrace,UNKNOWN,D13 WN3C,585,Sherry FitzGerald Sutton,Madeleine O'Connor,5.690000999999999e+35,https://mynest.ie/listing/128619/Dublin/7-park...,"7 Parkside Heath, Balgriffin, Dublin 13",367500.0,3.0,3.0,113.0,0.0,53.407653,-6.163418,https://www.myhome.ie/residential/brochure/7-p...,4734.513274,True,535000.0,569000.0,2024-09-13,2024-09-13,True,83.0,27.666667,83.0,199.0,33.166667,460000.0,569500.0,4666.142133,415000.0,485000.0,708000.0,4976.018912,576378.678392,39643.457831,8.586941,83.13253,37530.437186,8.029197,84.422111,0.0,0.0,83.0,0.0,0.0,0.0,,90.361446,4.819277,4.819277,0.0,0.0,0.0,0.0,102.419036,3.0,2.0,34.489221,23.778144,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,104.375503,3.0,2.0,37.211915,23.119165,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,25.301205,22.891566,14.457831,20.481928,9.638554,14.457831,25.301205,20.481928,2.409639,22.891566,9.638554,4.819277,0.0,0.0,0.0,0.0,12.0,7.0,19.59799,20.603015,31.658291,14.572864,8.040201,3.015075,2.512563,,14.572864,20.603015,19.59799,2.512563,31.658291,8.040201,3.015075,0.0,0.0,0.0,0.0,7.0,0.0,,,,10.928062,13.833333,25.0,,,25.0,36.0,22.0,33.166667,37.0,1.0,1.0,53.0,107.0,37.0,19.333333,1.090615,0.0,100.0,1.08337,1.080292,1.7336699999999998e+105,4.819277,2.409639,,,,,,,,,
4,"5 Herbert Road, Blanchardstown, Blanchardstown...",400000,2.0,1.0,Bungalow,UNKNOWN,D15 A3TN,405,Lloyd Daly & Associates Ltd.,Lloyd Daly & Associates - Sales,4.322001e+35,https://mynest.ie/listing/117502/Dublin/5-herb...,"5 Herbert Road, Blanchardstown, Dublin 15",367500.0,2.0,2.0,116.0,0.0,53.387605,-6.374562,https://www.myhome.ie/residential/brochure/5-h...,3448.275862,True,400000.0,432200.0,2024-09-13,2024-09-13,True,79.0,26.333333,79.0,167.0,27.833333,410000.0,545000.0,4668.633417,375000.0,400000.0,475000.0,4567.196733,456169.874251,37807.518987,6.875,87.341772,42572.898204,7.84,86.227545,0.0,0.0,79.0,0.0,0.0,0.0,,98.734177,1.265823,,0.0,0.0,0.0,0.0,93.922785,3.0,2.0,25.743827,29.599121,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,93.679641,3.0,2.0,30.920051,29.599121,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,5.063291,45.56962,2.531646,26.582278,8.860759,2.531646,5.063291,26.582278,2.531646,45.56962,8.860759,8.860759,0.0,0.0,0.0,0.0,7.0,7.0,26.347305,15.568862,35.928144,8.383234,4.790419,7.784431,1.197605,,8.383234,15.568862,26.347305,1.197605,35.928144,4.790419,7.784431,0.0,0.0,0.0,0.0,7.0,0.0,,,,9.170785,13.166667,17.0,,2.0,24.0,43.0,10.0,27.833333,27.0,,4.0,43.0,92.0,27.0,14.666667,1.078534,0.0,100.0,1.107067,1.0784,3.6107799999999997e+93,8.860759,2.531646,,,,,,,1.0,,
5,"Apartment 40, The Swift, Tassagard Greens, Sag...",290000,2.0,2.0,Apartment,UNKNOWN,D24 KF58,315,Smith & Butler Estates,Danny Butler,3.400001e+59,https://mynest.ie/listing/106324/Dublin/apartm...,,367500.0,3.0,2.0,93.325,0.0,53.281393,-6.440669,https://www.myhome.ie/priceregister/40-the-swi...,5111.455108,False,290000.0,340000.0,2024-09-13,2024-09-13,False,36.0,12.0,36.0,77.0,12.833333,347500.0,465250.0,4870.7614,305000.0,340000.0,435000.0,4542.911616,367649.337662,33280.555556,10.682904,88.888889,35614.272727,10.456717,92.207792,0.0,0.0,36.0,0.0,0.0,0.0,,100.0,,,0.0,0.0,0.0,0.0,89.85625,3.0,2.0,17.181166,73.531027,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,88.48026,3.0,2.0,18.137482,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,16.666667,27.777778,,25.0,5.555556,0.0,16.666667,25.0,5.555556,27.777778,5.555556,13.888889,0.0,5.555556,0.0,0.0,5.0,7.0,27.272727,15.584416,35.064935,5.194805,3.896104,6.493506,3.896104,2.597403,5.194805,15.584416,27.272727,3.896104,35.064935,3.896104,6.493506,0.0,2.597403,0.0,0.0,8.0,0.0,,,,4.228446,6.0,7.5,,1.0,10.0,20.0,5.0,12.833333,18.0,,4.0,24.0,37.0,12.0,6.833333,1.110929,0.0,100.0,1.115273,1.104567,6.753249e+105,13.888889,5.555556,5.555556,,,,,,,,
6,"31 Tibradden Grove, Dublin 12, D12P2X4",355000,3.0,1.0,Terrace,UNKNOWN,D12 P2X4,405,Byrne and Moore Property Consultants Limited,Bryne & Moore Property,3.900001e+59,,"14, Tibradden Grove, Greenpark, Walkinstown, D...",367500.0,3.0,2.0,106.0,0.0,53.308228,-6.34192,https://www.myhome.ie/residential/brochure/14-...,3349.056604,True,355000.0,390000.0,2024-09-13,2024-09-13,True,89.0,29.666667,89.0,322.0,53.666667,475000.0,695000.0,4927.264907,425000.0,471250.0,636250.0,5018.887098,559051.586957,50984.831461,14.814815,85.393258,49030.934783,11.099034,88.509317,0.0,0.0,89.0,0.0,0.0,0.0,,87.640449,8.988764,3.370787,0.0,0.0,0.0,0.0,101.87191,3.0,2.0,28.856674,24.272378,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,99.004053,3.0,2.0,37.621789,25.53542,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,25.842697,37.078652,16.853933,8.988764,8.988764,16.853933,25.842697,8.988764,1.123596,37.078652,8.988764,0.0,0.0,0.0,0.0,1.123596,8.0,7.0,17.391304,29.503106,29.813665,12.732919,7.142857,1.863354,0.931677,0.310559,12.732919,29.503106,17.391304,0.931677,29.813665,7.142857,1.863354,0.0,0.310559,0.0,0.310559,9.0,0.0,,,,17.682592,14.833333,21.0,,1.0,31.0,46.0,11.0,53.666667,71.5,,8.0,103.0,171.0,40.0,38.833333,1.115904,0.0,100.0,1.12014,1.11099,1.91615e+93,,1.123596,,,1.123596,,,0.310559,,,
7,"Apartment 207, The Edges 1, Beacon South Quart...",375000,2.0,2.0,Apartment,UNKNOWN,D18 KX68,405,Herbert & Lansdowne Estate Agents,Sales Department,3.870001e+35,https://mynest.ie/listing/109892/Dublin/apartm...,"207 The Edges 1, Beacon South Quarter, Sandyfo...",367500.0,2.0,2.0,64.0,0.0,53.27797,-6.216868,https://www.myhome.ie/residential/brochure/207...,5859.375,False,375000.0,387000.0,2024-09-13,2024-09-13,True,177.0,59.0,177.0,338.0,56.333333,680000.0,875000.0,5666.725351,635000.0,680000.0,908000.0,5827.882601,800365.488166,63078.305085,7.692308,87.00565,69724.659763,7.609562,86.686391,0.0,0.0,177.0,0.0,0.0,0.0,,92.655367,5.649718,1.694915,0.0,0.0,0.0,0.0,109.866695,3.0,2.0,44.505927,21.252059,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,111.257485,3.0,2.0,50.655987,20.208677,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,9.60452,34.463277,2.824859,32.768362,17.514124,2.824859,9.60452,32.768362,2.824859,34.463277,17.514124,0.0,0.0,0.0,0.0,0.0,24.0,6.0,28.994083,14.497041,31.065089,3.550296,18.934911,0.591716,1.775148,0.295858,3.550296,14.497041,28.994083,1.775148,31.065089,18.934911,0.591716,0.0,0.295858,0.295858,0.0,9.0,0.0,,,,18.56123,29.5,44.5,,2.0,62.0,86.0,27.0,56.333333,75.5,,2.0,104.0,185.0,47.0,26.833333,1.091014,0.0,100.0,1.094038,1.076096,2.455622e+141,,2.824859,,,,,0.295858,,,,
8,"80 Moatfield Road, Coolock, Coolock, Dublin 5,...",395000,3.0,1.0,Terrace,UNKNOWN,D05 X9C0,405,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,4.3500009999999997e+71,https://mynest.ie/listing/116212/Dublin/80-moa...,"80 Moatfield Road, Coolock, Dublin 5",367500.0,3.0,2.0,95.0,0.0,53.386624,-6.192724,https://www.myhome.ie/residential/brochure/80-...,4157.894737,True,395000.0,435000.0,2024-09-12,2024-09-12,True,128.0,42.666667,128.0,275.0,45.833333,475000.0,612500.0,4991.819206,440000.0,462000.0,582750.0,4968.799323,517777.807273,46750.390625,8.63082,89.0625,42613.443636,8.586941,88.0,0.0,0.0,128.0,0.0,0.0,0.0,,88.372093,8.527132,3.100775,0.0,0.0,0.0,0.0,98.965977,3.0,2.0,32.797053,25.590335,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,95.566691,3.0,2.0,30.823048,28.00659,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,24.806202,36.434109,17.829457,15.503876,2.325581,17.829457,24.806202,15.503876,2.325581,36.434109,2.325581,0.775194,0.0,0.0,0.0,0.0,20.0,7.0,20.727273,29.818182,27.272727,15.272727,3.636364,1.454545,1.818182,,15.272727,29.818182,20.727273,1.818182,27.272727,3.636364,1.454545,0.0,0.0,0.0,0.0,7.0,0.0,,,,15.101593,21.5,19.0,1.0,1.0,37.0,71.0,19.0,45.833333,42.0,2.0,3.0,77.0,151.0,42.0,24.333333,1.092713,0.0,100.0,1.090264,1.085869,1.578183e+129,0.775194,2.325581,,,,,,,,,
9,"79 South Circular Road, Dublin 8, D08HR77",1350000,4.0,3.0,Terrace,UNKNOWN,D08 HR77,1846,Leonard Wilson Keenan Estates & Letting Agents,Patrick Leonard,1.49e+38,https://mynest.ie/listing/133579/Dublin/79-sou...,,367500.0,3.0,3.0,173.0,0.0,53.335004,-6.293548,https://www.myhome.ie/residential/dublin-8/per...,7803.468208,True,1350000.0,1490000.0,2024-09-12,2024-09-12,True,220.0,73.333333,220.0,544.0,90.666667,458000.0,592250.0,5624.573058,395000.0,488000.0,725000.0,5653.663287,630817.266544,51538.645455,10.797103,84.090909,49241.53125,8.376667,81.25,0.0,0.0,220.0,0.0,0.0,0.0,,81.696429,8.482143,9.821429,0.0,0.0,0.0,0.0,88.717773,3.0,2.0,40.163022,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,96.729605,3.0,2.0,45.173491,26.853377,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,44.196429,5.803571,15.178571,29.017857,2.232143,15.178571,44.196429,29.017857,1.339286,5.803571,2.232143,0.892857,0.446429,0.446429,0.0,0.446429,4.0,10.0,30.882353,34.007353,15.992647,10.661765,3.860294,2.022059,1.286765,0.735294,10.661765,34.007353,30.882353,1.286765,15.992647,3.860294,2.022059,0.183824,0.735294,0.183824,0.183824,11.0,0.0,,,,29.873696,37.333333,41.0,3.0,2.0,59.0,119.0,41.0,90.666667,91.0,3.0,7.0,144.0,299.0,91.0,53.333333,1.118264,0.0,100.0,1.105554,1.083767,4.6875050000000005e+142,0.892857,1.339286,0.446429,0.446429,0.446429,0.183824,0.183824,0.183824,,,


In [145]:
# Filter the DataFrame to keep only rows where the specified column is not NaN
filtered_df = df_with_metrics[df_with_metrics['transaction_volume_num_sold_within_3km_90days'].notna()]

# Display the first 20 rows of the filtered DataFrame
filtered_df.head(10)


Unnamed: 0,Address,Asking Price,Beds,Baths,Property Type,Energy Rating,Eircode,Local Property Tax,Agency Name,Agency Contact,Price Changes,URL,MyHome_Address,MyHome_Asking_Price,MyHome_Beds,MyHome_Baths,MyHome_Floor_Area_Value,MyHome_BER_Rating,MyHome_Latitude,MyHome_Longitude,MyHome_Link,price_per_square_meter,is_house,Sold Asking Price,Sold Price,Sold Date,First List Date,my_home_listing,transaction_volume_num_sold_within_3km_90days,transaction_volume_avg_monthly_transactions_within_3km,transaction_volume_total_listings_last_90days_within_3km,transaction_volume_num_sold_within_5km_180days,transaction_volume_avg_monthly_transactions_within_5km,transaction_value_median_sold_price_within_3km_90days,transaction_value_p75_sold_price_within_3km_90days,transaction_value_avg_price_per_sqm_within_3km,transaction_value_median_asking_price_within_3km_90days,transaction_value_median_sold_price_within_5km_180days,transaction_value_p75_sold_price_within_5km_180days,transaction_value_avg_price_per_sqm_within_5km,transaction_value_avg_sold_price_within_5km_180days,price_dynamics_avg_price_diff_within_3km_90days,price_dynamics_median_price_change_pct_within_3km_90days,price_dynamics_percent_sold_above_asking_within_3km_90days,price_dynamics_avg_price_diff_within_5km_180days,price_dynamics_median_price_change_pct_within_5km_180days,price_dynamics_percent_sold_above_asking_within_5km_180days,time_based_avg_days_on_market_within_3km,time_based_median_days_on_market_within_3km,time_based_total_listings_last_90days_within_3km,time_based_avg_days_on_market_within_5km,time_based_median_days_on_market_within_5km,time_based_avg_time_to_sell_within_5km,time_based_median_time_on_market_last_180days_within_5km,property_condition_energy_rating_dist_UNKNOWN_within_3km,property_condition_energy_rating_dist_F_within_3km,property_condition_energy_rating_dist_G_within_3km,property_condition_percent_ber_A_within_5km,property_condition_avg_ber_within_3km,property_condition_percent_energy_efficient_within_3km,property_condition_median_ber_within_5km_180days,house_size_benchmark_avg_floor_area_within_3km_90days,house_size_benchmark_median_beds_within_3km_90days,house_size_benchmark_median_baths_within_3km_90days,house_size_benchmark_std_floor_area_within_3km,house_size_benchmark_percent_larger_than_avg_within_3km_90days,house_size_comparison_larger_than_avg_within_3km_90days,house_size_comparison_beds_above_median_within_3km_90days,house_size_comparison_baths_above_median_within_3km_90days,house_size_benchmark_avg_floor_area_within_5km_180days,house_size_benchmark_median_beds_within_5km_180days,house_size_benchmark_median_baths_within_5km_180days,house_size_benchmark_std_floor_area_within_5km,house_size_benchmark_percent_larger_than_avg_within_5km_180days,house_size_comparison_larger_than_avg_within_5km_180days,house_size_comparison_beds_above_median_within_5km_180days,house_size_comparison_baths_above_median_within_5km_180days,property_type_dist_Terrace_percent_within_3km,property_type_dist_Semi-D_percent_within_3km,property_type_dist_End Of Terrace_percent_within_3km,property_type_dist_Apartment_percent_within_3km,property_type_dist_Detached_percent_within_3km,property_type_percent_End Of Terrace_within_3km,property_type_percent_Terrace_within_3km,property_type_percent_Apartment_within_3km,property_type_percent_Bungalow_within_3km,property_type_percent_Semi-D_within_3km,property_type_percent_Detached_within_3km,property_type_percent_Duplex_within_3km,property_type_percent_Studio_within_3km,property_type_percent_Townhouse_within_3km,property_type_percent_Houses_within_3km,property_type_percent_Site_within_3km,property_type_median_count_within_3km_90days,property_type_unique_count_within_3km,property_type_dist_Apartment_percent_within_5km,property_type_dist_Terrace_percent_within_5km,property_type_dist_Semi-D_percent_within_5km,property_type_dist_End Of Terrace_percent_within_5km,property_type_dist_Detached_percent_within_5km,property_type_dist_Duplex_percent_within_5km,property_type_dist_Bungalow_percent_within_5km,property_type_dist_Townhouse_percent_within_5km,property_type_percent_End Of Terrace_within_5km,property_type_percent_Terrace_within_5km,property_type_percent_Apartment_within_5km,property_type_percent_Bungalow_within_5km,property_type_percent_Semi-D_within_5km,property_type_percent_Detached_within_5km,property_type_percent_Duplex_within_5km,property_type_percent_Studio_within_5km,property_type_percent_Townhouse_within_5km,property_type_percent_Houses_within_5km,property_type_percent_Site_within_5km,property_type_diversity_within_5km,listing_activity_num_active_within_3km,listing_activity_median_asking_price_active_within_3km,listing_activity_avg_price_changes_within_3km,listing_activity_avg_days_on_market_active_within_5km,listing_activity_percent_price_changes_within_5km,sales_velocity_avg_properties_sold_per_month_within_3km,sales_velocity_median_properties_sold_per_month_within_3km,sales_velocity_count_2024-05_within_3km,sales_velocity_count_2024-06_within_3km,sales_velocity_count_2024-07_within_3km,sales_velocity_count_2024-08_within_3km,sales_velocity_count_2024-09_within_3km,sales_velocity_avg_properties_sold_per_month_within_5km,sales_velocity_median_properties_sold_per_month_within_5km,sales_velocity_seasonal_month_5_within_5km,sales_velocity_seasonal_month_6_within_5km,sales_velocity_seasonal_month_7_within_5km,sales_velocity_seasonal_month_8_within_5km,sales_velocity_seasonal_month_9_within_5km,sales_velocity_comparative_3km_vs_5km,pricing_consistency_avg_asking_sold_ratio_within_3km,pricing_consistency_percent_minimal_changes_within_3km,pricing_consistency_price_change_freq_within_3km,pricing_consistency_avg_asking_sold_ratio_within_5km,pricing_consistency_median_asking_sold_ratio_within_5km_180days,pricing_consistency_avg_time_between_changes_within_5km,property_type_dist_Duplex_percent_within_3km,property_type_dist_Bungalow_percent_within_3km,property_type_dist_Townhouse_percent_within_3km,property_type_dist_Studio_percent_within_3km,property_type_dist_Site_percent_within_3km,property_type_dist_Studio_percent_within_5km,property_type_dist_Houses_percent_within_5km,property_type_dist_Site_percent_within_5km,sales_velocity_seasonal_month_3_within_5km,sales_velocity_count_2024-03_within_3km,property_type_dist_Houses_percent_within_3km
0,"Taramar, Middle Third, Dublin 5, D05X8N9",750000,4.0,1.0,End Of Terrace,UNKNOWN,D05 X8N9,765,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,9.500001e+35,https://mynest.ie/listing/126051/Dublin/tarama...,"Taramar, Middle Third, Killester, Dublin 5",367500.0,4.0,2.0,142.0,0.0,53.373857,-6.203371,https://www.myhome.ie/residential/brochure/tar...,5281.690141,True,750000.0,950000.0,2024-09-13,2024-09-13,True,143.0,47.666667,143.0,379.0,63.166667,500000.0,702500.0,5203.786709,450000.0,465000.0,587000.0,5323.267638,538454.844327,47455.440559,8.62069,90.20979,37491.387863,7.522936,82.849604,0.0,0.0,143.0,0.0,0.0,0.0,,87.5,9.722222,2.777778,0.0,0.0,0.0,0.0,97.743916,3.0,2.0,34.9228,26.249314,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,94.562916,3.0,2.0,37.754059,28.995058,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,32.638889,32.638889,17.361111,15.277778,2.083333,17.361111,32.638889,15.277778,0.0,32.638889,2.083333,0.0,0.0,0.0,0.0,0.0,24.0,5.0,29.287599,28.23219,21.372032,13.456464,2.902375,2.110818,1.846966,0.791557,13.456464,28.23219,29.287599,1.846966,21.372032,2.902375,2.110818,0.0,0.791557,0.0,0.0,8.0,0.0,,,,20.81274,24.0,16.0,1.0,2.0,42.0,83.0,16.0,63.166667,63.0,2.0,3.0,103.0,208.0,63.0,39.166667,1.091455,0.0,100.0,1.081899,1.075229,1.14512e+129,,,,,,,,,,,
1,"87 Haddington Road, Dublin 4, D04WP23",990000,5.0,3.0,Terrace,UNKNOWN,D04 WP23,1035,Turley Property Advisors,Susan Turley,1.01e+73,,"87 Haddington Road, Ballsbridge, Dublin 4",367500.0,5.0,3.0,175.0,0.0,53.335385,-6.239842,https://www.myhome.ie/residential/brochure/87-...,5657.142857,True,990000.0,1010000.0,2024-09-13,2024-09-13,True,249.0,83.0,249.0,542.0,90.333333,485000.0,744000.0,6238.726317,450000.0,516250.0,781500.0,5894.116163,687743.212177,35446.196787,5.747126,74.698795,50539.52214,8.235294,81.365314,0.0,0.0,249.0,0.0,0.0,0.0,,85.375494,7.509881,7.114625,0.0,0.0,0.0,0.0,96.255341,3.0,2.0,52.348116,26.963207,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,98.749797,3.0,2.0,50.319482,25.590335,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,30.434783,7.114625,7.114625,42.687747,4.743083,7.114625,30.434783,42.687747,2.371542,7.114625,4.743083,3.162055,0.395257,1.581028,0.0,0.395257,10.0,10.0,31.549815,33.948339,13.837638,9.778598,6.088561,1.660517,1.660517,0.922509,9.778598,33.948339,31.549815,1.660517,13.837638,6.088561,1.660517,0.184502,0.922509,0.184502,0.184502,11.0,0.0,,,,29.763866,42.166667,52.0,3.0,3.0,64.0,131.0,52.0,90.333333,93.0,4.0,5.0,147.0,293.0,93.0,48.166667,1.068193,0.0,100.0,1.092531,1.082353,4.7048020000000005e+142,3.162055,2.371542,1.581028,0.395257,0.395257,0.184502,0.184502,0.184502,,,
2,"Apartment 79, The Northumberlands, Love Lane E...",410000,2.0,1.0,Apartment,UNKNOWN,D02 X068,405,Owen Reilly,Owen Reilly Sales,4.800001e+35,https://mynest.ie/listing/134218/Dublin/apartm...,"79 The Northumberlands, Love Lane East, Mount ...",367500.0,2.0,2.0,64.0,0.0,53.349805,-6.26031,https://www.myhome.ie/residential/brochure/79-...,6406.25,False,410000.0,480000.0,2024-09-13,2024-09-13,True,286.0,95.333333,286.0,569.0,94.833333,463500.0,600000.0,5862.549143,425000.0,484000.0,675000.0,5671.616934,624116.750439,35161.888112,7.328814,77.972028,49248.033392,8.474576,82.776801,0.0,0.0,286.0,0.0,0.0,0.0,,79.790941,11.84669,8.362369,0.0,0.0,0.0,0.0,88.014336,3.0,2.0,40.132969,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,95.744745,3.0,2.0,43.837107,28.00659,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,40.418118,5.923345,9.756098,35.888502,2.090592,9.756098,40.418118,35.888502,2.439024,5.923345,2.090592,1.74216,0.348432,1.045296,0.0,0.348432,6.5,10.0,29.525483,34.622144,15.817223,11.950791,3.690685,1.757469,1.405975,0.878735,11.950791,34.622144,29.525483,1.405975,15.817223,3.690685,1.757469,0.175747,0.878735,0.0,0.175747,10.0,0.0,,,,31.246568,47.833333,64.0,,4.0,72.0,155.0,56.0,94.833333,93.0,4.0,6.0,156.0,310.0,93.0,47.0,1.085347,0.0,100.0,1.104922,1.084746,4.481552e+142,1.74216,2.439024,1.045296,0.348432,0.348432,0.175747,,0.175747,,,
3,"7 Parkside Heath, Clongriffin, Dublin 13, Dubl...",535000,3.0,3.0,Terrace,UNKNOWN,D13 WN3C,585,Sherry FitzGerald Sutton,Madeleine O'Connor,5.690000999999999e+35,https://mynest.ie/listing/128619/Dublin/7-park...,"7 Parkside Heath, Balgriffin, Dublin 13",367500.0,3.0,3.0,113.0,0.0,53.407653,-6.163418,https://www.myhome.ie/residential/brochure/7-p...,4734.513274,True,535000.0,569000.0,2024-09-13,2024-09-13,True,83.0,27.666667,83.0,199.0,33.166667,460000.0,569500.0,4666.142133,415000.0,485000.0,708000.0,4976.018912,576378.678392,39643.457831,8.586941,83.13253,37530.437186,8.029197,84.422111,0.0,0.0,83.0,0.0,0.0,0.0,,90.361446,4.819277,4.819277,0.0,0.0,0.0,0.0,102.419036,3.0,2.0,34.489221,23.778144,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,104.375503,3.0,2.0,37.211915,23.119165,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,25.301205,22.891566,14.457831,20.481928,9.638554,14.457831,25.301205,20.481928,2.409639,22.891566,9.638554,4.819277,0.0,0.0,0.0,0.0,12.0,7.0,19.59799,20.603015,31.658291,14.572864,8.040201,3.015075,2.512563,,14.572864,20.603015,19.59799,2.512563,31.658291,8.040201,3.015075,0.0,0.0,0.0,0.0,7.0,0.0,,,,10.928062,13.833333,25.0,,,25.0,36.0,22.0,33.166667,37.0,1.0,1.0,53.0,107.0,37.0,19.333333,1.090615,0.0,100.0,1.08337,1.080292,1.7336699999999998e+105,4.819277,2.409639,,,,,,,,,
4,"5 Herbert Road, Blanchardstown, Blanchardstown...",400000,2.0,1.0,Bungalow,UNKNOWN,D15 A3TN,405,Lloyd Daly & Associates Ltd.,Lloyd Daly & Associates - Sales,4.322001e+35,https://mynest.ie/listing/117502/Dublin/5-herb...,"5 Herbert Road, Blanchardstown, Dublin 15",367500.0,2.0,2.0,116.0,0.0,53.387605,-6.374562,https://www.myhome.ie/residential/brochure/5-h...,3448.275862,True,400000.0,432200.0,2024-09-13,2024-09-13,True,79.0,26.333333,79.0,167.0,27.833333,410000.0,545000.0,4668.633417,375000.0,400000.0,475000.0,4567.196733,456169.874251,37807.518987,6.875,87.341772,42572.898204,7.84,86.227545,0.0,0.0,79.0,0.0,0.0,0.0,,98.734177,1.265823,,0.0,0.0,0.0,0.0,93.922785,3.0,2.0,25.743827,29.599121,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,93.679641,3.0,2.0,30.920051,29.599121,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,5.063291,45.56962,2.531646,26.582278,8.860759,2.531646,5.063291,26.582278,2.531646,45.56962,8.860759,8.860759,0.0,0.0,0.0,0.0,7.0,7.0,26.347305,15.568862,35.928144,8.383234,4.790419,7.784431,1.197605,,8.383234,15.568862,26.347305,1.197605,35.928144,4.790419,7.784431,0.0,0.0,0.0,0.0,7.0,0.0,,,,9.170785,13.166667,17.0,,2.0,24.0,43.0,10.0,27.833333,27.0,,4.0,43.0,92.0,27.0,14.666667,1.078534,0.0,100.0,1.107067,1.0784,3.6107799999999997e+93,8.860759,2.531646,,,,,,,1.0,,
5,"Apartment 40, The Swift, Tassagard Greens, Sag...",290000,2.0,2.0,Apartment,UNKNOWN,D24 KF58,315,Smith & Butler Estates,Danny Butler,3.400001e+59,https://mynest.ie/listing/106324/Dublin/apartm...,,367500.0,3.0,2.0,93.325,0.0,53.281393,-6.440669,https://www.myhome.ie/priceregister/40-the-swi...,5111.455108,False,290000.0,340000.0,2024-09-13,2024-09-13,False,36.0,12.0,36.0,77.0,12.833333,347500.0,465250.0,4870.7614,305000.0,340000.0,435000.0,4542.911616,367649.337662,33280.555556,10.682904,88.888889,35614.272727,10.456717,92.207792,0.0,0.0,36.0,0.0,0.0,0.0,,100.0,,,0.0,0.0,0.0,0.0,89.85625,3.0,2.0,17.181166,73.531027,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,88.48026,3.0,2.0,18.137482,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,16.666667,27.777778,,25.0,5.555556,0.0,16.666667,25.0,5.555556,27.777778,5.555556,13.888889,0.0,5.555556,0.0,0.0,5.0,7.0,27.272727,15.584416,35.064935,5.194805,3.896104,6.493506,3.896104,2.597403,5.194805,15.584416,27.272727,3.896104,35.064935,3.896104,6.493506,0.0,2.597403,0.0,0.0,8.0,0.0,,,,4.228446,6.0,7.5,,1.0,10.0,20.0,5.0,12.833333,18.0,,4.0,24.0,37.0,12.0,6.833333,1.110929,0.0,100.0,1.115273,1.104567,6.753249e+105,13.888889,5.555556,5.555556,,,,,,,,
6,"31 Tibradden Grove, Dublin 12, D12P2X4",355000,3.0,1.0,Terrace,UNKNOWN,D12 P2X4,405,Byrne and Moore Property Consultants Limited,Bryne & Moore Property,3.900001e+59,,"14, Tibradden Grove, Greenpark, Walkinstown, D...",367500.0,3.0,2.0,106.0,0.0,53.308228,-6.34192,https://www.myhome.ie/residential/brochure/14-...,3349.056604,True,355000.0,390000.0,2024-09-13,2024-09-13,True,89.0,29.666667,89.0,322.0,53.666667,475000.0,695000.0,4927.264907,425000.0,471250.0,636250.0,5018.887098,559051.586957,50984.831461,14.814815,85.393258,49030.934783,11.099034,88.509317,0.0,0.0,89.0,0.0,0.0,0.0,,87.640449,8.988764,3.370787,0.0,0.0,0.0,0.0,101.87191,3.0,2.0,28.856674,24.272378,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,99.004053,3.0,2.0,37.621789,25.53542,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,25.842697,37.078652,16.853933,8.988764,8.988764,16.853933,25.842697,8.988764,1.123596,37.078652,8.988764,0.0,0.0,0.0,0.0,1.123596,8.0,7.0,17.391304,29.503106,29.813665,12.732919,7.142857,1.863354,0.931677,0.310559,12.732919,29.503106,17.391304,0.931677,29.813665,7.142857,1.863354,0.0,0.310559,0.0,0.310559,9.0,0.0,,,,17.682592,14.833333,21.0,,1.0,31.0,46.0,11.0,53.666667,71.5,,8.0,103.0,171.0,40.0,38.833333,1.115904,0.0,100.0,1.12014,1.11099,1.91615e+93,,1.123596,,,1.123596,,,0.310559,,,
7,"Apartment 207, The Edges 1, Beacon South Quart...",375000,2.0,2.0,Apartment,UNKNOWN,D18 KX68,405,Herbert & Lansdowne Estate Agents,Sales Department,3.870001e+35,https://mynest.ie/listing/109892/Dublin/apartm...,"207 The Edges 1, Beacon South Quarter, Sandyfo...",367500.0,2.0,2.0,64.0,0.0,53.27797,-6.216868,https://www.myhome.ie/residential/brochure/207...,5859.375,False,375000.0,387000.0,2024-09-13,2024-09-13,True,177.0,59.0,177.0,338.0,56.333333,680000.0,875000.0,5666.725351,635000.0,680000.0,908000.0,5827.882601,800365.488166,63078.305085,7.692308,87.00565,69724.659763,7.609562,86.686391,0.0,0.0,177.0,0.0,0.0,0.0,,92.655367,5.649718,1.694915,0.0,0.0,0.0,0.0,109.866695,3.0,2.0,44.505927,21.252059,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,111.257485,3.0,2.0,50.655987,20.208677,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,9.60452,34.463277,2.824859,32.768362,17.514124,2.824859,9.60452,32.768362,2.824859,34.463277,17.514124,0.0,0.0,0.0,0.0,0.0,24.0,6.0,28.994083,14.497041,31.065089,3.550296,18.934911,0.591716,1.775148,0.295858,3.550296,14.497041,28.994083,1.775148,31.065089,18.934911,0.591716,0.0,0.295858,0.295858,0.0,9.0,0.0,,,,18.56123,29.5,44.5,,2.0,62.0,86.0,27.0,56.333333,75.5,,2.0,104.0,185.0,47.0,26.833333,1.091014,0.0,100.0,1.094038,1.076096,2.455622e+141,,2.824859,,,,,0.295858,,,,
8,"80 Moatfield Road, Coolock, Coolock, Dublin 5,...",395000,3.0,1.0,Terrace,UNKNOWN,D05 X9C0,405,Hamill Estate Agents & Valuers,Hamill Estate Agents & Valuers,4.3500009999999997e+71,https://mynest.ie/listing/116212/Dublin/80-moa...,"80 Moatfield Road, Coolock, Dublin 5",367500.0,3.0,2.0,95.0,0.0,53.386624,-6.192724,https://www.myhome.ie/residential/brochure/80-...,4157.894737,True,395000.0,435000.0,2024-09-12,2024-09-12,True,128.0,42.666667,128.0,275.0,45.833333,475000.0,612500.0,4991.819206,440000.0,462000.0,582750.0,4968.799323,517777.807273,46750.390625,8.63082,89.0625,42613.443636,8.586941,88.0,0.0,0.0,128.0,0.0,0.0,0.0,,88.372093,8.527132,3.100775,0.0,0.0,0.0,0.0,98.965977,3.0,2.0,32.797053,25.590335,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,95.566691,3.0,2.0,30.823048,28.00659,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,24.806202,36.434109,17.829457,15.503876,2.325581,17.829457,24.806202,15.503876,2.325581,36.434109,2.325581,0.775194,0.0,0.0,0.0,0.0,20.0,7.0,20.727273,29.818182,27.272727,15.272727,3.636364,1.454545,1.818182,,15.272727,29.818182,20.727273,1.818182,27.272727,3.636364,1.454545,0.0,0.0,0.0,0.0,7.0,0.0,,,,15.101593,21.5,19.0,1.0,1.0,37.0,71.0,19.0,45.833333,42.0,2.0,3.0,77.0,151.0,42.0,24.333333,1.092713,0.0,100.0,1.090264,1.085869,1.578183e+129,0.775194,2.325581,,,,,,,,,
9,"79 South Circular Road, Dublin 8, D08HR77",1350000,4.0,3.0,Terrace,UNKNOWN,D08 HR77,1846,Leonard Wilson Keenan Estates & Letting Agents,Patrick Leonard,1.49e+38,https://mynest.ie/listing/133579/Dublin/79-sou...,,367500.0,3.0,3.0,173.0,0.0,53.335004,-6.293548,https://www.myhome.ie/residential/dublin-8/per...,7803.468208,True,1350000.0,1490000.0,2024-09-12,2024-09-12,True,220.0,73.333333,220.0,544.0,90.666667,458000.0,592250.0,5624.573058,395000.0,488000.0,725000.0,5653.663287,630817.266544,51538.645455,10.797103,84.090909,49241.53125,8.376667,81.25,0.0,0.0,220.0,0.0,0.0,0.0,,81.696429,8.482143,9.821429,0.0,0.0,0.0,0.0,88.717773,3.0,2.0,40.163022,73.860516,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,96.729605,3.0,2.0,45.173491,26.853377,0 1.0 1 1.0 2 0.0 3 1....,0 1.0 1 1.0 2 0.0 3 0....,0 0.0 1 1.0 2 0.0 3 1....,44.196429,5.803571,15.178571,29.017857,2.232143,15.178571,44.196429,29.017857,1.339286,5.803571,2.232143,0.892857,0.446429,0.446429,0.0,0.446429,4.0,10.0,30.882353,34.007353,15.992647,10.661765,3.860294,2.022059,1.286765,0.735294,10.661765,34.007353,30.882353,1.286765,15.992647,3.860294,2.022059,0.183824,0.735294,0.183824,0.183824,11.0,0.0,,,,29.873696,37.333333,41.0,3.0,2.0,59.0,119.0,41.0,90.666667,91.0,3.0,7.0,144.0,299.0,91.0,53.333333,1.118264,0.0,100.0,1.105554,1.083767,4.6875050000000005e+142,0.892857,1.339286,0.446429,0.446429,0.446429,0.183824,0.183824,0.183824,,,


In [146]:
import pandas as pd

# Function to return column statistics including mean, median, 75th percentile, and non-null counts
def get_column_info_with_stats(df):
    # Filter numeric columns
    numeric_columns = df.select_dtypes(include=['number']).columns
    
    # Create the base DataFrame with column names and data types
    stats = pd.DataFrame({
        'Column Name': df.columns,
        'Data Type': df.dtypes
    })
    
    # Calculate statistics only for numeric columns
    stats['Mean'] = df[numeric_columns].mean()
    stats['Median'] = df[numeric_columns].median()
    stats['75th Percentile'] = df[numeric_columns].quantile(0.75)
    
    # Calculate the count of non-null values for all columns
    stats['Non-null Count'] = df.notnull().sum()

    return stats

# Example usage with your dataframe
# Assuming `df_with_metrics` is your dataframe with metrics
df_stats = get_column_info_with_stats(df_with_metrics)

# Display the first 30 rows
df_stats.head(30)


Unnamed: 0,Column Name,Data Type,Mean,Median,75th Percentile,Non-null Count
Address,Address,object,,,,1821
Asking Price,Asking Price,int64,580046.0,449000.0,675000.0,1821
Beds,Beds,float64,2.880835,3.0,3.0,1821
Baths,Baths,float64,1.965953,2.0,3.0,1821
Property Type,Property Type,object,,,,1821
Energy Rating,Energy Rating,object,,,,1821
Eircode,Eircode,object,,,,1821
Local Property Tax,Local Property Tax,int64,627.5409,495.0,675.0,1821
Agency Name,Agency Name,object,,,,1812
Agency Contact,Agency Contact,object,,,,1821
