# Data Exploration and Cleaning

## Loading Data:
- Used Python libraries (Pandas, NumPy) to load the data into Jupyter Notebook.

## Data Cleaning:
- Handling missing values (imputation or removal).
- Removing duplicates.
- Correcting data types.
- Outlier detection and treatment.



In [2]:
# loading the data from csv to dataframes 
import pandas as pd
import numpy as np

flats_df = pd.read_csv("/Users/guliaharsh021/Downloads/DA Documents /Projects/Project 1/Data Collection/Flats Data/flats_data.csv")

In [3]:
# viewing the first few rows of flats data

flats_df.head()

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size,Furnishing,Price Total,Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1.0,400 sqft,Unfurnished,₹24 Lac,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1.0,380 sqft,Semi-Furnished,₹13 Lac,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1.0,500 sqft,Semi-Furnished,₹20 Lac,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1.0,400 sqft,Semi-Furnished,₹25 Lac,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1.0,366 sqft,Unfurnished,₹17 Lac,"₹4,645 per sqft"


In [4]:
# total number of rows 
flats_df.shape[0]

74264

In [5]:
# converting the "BHK" column to integer type
flats_df['BHK'] = flats_df['BHK'].astype(int)
flats_df.head(5)

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size,Furnishing,Price Total,Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400 sqft,Unfurnished,₹24 Lac,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380 sqft,Semi-Furnished,₹13 Lac,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500 sqft,Semi-Furnished,₹20 Lac,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400 sqft,Semi-Furnished,₹25 Lac,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366 sqft,Unfurnished,₹17 Lac,"₹4,645 per sqft"


In [6]:
lst = flats_df['Property Size'].str[-4:]

In [7]:
print(lst.unique())

['sqft' ' sqm' 'qyrd' 'sale' 'Move' 'of 5' 'ound' 'of 3' nan 'of 4' '1'
 'acre' 'f 25' 'tion' ' are' '3' 'igha' 'erch']


In [8]:
df = pd.DataFrame(lst)

In [10]:
grouped_df = df.groupby('Property Size').size().reset_index(name='Count')
grouped_df

Unnamed: 0,Property Size,Count
0,are,1
1,sqm,1106
2,1,2
3,3,2
4,Move,17
5,acre,2
6,erch,8
7,f 25,2
8,igha,1
9,of 3,4


In [11]:
# Filtering rows where 'Property Size' contains 'sqft', 'qyrd' or 'sqm'
flats_df = flats_df[flats_df['Property Size'].str.contains('sqft|qyrd|sqm', regex=True, na=False)]
flats_df.head(10)

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size,Furnishing,Price Total,Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400 sqft,Unfurnished,₹24 Lac,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380 sqft,Semi-Furnished,₹13 Lac,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500 sqft,Semi-Furnished,₹20 Lac,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400 sqft,Semi-Furnished,₹25 Lac,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366 sqft,Unfurnished,₹17 Lac,"₹4,645 per sqft"
5,Prem Apartments,"1 BHK Apartment for Sale in Prem Apartments, K...","Multistorey-Apartment,Builder-Floor-Apartment,...","Kishangarh, Vasant Kunj New Delhi",1,44 sqft,Unfurnished,₹16 Lac,"₹4,444 per sqft"
6,DDA LIG Flats Rohini,1 BHK Apartment for Sale in DDA LIG Flats Rohi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",1 BHK Apartment for Sale in DDA LIG Flats Rohi...,1,500 sqft,Unfurnished,₹15 Lac,"₹3,000 per sqft"
7,,1 BHK Builder Floor for Sale in Rani Bagh Pita...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500 sqft,Unfurnished,₹21 Lac,"₹4,200 per sqft"
8,,1 BHK Apartment for Sale in Sangam Vihar Wazir...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,446 sqft,Unfurnished,₹15 Lac,"₹3,000 per sqft"
9,,1 BHK Builder Floor for Sale in Bhagwati Vihar...,"Multistorey-Apartment,Builder-Floor-Apartment,...",1 BHK Builder Floor for Sale in Bhagwati Vihar...,1,450 sqft,Unfurnished,₹21 Lac,"₹4,516 per sqft"


In [12]:
# Function to convert values to sqft
def convert_to_sqft(value):
    if pd.isna(value):
        return None
    elif 'sqft' in value:
        return float(value.split()[0].replace(',', '')) 
    elif 'qyrd' in value:
        return float(value.split()[0].replace(',', '')) * 9  
    elif 'sqm' in value:
        return float(value.split()[0].replace(',', '')) * 10.7639 
    else:
        return None 

flats_df['Property Size'] = flats_df['Property Size'].apply(convert_to_sqft)

In [13]:
flats_df.head(10)

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size,Furnishing,Price Total,Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400.0,Unfurnished,₹24 Lac,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380.0,Semi-Furnished,₹13 Lac,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500.0,Semi-Furnished,₹20 Lac,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400.0,Semi-Furnished,₹25 Lac,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366.0,Unfurnished,₹17 Lac,"₹4,645 per sqft"
5,Prem Apartments,"1 BHK Apartment for Sale in Prem Apartments, K...","Multistorey-Apartment,Builder-Floor-Apartment,...","Kishangarh, Vasant Kunj New Delhi",1,44.0,Unfurnished,₹16 Lac,"₹4,444 per sqft"
6,DDA LIG Flats Rohini,1 BHK Apartment for Sale in DDA LIG Flats Rohi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",1 BHK Apartment for Sale in DDA LIG Flats Rohi...,1,500.0,Unfurnished,₹15 Lac,"₹3,000 per sqft"
7,,1 BHK Builder Floor for Sale in Rani Bagh Pita...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500.0,Unfurnished,₹21 Lac,"₹4,200 per sqft"
8,,1 BHK Apartment for Sale in Sangam Vihar Wazir...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,446.0,Unfurnished,₹15 Lac,"₹3,000 per sqft"
9,,1 BHK Builder Floor for Sale in Bhagwati Vihar...,"Multistorey-Apartment,Builder-Floor-Apartment,...",1 BHK Builder Floor for Sale in Bhagwati Vihar...,1,450.0,Unfurnished,₹21 Lac,"₹4,516 per sqft"


In [14]:
flats_df = flats_df.rename(columns={'Property Size': 'Property Size (sqft)'})
flats_df['Property Size (sqft)'] = flats_df['Property Size (sqft)'].astype(int)
flats_df.head()

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price Total,Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Unfurnished,₹24 Lac,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380,Semi-Furnished,₹13 Lac,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500,Semi-Furnished,₹20 Lac,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Semi-Furnished,₹25 Lac,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366,Unfurnished,₹17 Lac,"₹4,645 per sqft"


In [15]:
# Filtering rows where 'Furnshing' contains 'Semi-Furnished', 'Unfurnished' or 'Furnished'
flats_df = flats_df[flats_df['Furnishing'].str.contains('Semi-Furnished|Unfurnished|Furnished', regex=True, na=False)]

In [16]:
# checking the data
lst = flats_df['Furnishing']
print(lst.unique())

['Unfurnished' 'Semi-Furnished' 'Furnished']


In [17]:
# Renaming the 'Price Total' column to 'Price (INR)'
flats_df = flats_df.rename(columns={'Price Total': 'Price (INR)'})

# Function to clean and convert 'Price (INR)' to numeric
def clean_and_convert_price(price):
    if pd.isna(price) or price == 'NA':  
        return None
    cleaned_price = ''.join(filter(str.isdigit, str(price)))
    if cleaned_price == '':
        return None
    return float(cleaned_price) * 100000 

# Clean and convert 'Price (INR)' column
flats_df['Price (INR)'] = flats_df['Price (INR)'].apply(clean_and_convert_price)

flats_df.head()

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price (INR),Price per Sqft
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Unfurnished,2400000.0,"₹4,364 per sqft"
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380,Semi-Furnished,1300000.0,"₹3,250 per sqft"
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500,Semi-Furnished,2000000.0,"₹3,636 per sqft"
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Semi-Furnished,2500000.0,"₹5,435 per sqft"
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366,Unfurnished,1700000.0,"₹4,645 per sqft"


In [18]:
# Drop rows with NaN values after conversion
flats_df = flats_df.dropna(subset=['Price (INR)', 'Property Size (sqft)', 'Property Size (sqft)'])

# Convert to integer type
flats_df['Price (INR)'] = flats_df['Price (INR)'].astype(int)

In [19]:
flats_df.shape[0]

71350

In [20]:
# Renaming the 'Price per Sqft' column to 'Price (per sqft)'
flats_df = flats_df.rename(columns={'Price per Sqft': 'Price (per sqft)'})

# Clean and convert 'Price (per sqft)' column
flats_df['Price (per sqft)'] = flats_df['Price (INR)'] / flats_df['Property Size (sqft)'].astype(float)

flats_df.head()

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price (INR),Price (per sqft)
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Unfurnished,2400000,6000.0
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380,Semi-Furnished,1300000,3421.052632
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500,Semi-Furnished,2000000,4000.0
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Semi-Furnished,2500000,6250.0
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366,Unfurnished,1700000,4644.808743


In [21]:
# rounding off the values in column "Price (per sqft)"
flats_df['Price (per sqft)'] = flats_df['Price (per sqft)'].astype(int)
flats_df.head()

Unnamed: 0,Property Name,Property Title,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price (INR),Price (per sqft)
0,,1 BHK Apartment for Sale in Saket New Delhi,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Unfurnished,2400000,6000
1,,1 BHK Builder Floor for Sale in Uttam Nagar Ne...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,380,Semi-Furnished,1300000,3421
2,,1 BHK Builder Floor for Sale in Dwarka Mor New...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,500,Semi-Furnished,2000000,4000
3,,1 BHK Builder Floor for Sale in Block A Mahavi...,"Multistorey-Apartment,Builder-Floor-Apartment,...",,1,400,Semi-Furnished,2500000,6250
4,Neelkanth Plots,"1 BHK Apartment for Sale in Neelkanth Plots, S...","Multistorey-Apartment,Builder-Floor-Apartment,...","1 BHK Apartment for Sale in Neelkanth Plots, S...",1,366,Unfurnished,1700000,4644


In [22]:
flats_df['Property Title'] = flats_df['Property Title'].str.replace(r'new delhi', 'Delhi')

In [23]:
flats_df['Property Title'] = flats_df['Property Title'].str.replace(r'delhi', 'Delhi')

In [24]:
flats_df['Property Title'] = flats_df['Property Title'].str.replace(r'New Delhi', 'Delhi')
flats_df['Property Title'] = flats_df['Property Title'].str.replace(r'-', ' ')

In [25]:
# Dictionary of localities
Localities = {
    'Delhi': [
        'Adarsh Nagar', 'Ashok Vihar', 'Bawana', 'Begum Pur', 'Haqiqat Nagar', 'Karala', 'Keshav Puram', 'Narela', 'Pitam Pura', 'Rohini', 'Rani Bagh', 'Shalimar Bagh', 'Shastri Nagar', 'Azadpur', 'Civil Lines', 'Derawal Nagar', 'Gulabi Bagh', 'Kamla Nagar', 'Kashmiri Gate', 'Daryaganj', 'Model Town', 'Sadar Bazaar', 'Sarai Rohilla', 'Shakti Nagar', 'Tis Hazari', 'Timarpur', 'Wazirabad', 'GTB Nagar', 'Urdu Bazaar', 'Mukherjee Nagar', 'Majnu ka tilla', 'Babarpur', 'Bhajanpura', 'Dayal Pur', 'Dilshad Garden', 'Karawal Nagar', 'Naveen Shahdara', 'Nand Nagri', 'Shahdara', 'Shastri Park', 'Seelampur', 'Yamuna Vihar', 'Central Delhi', 'Ashok Nagar', 'Chandni Chowk', 'Civil Lines', 'Daryaganj', 'Dariba Kalan', 'Karol Bagh', 'Old Delhi', 'Shastri Nagar', 'South Patel Nagar', 'Sadar Bazaar', 'Paharganj', 'Rajender Nagar', 'Barakhamba Road', 'Chanakyapuri', 'Connaught Place', 'Gole Market', 'Golf Links, New Delhi', 'INA Colony', 'Inder Puri', 'Jaffrabad', 'Laxmibai Nagar', 'Lodhi Colony', "Lutyens' Delhi", 'Mahipalpur', 'New Delhi', 'Pragati Maidan', 'Raisina Hill', 'Rajendra Place', 'East Vinod Nagar', 'Krishna Nagar', 'Laxmi Nagar', 'Mayur Vihar', 'Pandav Nagar', 'Preet Vihar', 'Anand Vihar', 'Shreshtha Vihar', 'Vivek Vihar', 'Vasundhara Enclave', 'Geetanjali Enclave', 'Green Park', 'Gulmohar Park', 'Hauz Khas', 'Khanpur', 'Kailash Colony', 'Malviya Nagar', 'Maharani Bagh', 'Moti Bagh', 'New Moti Bagh', 'Mehrauli', 'Munirka', 'Netaji Nagar', 'Pamposh Enclave', 'Safdarjung Enclave', 'Sainik Farm', 'Saket', 'Sarojini Nagar', 'Sarvodaya Enclave', 'Shaheen Bagh', 'Siri Fort', 'South Extension', 'Shahpur Jat', 'Sriniwaspuri', 'Ashram Chowk', 'Lodhi Colony', 'Khan Market', 'Netaji Nagar', 'Nizamuddin East', 'Nizamuddin West', 'Sarai Kale Khan', 'Jangpura', 'Defence Colony', 'Lajpat Nagar', 'New Friends Colony', 'Nehru Place', 'Chittaranjan Park', 'Govindpuri', 'Greater Kailash', 'Okhla', 'Sarita Vihar', 'Sarai Kale Khan', 'Tughlaqabad', 'Badarpur', 'Pul Pehladpur', 'Ber Sarai', 'Dabri', 'Dashrath Puri', 'Dwarka', 'Delhi Cantonment', 'Dhaula Kuan', 'Ghitorni', 'Inderpuri', 'Janakpuri', 'Mahipalpur', 'Moti Bagh', 'Munirka', 'Najafgarh', 'Naraina', 'Palam', 'Rama Krishna Puram', 'Sagar Pur', 'Sarojini Nagar', 'Vasant Kunj', 'Vasant Vihar', 'Kalkaji', 'Ashok Nagar', 'Bali Nagar', 'Fateh Nagar', 'Kirti Nagar', 'Meera Bagh', 'Mayapuri', 'Moti Nagar', 'Nangloi Jat', 'Nangloi', 'Dwarka Mor', 'Paschim Vihar', 'Patel Nagar', 'Punjabi Bagh', 'Rajouri Garden', 'Shivaji Place', 'Shadipur Depot', 'Shiv Ram Park', 'Tihar Village', 'Tilak Nagar', 'Tikri Kalan', 'Vikas Nagar', 'Vikaspuri', 'Patel Nagar', 'Uttam Nagar', 'New Delhi'],
}

In [26]:
import re

def populate_locality(df, localities_dict):
    for index, row in df.iterrows():
        text = row['Property Title']
        matched_key = None
        matched_value = None
        
        # Search for keys in the text
        for key in localities_dict:
            if re.search(r'\b{}\b'.format(re.escape(key)), text, flags=re.IGNORECASE):
                matched_key = key
                break
        
        # If a key is found, search for its values
        if matched_key:
            values = localities_dict[matched_key]
            for value in values:
                if re.search(r'\b{}\b'.format(re.escape(value)), text, flags=re.IGNORECASE):
                    matched_value = value
                    break
        
        # Assign the matched value to the City/Locality column
        if matched_key and matched_value:
            df.at[index, 'City/Locality'] = f"{matched_value}, {matched_key}"
        else:
            df.at[index, 'City/Locality'] = "No matching locality found."

# Apply the function to populate the City/Locality column
populate_locality(flats_df, Localities)

In [27]:
# Count rows with "No matching locality found."
no_match_count = (flats_df['City/Locality'] == "No matching locality found.").sum()
# Display the count of rows with no matching locality found
print(f"Number of rows with 'No matching locality found.': {no_match_count}")

Number of rows with 'No matching locality found.': 15252


In [28]:
flats_df.drop(flats_df[flats_df['City/Locality'] == 'No matching locality found.'].index, inplace=True)

In [29]:
flats_df.shape[0]

56098

In [30]:
# Dropping columns 'Property Name' and 'Property Title'
flats_df.drop(columns=['Property Name', 'Property Title'], inplace=True)

In [31]:
flats_df.head()

Unnamed: 0,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price (INR),Price (per sqft)
0,"Multistorey-Apartment,Builder-Floor-Apartment,...","Saket, Delhi",1,400,Unfurnished,2400000,6000
1,"Multistorey-Apartment,Builder-Floor-Apartment,...","Uttam Nagar, Delhi",1,380,Semi-Furnished,1300000,3421
2,"Multistorey-Apartment,Builder-Floor-Apartment,...","Dwarka, Delhi",1,500,Semi-Furnished,2000000,4000
4,"Multistorey-Apartment,Builder-Floor-Apartment,...","Rohini, Delhi",1,366,Unfurnished,1700000,4644
5,"Multistorey-Apartment,Builder-Floor-Apartment,...","Vasant Kunj, Delhi",1,44,Unfurnished,1600000,36363


In [32]:
localities_delhi = {
        "North West Delhi": [
        "Adarsh Nagar", "Ashok Vihar", "Bawana", "Begum Pur", "Haqiqat Nagar",
        "Karala", "Keshav Puram", "Narela", "Pitam Pura", "Rohini",
        "Rani Bagh", "Shalimar Bagh", "Shastri Nagar"
    ],
    "North Delhi": [
        "Azadpur", "Civil Lines", "Derawal Nagar", "Gulabi Bagh", "Kamla Nagar",
        "Kashmiri Gate", "Daryaganj", "Model Town", "Narela", "Sadar Bazaar",
        "Sarai Rohilla", "Shakti Nagar", "Tis Hazari", "Timarpur", "Wazirabad",
        "GTB Nagar", "Urdu Bazaar", "Mukherjee Nagar", "Majnu ka tilla"
    ],
    "North East Delhi": [
        "Babarpur", "Bhajanpura", "Dayal Pur", "Dilshad Garden", "Karawal Nagar",
        "Naveen Shahdara", "Nand Nagri", "Shahdara", "Shastri Park", "Seelampur",
        "Yamuna Vihar"
    ],
    "Central Delhi": [
        "Ashok Nagar", "Chandni Chowk", "Civil Lines", "Daryaganj", "Dariba Kalan",
        "Karol Bagh", "Old Delhi", "Shastri Nagar", "Patel Nagar", "Sadar Bazaar",
        "Paharganj", "Rajender Nagar"
    ],
    "New Delhi": [
        "Barakhamba Road", "Chanakyapuri", "Connaught Place", "Gole Market",
        "Golf Links", "INA Colony", "Inder Puri", "Jaffrabad", "Laxmibai Nagar",
        "Lodhi Colony", "Lutyens' Delhi", "Mahipalpur", "New Delhi", "Pragati Maidan",
        "Raisina Hill", "Rajendra Place"
    ],
    "East Delhi": [
        "East Vinod Nagar", "Krishna Nagar", "Laxmi Nagar", "Mayur Vihar",
        "Pandav Nagar", "Preet Vihar", "Anand Vihar", "Shreshtha Vihar", "Vivek Vihar",
        "Vasundhara Enclave"
    ],
    "South Delhi": [
        "Geetanjali Enclave", "Green Park", "Gulmohar Park", "Hauz Khas", "Khanpur",
        "Kailash Colony", "Malviya Nagar", "Maharani Bagh", "Moti Bagh", "New Moti Bagh",
        "Mehrauli", "Munirka", "Netaji Nagar", "Pamposh Enclave", "Safdarjung Enclave",
        "Sainik Farm", "Saket", "Sarojini Nagar", "Sarvodaya Enclave", "Shaheen Bagh",
        "Siri Fort", "South Extension", "Shahpur Jat", "Sriniwaspuri", "Shahpur", "Sainik Farms"
    ],
    "South East Delhi": [
        "Ashram Chowk", "Lodhi Colony", "Khan Market", "Netaji Nagar", "Nizamuddin East",
        "Nizamuddin West", "Sarai Kale Khan", "Jangpura", "Defence Colony", "Lajpat Nagar",
        "New Friends Colony", "Nehru Place", "Chittaranjan Park", "Govindpuri",
        "Greater Kailash", "Okhla", "Sarita Vihar", "Sarai Kale Khan", "Tughlaqabad",
        "Badarpur", "Pul Pehladpur"
    ],
    "South West Delhi": [
        "Ber Sarai", "Dabri", "Dashrath Puri", "Dwarka", "Delhi Cantonment",
        "Dhaula Kuan", "Ghitorni", "Inderpuri", "Janakpuri", "Mahipalpur", "Moti Bagh",
        "Munirka", "Najafgarh", "Naraina", "Palam", "Rama Krishna Puram", "Sagar Pur",
        "Sarojini Nagar", "Vasant Kunj", "Vasant Vihar", "Kalkaji"
    ],
    "West Delhi": [
        "Ashok Nagar", "Bali Nagar", "Fateh Nagar", "Kirti Nagar", "Meera Bagh",
        "Mayapuri", "Moti Nagar", "Nangloi Jat", "Paschim Vihar", "Patel Nagar",
        "Punjabi Bagh", "Rajouri Garden", "Shivaji Place", "Shadipur Depot", "Shiv Ram Park",
        "Tihar Village", "Tilak Nagar", "Tikri Kalan", "Vikas Nagar", "Vikaspuri",
        "West Patel Nagar", "Uttam Nagar", "Nangloi", "Tikri", "Dwarka Mor"
    ]
}

In [33]:
flats_df['City/Locality'] = flats_df['City/Locality'].str.replace(r', Delhi', '')

# Function to update City/Locality column
def update_locality(row):
    for region, localities in localities_delhi.items():
        for locality in localities:
            if row['City/Locality'] == locality:
                return f"{locality}, {region}"
    return row['City/Locality']

# Apply the function to update the column
flats_df['City/Locality'] = flats_df.apply(update_locality, axis=1)

# Display the updated DataFrame
flats_df.head()

Unnamed: 0,Property Type,City/Locality,BHK,Property Size (sqft),Furnishing,Price (INR),Price (per sqft)
0,"Multistorey-Apartment,Builder-Floor-Apartment,...","Saket, South Delhi",1,400,Unfurnished,2400000,6000
1,"Multistorey-Apartment,Builder-Floor-Apartment,...","Uttam Nagar, West Delhi",1,380,Semi-Furnished,1300000,3421
2,"Multistorey-Apartment,Builder-Floor-Apartment,...","Dwarka, South West Delhi",1,500,Semi-Furnished,2000000,4000
4,"Multistorey-Apartment,Builder-Floor-Apartment,...","Rohini, North West Delhi",1,366,Unfurnished,1700000,4644
5,"Multistorey-Apartment,Builder-Floor-Apartment,...","Vasant Kunj, South West Delhi",1,44,Unfurnished,1600000,36363


In [34]:
flats_df.to_csv("/Users/guliaharsh021/Downloads/DA Documents /Projects/Project 1/Data Prepration, Processing and Analysis/Data Exploration and Cleaning/Cleaned Data/flats_data_cleaned.csv")