In [189]:
# importing some libs so that we can work with data easily :)
# all of the compution is done on colab
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [190]:
# reading the csv file by using pandas :)
df = pd.read_csv('ahmedabad.csv' , encoding='utf-8')

In [191]:
df.index

RangeIndex(start=0, stop=6853, step=1)

In [192]:
print("missing values in data-set in '%'")
print("")
print(f"{(df.isnull().mean())*100}")

missing values in data-set in '%'

Unnamed: 0      0.000000
Title           0.000000
type_area       0.000000
value_area      0.000000
status          0.000000
floor           0.116737
transaction     0.306435
furnishing      0.539910
facing          1.298701
price           0.000000
price_sqft      6.537283
description    25.769736
dtype: float64


In [193]:
df.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
Title,0
type_area,0
value_area,0
status,0
floor,8
transaction,21
furnishing,37
facing,89
price,0


In [194]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6853 entries, 0 to 6852
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   6853 non-null   int64 
 1   Title        6853 non-null   object
 2   type_area    6853 non-null   object
 3   value_area   6853 non-null   object
 4   status       6853 non-null   object
 5   floor        6845 non-null   object
 6   transaction  6832 non-null   object
 7   furnishing   6816 non-null   object
 8   facing       6764 non-null   object
 9   price        6853 non-null   object
 10  price_sqft   6405 non-null   object
 11  description  5087 non-null   object
dtypes: int64(1), object(11)
memory usage: 642.6+ KB


In [195]:
df.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
Title,0
type_area,0
value_area,0
status,0
floor,8
transaction,21
furnishing,37
facing,89
price,0


In [196]:
df['price_sqft'].isnull().sum()

np.int64(448)

In [197]:
# DROP the description
df = df.drop(['description', 'Unnamed: 0'], axis=1)

In [198]:
# quick look into the data-set :)
df.head(5)

Unnamed: 0,Title,type_area,value_area,status,floor,transaction,furnishing,facing,price,price_sqft
0,"2 BHK Apartment for Sale in Vivaan Aura, Zundal Ahmedabad",Super Area,155 sqyrd,Poss. by Dec '26,New Property,Unfurnished,Vivaan Aura,2,â‚¹48.1 Lac,"â‚¹3,444 per sqft"
1,"2 BHK Apartment for Sale in SP Epitome, Shela Ahmedabad",Carpet Area,710 sqft,Poss. by Jun '27,New Property,Unfurnished,SP Epitome,2,â‚¹50 Lac,"â‚¹3,876 per sqft"
2,"2 BHK Apartment for Sale in Pacifica Amara, Sanand Ahmedabad",Carpet Area,588 sqft,Poss. by Dec '25,New Property,Unfurnished,Pacifica Amara,2,â‚¹40 Lac,"â‚¹3,738 per sqft"
3,"2 BHK Apartment for Sale in Kavisha AER, Shela Ahmedabad",Carpet Area,687 sqft,Poss. by Jun '26,New Property,Unfurnished,Kavisha AER,2,â‚¹48.8 Lac,"â‚¹3,900 per sqft"
4,"2 BHK Apartment for Sale in Aarambh Vistara, Gota Ahmedabad",Carpet Area,621 sqft,Poss. by Mar '25,New Property,Unfurnished,Aarambh Vistara,2,â‚¹48 Lac,"â‚¹4,248 per sqft"


In [199]:
import re
def clean_price(val):
    if pd.isna(val):
        return np.nan
    val = str(val).strip()

    return re.sub(r'^[^0-9A-Za-z]+','',val)


def price_conversion(val):
    if 'Lac' in val:
        return float(val.replace('Lac', "").strip()) *1e5
    elif "Cr" in val:
        return float(val.replace('Cr', "").strip()) *1e7
    elif "Call for Price" in val:
        return str(val.strip())
    else:
        return float(val.strip())

def remove_str(val):
    if pd.isna(val):
        return np.nan
    val = str(val).strip()
    return re.sub(r'[^0-9]+','', val)



df['price'] = df['price'].apply(clean_price)
df['price_sqft'] = df['price_sqft'].apply(clean_price)
df['price_sqft'] = df['price_sqft'].apply(remove_str)
df['price'] = df['price'].apply(price_conversion)

In [200]:
df['type_area'].unique()

array(['Super Area', 'Carpet Area', 'Transaction', 'Status', 'Built Area',
       'Under Construction'], dtype=object)

In [201]:
df['price_sqft'].unique()

array(['3444', '3876', '3738', ..., '10121', '8984', '11250'],
      dtype=object)

In [202]:
df['type_area'] = df['type_area'].replace({
    "Super Area": 'super area',
    'Carpet Area': 'carpet area',
    'Transaction': None,
    'Status': None,
    'Under Construction': 'under construction'
} )

df['type_area'].unique()

array(['super area', 'carpet area', None, 'Built Area',
       'under construction'], dtype=object)

In [203]:
df['status'].unique()

array(["Poss. by Dec '26", "Poss. by Jun '27", "Poss. by Dec '25",
       "Poss. by Jun '26", "Poss. by Mar '25", "Poss. by Jul '24",
       "Poss. by Jul '26", "Poss. by Dec '24", 'Ready to Move',
       "Poss. by May '26", "Poss. by Nov '24", "Poss. by Oct '25",
       "Poss. by Jan '26", "Poss. by Aug '24", "Poss. by Oct '24",
       "Poss. by Feb '28", "Poss. by Sep '25", "Poss. by Mar '26",
       "Poss. by Jan '25", "Poss. by Nov '25", "Poss. by Dec '27",
       "Poss. by Sep '24", '4 out of 5', '1 out of 1', '3 out of 3',
       '3 out of 5', '3 out of 4', "Poss. by Jun '25", '4 out of 10',
       '4 out of 4', 'Bapunagar One', '2 out of 3', '3 out of 9',
       '7 out of 8', "Poss. by Jun '24", '5 out of 5', '1 out of 4',
       '1 out of 14', '13 out of 13', '2 out of 5', '9 out of 14',
       '8 out of 14', "Poss. by Mar '27", "Poss. by Jul '25",
       "Poss. by May '24", "Poss. by Aug '25", "Poss. by Feb '25",
       "Poss. by Apr '25", "Poss. by Dec '28", "Poss. by Apr '26

In [204]:
def status_clean(val):
    val = str(val)

    if "Poss." in val or "Ready to Move" in val or "Under Construction" in val:
        return ("possession", val)
    if val in ["New Property", "Resale"]:
        return ('transaction', val)
    if "out of" in val:
        return ("floor", val)

    if "Const. Age" in val:
        return ("age" , val)

    return ('project', val)

df[['status_type', 'status_value']] = df['status'].apply(status_clean).apply(pd.Series)
df.drop(['status'], axis=1 , inplace=True)

In [205]:
df['status_value'][11]

'Ready to Move'

In [206]:
df['status_type'][11]

'possession'

In [207]:

# df.sample()
df['facing'].unique()

array(['2', '1 Covered', 'East', 'Garden/Park', 'North - East', 'North',
       'Main Road', 'North - West', 'West', '1',
       'Garden/Park, Pool, Main Road', 'Garden/Park, Main Road',
       'Vandematram City', '1 Covered,', 'Garden Paradise',
       'Parshwanath Atlantis Park', 'Shukun Heights', 'South - East',
       'Radheshyam Residency', 'Shrifal Apartment', 'Omro Anmol Avenue',
       'Main Road, Garden/Park, Pool', 'Silver Pearl', 'South -West',
       'Savvy Studioz', 'Sainath Avenue', 'Shreeji Tulsi Heights',
       'Soham Sanidhya', 'Dev Darshan Apartment', 'Ashraya 10',
       'Freehold', 'Sarvopari Elegance',
       'Jigish Rohitbhai Patel Jahnvi Residency Phase 2',
       'Saanvi Aarambh', '3', 'Suryam Elegance', 'Shyam Tirth',
       'Karnavati 3', 'Nijanand Pushkar Elegance', 'Shakti Gardenia',
       'Siddharth Icon', 'Sun Real Homes', 'Shilp Solace',
       'VandeMatram Prime', 'Laxmi Nivas', 'Rashmi Vihar', 'Shilp Ananta',
       'Green City', 'Sun Rising Homes', n

In [208]:
# it is to check the no of rows and columns in the data-set :)
x = df.shape
print('This data-set has', x[0] , "rows" , 'and' ,x[-1] , "columns")

This data-set has 6853 rows and 11 columns


In [209]:
# we can also need to check data type so there are some couple of ways for it w'll use .dtype method for now
print('data type of the data present in the columns are give below')
print('')
print(df.dtypes)

data type of the data present in the columns are give below

Title           object
type_area       object
value_area      object
floor           object
transaction     object
furnishing      object
facing          object
price           object
price_sqft      object
status_type     object
status_value    object
dtype: object


In [210]:
# null values per column
df.isnull().sum()

Unnamed: 0,0
Title,0
type_area,4
value_area,0
floor,8
transaction,21
furnishing,37
facing,89
price,0
price_sqft,448
status_type,0


In [211]:
df.duplicated().sum()

np.int64(298)

In [212]:
df = df.drop_duplicates()

In [213]:
df['type_area'].unique()

array(['super area', 'carpet area', None, 'Built Area',
       'under construction'], dtype=object)

In [214]:
# missing values :)
print("missing values in data-set in '%'")
print("")
print(f"{(df.isnull().mean())*100}")

missing values in data-set in '%'

Title           0.000000
type_area       0.045767
value_area      0.000000
floor           0.091533
transaction     0.259344
furnishing      0.488177
facing          1.266209
price           0.000000
price_sqft      6.498856
status_type     0.000000
status_value    0.000000
dtype: float64


In [215]:
df = df.dropna()
df.isnull().sum()

Unnamed: 0,0
Title,0
type_area,0
value_area,0
floor,0
transaction,0
furnishing,0
facing,0
price,0
price_sqft,0
status_type,0


In [216]:
df.head()

Unnamed: 0,Title,type_area,value_area,floor,transaction,furnishing,facing,price,price_sqft,status_type,status_value
0,"2 BHK Apartment for Sale in Vivaan Aura, Zundal Ahmedabad",super area,155 sqyrd,New Property,Unfurnished,Vivaan Aura,2,4810000.0,3444,possession,Poss. by Dec '26
1,"2 BHK Apartment for Sale in SP Epitome, Shela Ahmedabad",carpet area,710 sqft,New Property,Unfurnished,SP Epitome,2,5000000.0,3876,possession,Poss. by Jun '27
2,"2 BHK Apartment for Sale in Pacifica Amara, Sanand Ahmedabad",carpet area,588 sqft,New Property,Unfurnished,Pacifica Amara,2,4000000.0,3738,possession,Poss. by Dec '25
3,"2 BHK Apartment for Sale in Kavisha AER, Shela Ahmedabad",carpet area,687 sqft,New Property,Unfurnished,Kavisha AER,2,4880000.0,3900,possession,Poss. by Jun '26
4,"2 BHK Apartment for Sale in Aarambh Vistara, Gota Ahmedabad",carpet area,621 sqft,New Property,Unfurnished,Aarambh Vistara,2,4800000.0,4248,possession,Poss. by Mar '25


In [217]:
# not so important but still if you want you can try storing name of fetaures in variable for record :)

columns = list(df.columns)
columns

['Title',
 'type_area',
 'value_area',
 'floor',
 'transaction',
 'furnishing',
 'facing',
 'price',
 'price_sqft',
 'status_type',
 'status_value']

In [218]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6063 entries, 0 to 6852
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Title         6063 non-null   object
 1   type_area     6063 non-null   object
 2   value_area    6063 non-null   object
 3   floor         6063 non-null   object
 4   transaction   6063 non-null   object
 5   furnishing    6063 non-null   object
 6   facing        6063 non-null   object
 7   price         6063 non-null   object
 8   price_sqft    6063 non-null   object
 9   status_type   6063 non-null   object
 10  status_value  6063 non-null   object
dtypes: object(11)
memory usage: 697.4+ KB


In [219]:
df.sample()

Unnamed: 0,Title,type_area,value_area,floor,transaction,furnishing,facing,price,price_sqft,status_type,status_value
2374,"3 BHK Apartment for Sale in KP Courtyard, Sanathal Ahmedabad",carpet area,1087 sqft,3 out of 3,Resale,Unfurnished,West,6500000.0,3587,possession,Ready to Move


In [220]:
df["transaction"].unique()

array(['Unfurnished', 'New Property', 'Resale', 'Semi-Furnished',
       'Furnished', 'Bhagwat Elysium', 'Other', 'Garden/Park',
       '14 Covered', 'Samyak 49', 'The Indus', 'Super Shaligram'],
      dtype=object)

In [221]:
price_sqft = df['price_sqft']

In [222]:
df = df.drop(['price_sqft'], axis=1)

In [223]:
df['Title'][9]

'2 BHK Apartment for Sale in Sun Parkwest, Shela Ahmedabad'

In [224]:
df.sample(5)

Unnamed: 0,Title,type_area,value_area,floor,transaction,furnishing,facing,price,status_type,status_value
4728,"4 BHK Apartment for Sale in Adani Shantigram, Sarkhej Gandhinagar Highway Ahmedabad",super area,3650 sqft,2 out of 12,Resale,Unfurnished,East,32000000.0,possession,Ready to Move
6281,"5 BHK Apartment for Sale in Swati Senor, Ambli Bopal Road Ahmedabad",carpet area,4709 sqft,9 out of 12,New Property,Unfurnished,East,85600000.0,possession,Poss. by Dec '25
6429,"5 BHK Penthouse for Sale in Satyamev Luxor, Ambli Ahmedabad",carpet area,3795 sqft,22 out of 23,New Property,Unfurnished,East,50700000.0,possession,Poss. by Mar '27
6483,"4 BHK Apartment for Sale in Oeuvre 2, Bodakdev Ahmedabad",super area,5615 sqft,Ground out of 22,New Property,Unfurnished,North,49900000.0,possession,Poss. by Dec '24
4396,"4 BHK Apartment for Sale in ANANTARA Abode, Hebatpur Ahmedabad",carpet area,1856 sqft,7 out of 13,New Property,Unfurnished,East,20800000.0,possession,Poss. by Jul '24


In [225]:
# clean columns
# 1 type area ----------------- looks good
# 2 value area ------------ just convert sqft and sqm , sqyrd to one singel type
# 3 floor ----------------- convert to 1/7 or something like it form
# 4 transcations -------------- looks useless so may be drop it
# 5 furnishing -------------------- search google or chatgpt as i cant under stand data
# 6 facing ----------------------- make it direction only  or for good drop it as it is not releivent data
# 7 Flat type(new column from title)----------- like 1 BHK , 2 BHK or other
# 8 loaction or building name ---------- from title (after in and before ,)

# so there will be about 7-8 meaningful columns after this

In [226]:
# arpatment type

# df['apartment_type'] = df['Title'].apply(lambda  title for t in title: return re.filter(r'[\d\s\w]', t)  )

In [227]:
def flat_type(text):
    match = re.search(r'(\d+)\s*BHK', text , re.IGNORECASE)
    if match:
        return match.group(0)
    return None


df['Flat Type'] = df['Title'].apply(flat_type)

In [228]:
def location(text):
    match = re.search(r'\bin\b\s+(.*)', text , re.IGNORECASE)
    if match:
        return match.group(1)
    return None

df['location'] = df['Title'].apply(location)

In [229]:
df.sample(10)

Unnamed: 0,Title,type_area,value_area,floor,transaction,furnishing,facing,price,status_type,status_value,Flat Type,location
6194,"5 BHK Apartment for Sale in Iscon Vogue, Ambli Road Ahmedabad",carpet area,4221 sqft,1 out of 22,New Property,Unfurnished,East,65000000.0,possession,Poss. by Mar '28,5 BHK,"Iscon Vogue, Ambli Road Ahmedabad"
5870,"4 BHK Apartment for Sale in Safal Param, Satellite Ahmedabad",carpet area,4500 sqft,2 out of 10,Resale,Unfurnished,East,31500000.0,possession,Ready to Move,4 BHK,"Safal Param, Satellite Ahmedabad"
3002,"3 BHK Apartment for Sale in Ideal Imbert, Chharodi Ahmedabad",super area,265 sqyrd,New Property,Unfurnished,Ideal Imbert,3,11300000.0,possession,Poss. by Dec '26,3 BHK,"Ideal Imbert, Chharodi Ahmedabad"
2493,3 BHK Apartment for Sale in old high court Ahmedabad,carpet area,81 sqm,3 out of 6,Resale,Furnished,2,6500000.0,possession,Ready to Move,3 BHK,old high court Ahmedabad
4590,"4 BHK Penthouse for Sale in South Bopal, Bopal Ahmedabad",carpet area,3200 sqft,12 out of 12,Resale,Furnished,4,28000000.0,possession,Ready to Move,4 BHK,"South Bopal, Bopal Ahmedabad"
5700,"4 BHK Apartment for Sale in Malabar Retreat, Chharodi Ahmedabad",super area,2397 sqft,New Property,Unfurnished,Malabar Retreat,1 Covered,26500000.0,possession,Poss. by Sep '27,4 BHK,"Malabar Retreat, Chharodi Ahmedabad"
5537,"4 BHK Apartment for Sale in Popular Domain, Sarkhej Gandhinagar Highway Ahmedabad",carpet area,2500 sqft,8 out of 12,Resale,Furnished,North - East,31500000.0,possession,Ready to Move,4 BHK,"Popular Domain, Sarkhej Gandhinagar Highway Ahmedabad"
1257,2 BHK Apartment for Sale in Ramdev Nagar Ahmedabad,super area,105 sqyrd,4 out of 4,Resale,Furnished,2,4000000.0,possession,Ready to Move,2 BHK,Ramdev Nagar Ahmedabad
3668,4 BHK Apartment for Sale in Ghatlodiya Ahmedabad,carpet area,1150 sqft,5 out of 11,New Property,Unfurnished,Garden/Park,14400000.0,possession,Ready to Move,4 BHK,Ghatlodiya Ahmedabad
736,"1 BHK Apartment for Sale in Krish Exotica, Nikol Ahmedabad",super area,88 sqyrd,5 out of 5,Resale,Semi-Furnished,North,2100000.0,possession,Ready to Move,1 BHK,"Krish Exotica, Nikol Ahmedabad"


In [230]:
df.isnull().sum()

Unnamed: 0,0
Title,0
type_area,0
value_area,0
floor,0
transaction,0
furnishing,0
facing,0
price,0
status_type,0
status_value,0


In [231]:
df.sample(10)

Unnamed: 0,Title,type_area,value_area,floor,transaction,furnishing,facing,price,status_type,status_value,Flat Type,location
5538,"4 BHK Apartment for Sale in Saanvi Skydeck Select, Ambli Ahmedabad",carpet area,2235 sqft,4 out of 12,Resale,Semi-Furnished,East,35000000.0,possession,Ready to Move,4 BHK,"Saanvi Skydeck Select, Ambli Ahmedabad"
4019,"4 BHK Apartment for Sale in Ambrosia, Vaishnodevi Circle Ahmedabad",super area,3211 sqft,New Property,Unfurnished,Ambrosia,4,21600000.0,possession,Poss. by Sep '26,4 BHK,"Ambrosia, Vaishnodevi Circle Ahmedabad"
4939,"5 BHK Apartment for Sale in Riviera Elite, Shela Ahmedabad",carpet area,2690 sqft,9 out of 20,Resale,Unfurnished,East,37500000.0,possession,Ready to Move,5 BHK,"Riviera Elite, Shela Ahmedabad"
5549,"4 BHK Apartment for Sale in Aranyam, Shilaj Ahmedabad",carpet area,2326 sqft,8 out of 22,New Property,Unfurnished,East,27400000.0,possession,Poss. by Dec '26,4 BHK,"Aranyam, Shilaj Ahmedabad"
5170,"4 BHK Apartment for Sale in The 31ST, Shilaj Ahmedabad",carpet area,2589 sqft,10 out of 31,New Property,Unfurnished,East,31200000.0,possession,Poss. by Aug '25,4 BHK,"The 31ST, Shilaj Ahmedabad"
4543,"4 BHK Apartment for Sale in The Verity, Satellite Ahmedabad",carpet area,2700 sqft,11 out of 17,New Property,Unfurnished,North - East,36000000.0,possession,Poss. by Dec '25,4 BHK,"The Verity, Satellite Ahmedabad"
4282,3 BHK Apartment for Sale in Prahlad Nagar Ahmedabad,carpet area,2000 sqft,4 out of 5,Resale,Furnished,East,16000000.0,possession,Ready to Move,3 BHK,Prahlad Nagar Ahmedabad
253,"2 BHK Apartment for Sale in Shilp Ananta, Shela Ahmedabad",carpet area,680 sqft,12 out of 13,Resale,Unfurnished,East,5000000.0,possession,Poss. by Oct '24,2 BHK,"Shilp Ananta, Shela Ahmedabad"
4979,"5 BHK Apartment for Sale in Riviera Majestica, Shela Ahmedabad",carpet area,2772 sqft,New Property,Unfurnished,Riviera Majestica,5,31200000.0,possession,Poss. by Dec '26,5 BHK,"Riviera Majestica, Shela Ahmedabad"
2339,"2 BHK Apartment for Sale in Kavisha Amara, Shela Ahmedabad",carpet area,675 sqft,8 out of 14,New Property,Unfurnished,East,6100000.0,possession,Ready to Move,2 BHK,"Kavisha Amara, Shela Ahmedabad"


In [232]:
df['location'].unique().tolist()

['Vivaan Aura, Zundal Ahmedabad',
 'SP Epitome, Shela Ahmedabad',
 'Pacifica Amara, Sanand Ahmedabad',
 'Kavisha AER, Shela Ahmedabad',
 'Aarambh Vistara, Gota Ahmedabad',
 'Vivaan Essence, Zundal Ahmedabad',
 'Kavisha Atria, Shela Ahmedabad',
 'Sun Parkwest, Shela Ahmedabad',
 'Shilp Ananta, Shela Ahmedabad',
 'Zundal Ahmedabad',
 'Savvy Studioz, Jagatpur Ahmedabad',
 'Devam, Jagatpur Ahmedabad',
 'Orchid Blues, Shela Ahmedabad',
 'Aadhvan Rise, South Bopal, Bopal Ahmedabad',
 'Atlantis wave Ahmedabad',
 'Zaveri Greens, Ghuma Ahmedabad',
 'Mahadev Lavish, South Bopal, Bopal Ahmedabad',
 'Sun Footprints, Shela Ahmedabad',
 'Eden Godrej Garden city, Jagatpur Village, Gota Ahmedabad',
 'Sacred Shivansh, Shela Ahmedabad',
 'Sheladia Eris, Shela Ahmedabad',
 'Shela Ahmedabad',
 'Gota Ahmedabad',
 'HR Eternia, Shela Ahmedabad',
 'Indraprasth Ixora, Shela Ahmedabad',
 'Shoolin Kopren Park View, Vasant Nagar Ahmedabad',
 'Aarambh Zest, Gota Ahmedabad',
 'Aarohi Club Road Ahmedabad',
 'Orchid 

In [233]:
df['Title'].sample(10)

Unnamed: 0,Title
169,"2 BHK Apartment for Sale in Ganesh Genesis, Gota Ahmedabad"
1852,2 BHK Apartment for Sale in Chandlodiya Ahmedabad
1891,"3 BHK Apartment for Sale in Savvy Swaraj, Jagatpur Village, Gota Ahmedabad"
2119,"2 BHK Builder Floor for Sale in Shree Prashanti Geeta Apartments, Ambawadi Ahmedabad"
2508,"3 BHK Apartment for Sale in Sorrel Apartments, Sardar Patel Ring Road Ahmedabad"
1144,"3 BHK Builder Floor for Sale in Shantinath Apartment, Vejalpur Ahmedabad"
5182,4 BHK Apartment for Sale in Shilaj Ahmedabad
1952,3 BHK Apartment for Sale in Chandkheda Ahmedabad
4334,"3 BHK Apartment for Sale in Indraprasth 9, New Ranip Ahmedabad"
3374,"3 BHK Apartment for Sale in Gala Eternia, Thaltej Ahmedabad"
