In [58]:
import numpy as np 
import pandas as pd 
import warnings 
warnings.filterwarnings('ignore')

## BASIC DATA UNDERSTANDING (Pandas)
- Load this CSV file using pandas.
- Show first 5 rows of the dataset.
- Show last 5 rows of the dataset.
- Display number of rows and columns.
- Print all column names.
- Check data types of each column.
- Find total missing values in each column.
- Show basic statistics (mean, min, max, std) of numerical columns.
- Count how many houses are furnished, semi-furnished, unfurnished.
- Find unique values in furnishingstatus.

In [59]:
# 1. Load this CSV file using pandas 
df = pd.read_csv('Housing.csv')

In [60]:
# 1 Show first 5 rows of the dataset.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,Unnamed: 13
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished,
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished,
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished,
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished,
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished,


In [61]:
# 2 Show last 5 rows of the dataset.
df.tail()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,Unnamed: 13
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished,
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished,
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished,
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished,
544,1750000,3850,3,1,2,yes,no,no,no,no,0,no,unfurnished,


In [62]:
# 4. Display number of rows and columns.
df.shape

(545, 14)

In [63]:
# 5 Print all column names.
df.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus', 'Unnamed: 13'],
      dtype='object')

In [64]:
# 6 Check data types of each column. 
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
 13  Unnamed: 13       1 non-null      object
dtypes: int64(6), object(8)
memory usage: 59.7+ KB


In [65]:
# 7 .  Find total missing values in each column.
df.isna().sum()

price                 0
area                  0
bedrooms              0
bathrooms             0
stories               0
mainroad              0
guestroom             0
basement              0
hotwaterheating       0
airconditioning       0
parking               0
prefarea              0
furnishingstatus      0
Unnamed: 13         544
dtype: int64

In [66]:
# 8 - Show basic statistics (mean, min, max, std) of numerical columns. 
df.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.693578
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


In [67]:
# 9 Count how many houses are furnished, semi-furnished, unfurnished. 
furnished_house =   df[df['furnishingstatus'] == "furnished"]['furnishingstatus'].count()
unfurnished_house = df[df['furnishingstatus'] == "unfurnished" ]['furnishingstatus'].count()
semi_furnished_house = df[df['furnishingstatus'] == "semi-furnished" ]['furnishingstatus'].count()

In [68]:
print("total --", unfurnished_house)

total -- 178


In [69]:
# 10 - Find unique values in furnishingstatus. 
df['furnishingstatus'].unique()

array(['furnished', 'semi-furnished', 'unfurnished'], dtype=object)

## DATA CLEANING & TRANSFORMATION

- Convert yes/no columns into 1/0 using pandas.
- Remove extra spaces from furnishingstatus.
- Rename all column names into lowercase.
- Replace semi-furnished with semi_furnished.
- Check if any duplicate rows exist and remove them.
- Convert price column into lakhs (price/100000).
- Create a new column: price_per_sqft.
- Convert furnishingstatus into numerical labels (0,1,2).

In [70]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,Unnamed: 13
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished,
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished,
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished,
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished,
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished,


In [71]:
# 11 - Convert yes/no columns into 1/0 using pandas.

yes_no_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

df[yes_no_columns] = df[yes_no_columns].replace({'yes':1, 'no':0})

In [72]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,Unnamed: 13
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished,
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished,
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished,
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished,
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished,


In [73]:
# 12 - Remove extra spaces from furnishingstatus.
df['furnishingstatus'] = df['furnishingstatus'].replace(" ", '')

In [74]:
# 13 - Rename all column names into lowercase.
df.columns = df.columns.str.lower()

In [75]:
# 14 Replace semi-furnished with semi_furnished. 
df['furnishingstatus'] = df['furnishingstatus'].replace('semi-furnished', "semi_furnished")

In [76]:
# 15 - Check if any duplicate rows exist and remove them.
df.duplicated().sum()

np.int64(0)

In [77]:
# 16. - Convert price column into lakhs (price/100000).
df['price_in_lac'] = df['price']/ 100000

In [78]:
# 17 - Create a new column: price_per_sqft.
df['price_per_sqft'] = df['price'] / df['area']

In [79]:
# 18 - Convert furnishingstatus into numerical labels (0,1,2)
df['furnishingstatus'].unique()
df['furnishingstatus'] = df['furnishingstatus'].replace({"furnished":0, "semi_furnished":1, "unfurnished":2})

In [151]:
# 19 remove column 
df.drop('unnamed: 13', axis=1)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,133.0000,1792.452830
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,122.5000,1367.187500
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,122.5000,1229.919679
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,122.1500,1628.666667
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,114.1000,1537.735849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,1,0,1,0,0,2,0,2,18.2000,606.666667
541,1767150,2400,3,1,1,0,0,0,0,0,0,0,1,17.6715,736.312500
542,1750000,3620,2,1,1,1,0,0,0,0,0,0,2,17.5000,483.425414
543,1750000,2910,3,1,1,0,0,0,0,0,0,0,0,17.5000,601.374570


## REAL-WORLD BUSINESS QUESTIONS
- What is the average house price?
- What is the average price of furnished houses?
- What is the average price of unfurnished houses?
- Which area has the maximum house price?
- What is the average area of houses?
- How many houses have 3 bedrooms?
- How many houses have parking = 2?
- Find top 5 most expensive houses.
- Find top 5 cheapest houses.

In [81]:
# - What is the average house price?
df['price'].mean()

np.float64(4766729.247706422)

In [82]:
#  - What is the average price of furnished houses?
df[df['furnishingstatus'] == 0]['price'].mean()

np.float64(5495696.0)

In [83]:
# - What is the average price of unfurnished houses?
df[df['furnishingstatus'] == 2]['price'].mean()

np.float64(4013831.4606741574)

In [84]:
# ? - Which area has the maximum house price?
df[df['price'] == df['price'].max()][['area', 'price']]

Unnamed: 0,area,price
0,7420,13300000


In [85]:
# - What is the average area of houses? 
df['area'].mean()

np.float64(5150.54128440367)

In [86]:
#   How many houses have 3 bedrooms? 
df[df['bathrooms'] == 3].shape[0]

10

In [87]:
#  How many houses have parking = 2? 
df[df['parking'] == 2 ].shape[0]

108

In [88]:
# Find top 5 most expensive houses. 
df['price'].sort_values(ascending=False).head()

0    13300000
2    12250000
1    12250000
3    12215000
4    11410000
Name: price, dtype: int64

In [89]:
#  - Find top 5 cheapest houses. 
# df[df['price'].sort_values()[3] == df['price']]

## GROUPBY & AGGREGATION (Real Analytics)
- Average price based on furnishingstatus.
- Average price based on bedrooms.
- Average price based on bathrooms.
- Average price based on stories.
- Average price for houses with airconditioning = yes.
- Average price for houses near mainroad.
- Count houses based on prefarea.
- Average area based on bedrooms.
- Average parking based on furnishingstatus.
- Which bedroom count has highest average price?

In [90]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,unnamed: 13,price_in_lac,price_per_sqft
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,,133.0,1792.45283
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,,122.5,1367.1875
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,,122.5,1229.919679
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,,122.15,1628.666667
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,,114.1,1537.735849


In [91]:
df['price'].mean()

np.float64(4766729.247706422)

In [92]:
#  - Average price based on furnishingstatus.
df.groupby('furnishingstatus')['price'].mean() / 100000

furnishingstatus
0    54.956960
1    49.075242
2    40.138315
Name: price, dtype: float64

In [93]:
df['bedrooms'].unique()

array([4, 3, 5, 2, 6, 1])

In [94]:
# - Average price based on bedrooms. 
df.groupby('bedrooms')['price'].mean() / 100000

bedrooms
1    27.125000
2    36.320221
3    49.545981
4    57.297579
5    58.198000
6    47.915000
Name: price, dtype: float64

In [95]:
#  - Average price based on bathrooms.
df.groupby('bathrooms')['price'].mean()

bathrooms
1    4.206913e+06
2    6.209206e+06
3    7.282100e+06
4    1.225000e+07
Name: price, dtype: float64

In [96]:
# - Average price based on stories. 
df.groupby('stories')['price'].mean() / 100000

stories
1    41.706586
2    47.640735
3    56.854359
4    72.084498
Name: price, dtype: float64

In [97]:
# - Average price for houses with airconditioning = yes. 
df[df['airconditioning'] == 1]['price'].mean()

np.float64(6013220.5813953485)

In [98]:
# - Average price for houses near mainroad. 
df[df['mainroad'] == 1]['price'].mean()

np.float64(4991777.329059829)

In [99]:
#   Count houses based on prefarea 
df[df['prefarea'] == 1 ].shape[0]

128

In [100]:
# - Average area based on bedrooms. 
df.groupby('bedrooms')['area'].mean()

bedrooms
1    3710.000000
2    4636.235294
3    5226.620000
4    5582.063158
5    6291.500000
6    3950.000000
Name: area, dtype: float64

In [101]:
#  - Average parking based on furnishingstatus.
df.groupby('furnishingstatus')['parking'].mean()

furnishingstatus
0    0.885714
1    0.735683
2    0.488764
Name: parking, dtype: float64

In [102]:
# - Which bedroom count has highest average price?
df.groupby('bedrooms')['price'].mean()


bedrooms
1    2.712500e+06
2    3.632022e+06
3    4.954598e+06
4    5.729758e+06
5    5.819800e+06
6    4.791500e+06
Name: price, dtype: float64

In [103]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,unnamed: 13,price_in_lac,price_per_sqft
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,,133.0,1792.45283
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,,122.5,1367.1875
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,,122.5,1229.919679
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,,122.15,1628.666667
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,,114.1,1537.735849


## FILTERING (Real Buyer Queries)

- Find houses with:
price < 10,000,000
bedrooms >= 3
airconditioning = yes

- Find houses which are:
furnished
have parking >= 2

- Find houses with:
area > 8000
bathrooms >= 2

- Find houses in preferred area with AC.

- Find unfurnished houses with price < average price.

In [104]:
df.shape

(545, 16)

In [121]:
#  Find houses with:
# price < 10,000,000
# bedrooms >= 3
# airconditioning = yes

# df[df['price'] < 10000000]
# df[df['bathrooms'] >= 3] 
# df[df['airconditioning'] == 1]

# df[(df['price'] < 20000000) & (df['bathrooms'] >=3 ) & (df['airconditioning'] == 1) ]

In [128]:
# Find houses which are: furnished have parking >= 2 
# df[(df['furnishingstatus'] == 0) & (df['parking'] >= 2)]

In [134]:
# Find houses with: area > 8000 bathrooms >= 2 
# df[(df['area'] > 8000) & (df['bathrooms'] >= 2)]

In [138]:
# - Find houses in preferred area with AC. 
# df[(df['prefarea']==1)& (df['airconditioning']==1)]

In [144]:
# - Find unfurnished houses with price < average price. 
# avg_price = df['price'].mean()
# df[(df['furnishingstatus']== 2)&(df['price'] < avg_price)]

## NUMPY OPERATIONS (Important)

- Convert price column to NumPy array.
- Find mean, median, std using NumPy.
- Find max and min price using NumPy.
- Sort price array using NumPy.
- Find top 3 prices using NumPy.
- Calculate correlation between price and area using NumPy.
- Count how many prices are above average (NumPy).
- Compute variance of area using NumPy.

In [160]:
# Convert price column to NumPy array. 
np_price = df['price'].to_numpy()

In [172]:
# - Find mean, median, std using NumPy. 
price_mean = np_price.mean()
price_median = np.median(np_price) 
price_std = np_price.std()

In [176]:
# - Find max and min price using NumPy.
max_price = np_price.max()
min_price = np_price.min()

In [200]:
# - Sort price array using NumPy 
# np.sort(np_price)[::-1][0:4]

In [205]:
# Count how many prices are above average (NumPy). 
# (np_price >= price_mean).sum()

In [214]:
# - Compute variance of area using NumPy. 
np_area = df['area'].to_numpy()
np.var(np_area)


np.float64(4700870.7510478925)

In [242]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft,luxury,bog_house,total_rooms,big_house
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,133.0,1792.45283,1,0,4,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,122.5,1367.1875,1,1,8,1
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,122.5,1229.919679,1,1,4,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,122.15,1628.666667,1,0,4,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,114.1,1537.735849,1,0,2,0


## FEATURE ENGINEERING (ML Ready)

- Create new column luxury:
- If price > average → luxury = 1 else 0
- Create new column big_house:
- area > 8000 → 1 else 0
- Create column total_rooms = bedrooms + bathrooms.
- Encode yes/no columns into numeric.
- Prepare X (features) and y (target price).

In [222]:
# Create new column luxury: 
df['luxury'] = df['price'] > df['price'].mean()


In [239]:
# Create new column big_house: 
df['big_house'] = df['area'] > 8000

In [225]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft,luxury,bog_house
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,133.0,1792.45283,True,False
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,122.5,1367.1875,True,True
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,122.5,1229.919679,True,True
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,122.15,1628.666667,True,False
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,114.1,1537.735849,True,False


In [226]:
# Create column total_rooms = bedrooms + bathrooms 
df['total_rooms'] = df['bathrooms'] + df['bathrooms']

In [240]:
# Encode yes/no columns into numeric. 
df[['luxury', 'big_house']] = df[['luxury', 'big_house']].replace({True:1, False:0})

In [241]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft,luxury,bog_house,total_rooms,big_house
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,133.0,1792.45283,1,0,4,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,122.5,1367.1875,1,1,8,1
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,122.5,1229.919679,1,1,4,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,122.15,1628.666667,1,0,4,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,114.1,1537.735849,1,0,2,0


In [247]:
# Prepare X (features) and y (target price). 
X  = df[[ 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus', 'price_in_lac',
       'price_per_sqft', 'luxury', 'bog_house', 'total_rooms', 'big_house']]
y = df['price']

In [254]:
X = df.drop('price', axis=1)
y = df['price']
y.head()

0    13300000
1    12250000
2    12250000
3    12215000
4    11410000
Name: price, dtype: int64

## PROJECT-LEVEL QUESTIONS

- Which 3 features affect price the most?
- Does furnishing increase price? (compare means)
- Does parking increase price?
- Does preferred area increase price?
- Does airconditioning increase price?
- Which combination gives highest price:
- furnished + AC
- unfurnished + no AC
- Create a price prediction dataset (numerical only).
- Export cleaned dataset to new CSV file.
- Create summary report using pandas.
- Build a small function that returns house price stats

In [255]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft,luxury,bog_house,total_rooms,big_house
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,133.0,1792.45283,1,0,4,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,122.5,1367.1875,1,1,8,1
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,122.5,1229.919679,1,1,4,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,122.15,1628.666667,1,0,4,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,114.1,1537.735849,1,0,2,0


In [257]:
df.to_csv('Clean_Data.csv')

In [259]:
df.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,price_in_lac,price_per_sqft,luxury,bog_house,total_rooms,big_house
count,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.858716,0.177982,0.350459,0.045872,0.315596,0.693578,0.234862,1.069725,47.667292,993.326978,0.407339,0.097248,2.572477,0.097248
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.348635,0.382849,0.477552,0.209399,0.46518,0.861586,0.424302,0.761373,18.704396,346.537025,0.49179,0.296567,1.004939,0.296567
min,1750000.0,1650.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.5,270.39555,0.0,0.0,2.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.3,745.37037,0.0,0.0,2.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,43.4,952.380952,0.0,0.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,2.0,57.4,1184.615385,1.0,0.0,4.0,0.0
max,13300000.0,16200.0,6.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,133.0,2640.0,1.0,1.0,8.0,1.0


In [282]:
# - Build a small function that returns house price stats
def get_states(data):
    return f"Mean:- {data['price'].mean()},  MAX:- {data['price'].max()}, MIN :- {data['price'].min()}"


In [283]:
get_states(df)

'Mean:- 4766729.247706422,  MAX:- 13300000, MIN :- 1750000'