In [1]:
from google.colab import files
uploaded = files.upload()

import pandas as pd
import io

df = pd.read_csv(io.BytesIO(uploaded['House Price India.csv']))
df.head()


Saving House Price India.csv to House Price India.csv


Unnamed: 0,id,Date,number of bedrooms,number of bathrooms,living area,lot area,number of floors,waterfront present,number of views,condition of the house,...,Built Year,Renovation Year,Postal Code,Lattitude,Longitude,living_area_renov,lot_area_renov,Number of schools nearby,Distance from the airport,Price
0,6762810145,42491,5,2.5,3650,9050,2.0,0,4,5,...,1921,0,122003,52.8645,-114.557,2880,5400,2,58,2380000
1,6762810635,42491,4,2.5,2920,4000,1.5,0,0,5,...,1909,0,122004,52.8878,-114.47,2470,4000,2,51,1400000
2,6762810998,42491,5,2.75,2910,9480,1.5,0,0,3,...,1939,0,122004,52.8852,-114.468,2940,6600,1,53,1200000
3,6762812605,42491,4,2.5,3310,42998,2.0,0,0,3,...,2001,0,122005,52.9532,-114.321,3350,42847,3,76,838000
4,6762812919,42491,3,2.0,2710,4500,1.5,0,0,4,...,1929,0,122006,52.9047,-114.485,2060,4500,1,51,805000


In [2]:
# Check and clean column names
print(df.columns)

# Drop 'Date' and unnecessary columns
df.drop(['id', 'Date'], axis=1, inplace=True)

# Convert 'number of bedrooms' to integer
df['number of bedrooms'] = df['number of bedrooms'].astype(int)

# Rename columns for convenience (optional but clean)
df.rename(columns={
    'number of bedrooms': 'Bedrooms',
    'number of bathrooms': 'Bathrooms',
    'living area': 'LivingArea',
    'lot area': 'LotArea',
    'number of floors': 'Floors',
    'waterfront present': 'Waterfront',
    'number of views': 'Views',
    'condition of the house': 'Condition',
    'grade of the house': 'Grade',
    'Area of the house(excluding basement)': 'HouseArea',
    'Area of the basement': 'BasementArea',
    'Built Year': 'YearBuilt',
    'Renovation Year': 'YearRenovated',
    'Postal Code': 'Zipcode',
    'Lattitude': 'Latitude',
    'Longitude': 'Longitude',
    'living_area_renov': 'LivingAreaRenov',
    'lot_area_renov': 'LotAreaRenov',
    'Number of schools nearby': 'SchoolsNearby',
    'Distance from the airport': 'AirportDistance',
    'Price': 'Price'
}, inplace=True)

# Handle missing values
df.fillna(df.median(numeric_only=True), inplace=True)


Index(['id', 'Date', 'number of bedrooms', 'number of bathrooms',
       'living area', 'lot area', 'number of floors', 'waterfront present',
       'number of views', 'condition of the house', 'grade of the house',
       'Area of the house(excluding basement)', 'Area of the basement',
       'Built Year', 'Renovation Year', 'Postal Code', 'Lattitude',
       'Longitude', 'living_area_renov', 'lot_area_renov',
       'Number of schools nearby', 'Distance from the airport', 'Price'],
      dtype='object')


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features and target
X = df.drop(['Price'], axis=1)
y = df['Price']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential()

# Input layer and first hidden layer
model.add(Dense(units=64, activation='relu', input_shape=(X_train.shape[1],)))

# Second hidden layer
model.add(Dense(units=32, activation='relu'))

# Output layer
model.add(Dense(units=1))  # Regression output (no activation)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1)


Epoch 1/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 417374633984.0000 - mae: 536725.5000 - val_loss: 394274209792.0000 - val_mae: 520506.8438
Epoch 2/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 420192813056.0000 - mae: 539943.6250 - val_loss: 385610612736.0000 - val_mae: 514916.9375
Epoch 3/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 413268279296.0000 - mae: 529919.1250 - val_loss: 364417581056.0000 - val_mae: 500988.1250
Epoch 4/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 382091722752.0000 - mae: 513778.9688 - val_loss: 328351383552.0000 - val_mae: 476137.6875
Epoch 5/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 325963284480.0000 - mae: 480338.9375 - val_loss: 280193630208.0000 - val_mae: 439782.1875
Epoch 6/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[

In [6]:
# Evaluate on test data
loss, mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {mae:.2f}")


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 32360904704.0000 - mae: 109688.9062
Test MAE: 108059.30
