In [2]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer

import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Load your dataset
data = pd.read_csv('Chennai houseing sale_2.csv')
print(data.head(5))

data.columns = data.columns.str.lower()
data.area = data.area.str.lower()

# Encode the area column
data.area = data.area.map({'karapakkam': 1,
                           'adyar': 2, 
                           'chrompet': 3,
                           'velachery': 4,
                           'kk nagar': 5, 
                           'anna nagar': 6,
                           't nagar': 7})

# Encode the park_facil column
data.park_facil = data.park_facil.map({'yes': 1, 'no': 0})

# Define features and target variables
X = data.drop(columns=['commis', 'sales_price', 'reg_fee'], axis=1)
y = data['sales_price']

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Initialize the imputer with strategy='mean' to impute missing values with the mean
imputer = SimpleImputer(strategy='mean')

# Fit and transform the imputer on the training data
X_train_imputed = imputer.fit_transform(X_train)

# Transform the test data using the fitted imputer
X_test_imputed = imputer.transform(X_test)



# Data preprocessing: Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Training the k-NN regression model
k = 5  # Number of neighbors
knn_regressor = KNeighborsRegressor(n_neighbors=k)
knn_regressor.fit(X_train_scaled, y_train)

# Predicting house prices
y_pred = knn_regressor.predict(X_test_scaled)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)



# Manually input features for a new house
print("Enter features for the new house:")

area = int(input("Area (zipcode, district, etc.): "))
sqft = int(input("Square footage: "))
dist_main = int(input("Distance to main area (in miles): "))
bedrooms = int(input("Number of bedrooms: "))
bathrooms = int(input("Number of bathrooms: "))
room=int(input("Number of rooms:"))
park = int(input("Nearby park (1 for yes, 0 for no): "))







# Example prediction for a new house
new_house_features = np.array([[area,sqft,dist_main,bedrooms,bathrooms,room, park]]) # Example features for a new house
new_house_features_scaled = scaler.transform(new_house_features)
predicted_price = knn_regressor.predict(new_house_features_scaled)
print("Predicted Price for the New House:", predicted_price[0])


         AREA  INT_SQFT  DIST_MAINROAD  N_BEDROOM  N_BATHROOM  N_ROOM  \
0  Karapakkam      1004            131        1.0         1.0       3   
1  Anna Nagar      1986             26        2.0         1.0       5   
2       Adyar       909             70        1.0         1.0       3   
3   Velachery      1855             14        3.0         2.0       5   
4  Karapakkam      1226             84        1.0         1.0       3   

  PARK_FACIL  REG_FEE  COMMIS  SALES_PRICE  NULL  
0        Yes   380000  144400      7600000     1  
1         No   760122  304049     21717770     1  
2        Yes   421094   92114     13159200     1  
3         No   356321   77042      9630290     1  
4        Yes   237000   74063      7406250     1  
Mean Squared Error: 6901551044035.617
Enter features for the new house:
Area (zipcode, district, etc.): 1
Square footage: 1004
Distance to main area (in miles): 131
Number of bedrooms: 2
Number of bathrooms: 2
Number of rooms:4
Nearby park (1 for yes, 0 f