In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.impute import SimpleImputer
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Load your dataset
data = pd.read_csv('Chennai houseing sale_2.csv')
print(data.head(5))

data.columns = data.columns.str.lower()
data.area = data.area.str.lower()

# Encode the area column
data.area = data.area.map({'karapakkam': 1,
                           'adyar': 2, 
                           'chrompet': 3,
                           'velachery': 4,
                           'kk nagar': 5, 
                           'anna nagar': 6,
                           't nagar': 7})

# Encode the park_facil column
data.park_facil = data.park_facil.map({'yes': 1, 'no': 0})

# Define features and target variables
features = data.drop(columns=['commis', 'sales_price', 'reg_fee'], axis=1)
target = data['sales_price']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Initialize the imputer with strategy='mean' to impute missing values with the mean
imputer = SimpleImputer(strategy='mean')

# Fit and transform the imputer on the training data
X_train_imputed = imputer.fit_transform(X_train)

# Transform the test data using the fitted imputer
X_test_imputed = imputer.transform(X_test)

# Initialize the Linear Regression model
lr = LinearRegression()

# Fit the model on the training data
lr.fit(X_train_imputed, y_train)

# Predict on the test data
y_pred = lr.predict(X_test_imputed)

# Evaluate the model
print('R2 SCORE:', r2_score(y_test, y_pred))


         AREA  INT_SQFT  DIST_MAINROAD  N_BEDROOM  N_BATHROOM  N_ROOM  \
0  Karapakkam      1004            131        1.0         1.0       3   
1  Anna Nagar      1986             26        2.0         1.0       5   
2       Adyar       909             70        1.0         1.0       3   
3   Velachery      1855             14        3.0         2.0       5   
4  Karapakkam      1226             84        1.0         1.0       3   

  PARK_FACIL  REG_FEE  COMMIS  SALES_PRICE  
0        Yes   380000  144400      7600000  
1         No   760122  304049     21717770  
2        Yes   421094   92114     13159200  
3         No   356321   77042      9630290  
4        Yes   237000   74063      7406250  
R2 SCORE: 0.5352933447153949
