In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler  # For feature scaling
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials




In [None]:
# Authenticate the Colab user and create a PyDrive client.
# The statements below are order-dependent: the user is authenticated first,
# then the resulting default credentials are attached to the GoogleAuth object
# (presumably get_application_default() relies on the preceding
# authenticate_user() call -- TODO confirm).
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): the name `drive` is rebound by `from google.colab import drive`
# in the next cell, so this PyDrive client is no longer reachable after that
# cell runs; no later visible cell uses the PyDrive client.
drive = GoogleDrive(gauth)

In [None]:
# Mount Google Drive so the CSV is reachable through the local filesystem.
# NOTE: this import rebinds the module-level name `drive`, replacing the
# PyDrive client object created in the authentication cell.
from google.colab import drive
drive.mount('/content/drive')

# Location of the dataset inside the mounted drive.
file_path = '/content/drive/MyDrive/house_prices3.csv'

# Read the CSV file (pandas is already imported as `pd` in the first cell).
data = pd.read_csv(file_path)

# Quick look at the data: first rows, column dtypes / null counts, summary stats.
print(data.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() -- that would emit a stray "None" line after the report.
data.info()
print(data.describe())


Mounted at /content/drive
   House Size (sq ft)  Number of Bedrooms  Location (1-5)  Age (years)  \
0                2081                   3               4           32   
1                2727                   4               3           39   
2                1518                   5               5           35   
3                1436                   2               2            4   
4                2251                   5               5           14   

   Price ($)  
0     282948  
1     313853  
2     310886  
3     245330  
4     377906  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   House Size (sq ft)  3000 non-null   int64
 1   Number of Bedrooms  3000 non-null   int64
 2   Location (1-5)      3000 non-null   int64
 3   Age (years)         3000 non-null   int64
 4   Price ($)           3000 non-null   int64
dtyp

In [None]:
# Columns used as model inputs; the price column is the regression target.
feature_columns = [
    "House Size (sq ft)",
    "Number of Bedrooms",
    "Location (1-5)",
    "Age (years)",
]
X = data[feature_columns]
y = data["Price ($)"]

# Preview the first three feature rows.
print("X\n", X.iloc[:3])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



X
    House Size (sq ft)  Number of Bedrooms  Location (1-5)  Age (years)
0                2081                   3               4           32
1                2727                   4               3           39
2                1518                   5               5           35


In [None]:
# Standardise the features. The scaler is fitted on the training rows only,
# and the same fitted scaler is then applied to both the train and test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Compare the first three training rows before and after scaling.
print("X_train\n", X_train.iloc[:3])
print("X_train_scaled\n", X_train_scaled[:3])


X_train
      House Size (sq ft)  Number of Bedrooms  Location (1-5)  Age (years)
642                1183                   4               5           16
700                1089                   4               4            8
226                2105                   2               2           41
X_train_scaled
 [[-1.44903586  0.43884366  1.42940418 -0.68620214]
 [-1.61163638  0.43884366  0.72236172 -1.23713804]
 [ 0.14583306 -1.34175613 -0.69172321  1.03547255]]


In [None]:
# Build a linear regression estimator and fit it to the scaled training
# features against the training prices.
model = LinearRegression()
model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Report the fitted parameters of the regression equation:
# the coefficient array first, then the intercept.
for label, fitted_value in (
    ("Coefficients:", model.coef_),
    ("Intercept:", model.intercept_),
):
    print(label, fitted_value)



Coefficients: [ 28653.58587166  22392.31835952  21160.34972558 -14516.86858164]
Intercept: 289980.8675


In [None]:
# Make predictions on the (scaled) test data.
y_pred = model.predict(X_test_scaled)
# Show only a short preview: printing the full prediction array floods the
# cell output without adding information.
print(y_pred[:10])

[282090.25867978 313870.97191621 280593.45133365 258721.08539727
 270627.55492326 247155.33730242 390018.93330025 279744.86706167
 306782.56809996 377535.98867436 316518.01341966 314079.47553066
 310280.79393708 290227.51737072 334607.72611358 309449.25025385
 279646.74976847 237464.23242286 266124.53752908 272064.28553025
 302101.29753405 323861.17442113 251284.04823782 340353.52878379
 276654.48409762 347564.9537807  233203.78548291 302287.00985326
 213703.4291504  336863.03980854 279942.46360369 283772.52967831
 297692.56551452 263970.62187088 305609.4363682  221845.79559394
 237551.25811164 317756.47003539 301082.98008876 299764.17783943
 308409.11543108 228211.11921601 341316.21008676 366271.09585108
 294904.30901993 285286.45586598 338257.0851669  288238.37879312
 248080.39190415 361607.76213076 189648.50450292 317556.50194086
 331671.4565965  253575.84893839 274849.19714646 342091.22659885
 387332.65447311 285102.7058826  239942.55358252 226339.59619444
 263835.67249022 265083.7

In [None]:
# Wrap the actual and predicted prices as pandas Series.
# `y_test` keeps the original row labels from the train/test split, so the
# predictions are given that same index. Without it they would receive a fresh
# 0..n-1 index and any label-aligned operation between the two Series
# (e.g. `y_test_series - y_pred_series`) would pair up the wrong rows.
# Positional access via `.iloc` works identically on both Series.
y_test_series = pd.Series(y_test)
y_pred_series = pd.Series(y_pred, index=y_test_series.index)

# Calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100`` over all elements.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, element-for-element with ``y_true``.

    Returns
    -------
    numpy.float64
        The MAPE expressed as a percentage (e.g. ``1.78`` means 1.78%).

    Notes
    -----
    * Elements where ``y_true`` is 0 make the ratio ``inf`` (or ``nan`` for
      0/0), which propagates into the result; MAPE is undefined there.
    * Mismatched shapes that cannot be broadcast raise ``ValueError`` from
      NumPy, as before.
    """
    # Convert to float so integer inputs (notably unsigned ones, which would
    # wrap around on subtraction) are handled with ordinary real arithmetic.
    # Dropping length-1 axes makes a column vector of shape (n, 1) pair
    # element-wise with a flat vector of shape (n,) instead of silently
    # broadcasting into an n-by-n matrix and averaging over the wrong pairs.
    y_true = np.squeeze(np.asarray(y_true, dtype=float))
    y_pred = np.squeeze(np.asarray(y_pred, dtype=float))
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Score the test-set predictions and report the error to two decimal places.
mape = mean_absolute_percentage_error(
    y_true=y_test_series,
    y_pred=y_pred_series,
)
print(f"Mean Absolute Percentage Error: {mape:.2f}%")


Mean Absolute Percentage Error: 1.78%


In [None]:
def _read_number(prompt, min_value=None, max_value=None):
    """Prompt repeatedly until the user enters a valid number.

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.
    min_value, max_value : float, optional
        Inclusive bounds; ``None`` disables the corresponding check.

    Returns
    -------
    float
        A finite value within the requested bounds.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        # float() also accepts "nan" and "inf", which would poison the prediction.
        if not np.isfinite(value):
            print("Please enter a finite number.")
            continue
        if min_value is not None and value < min_value:
            print(f"Value must be at least {min_value}.")
            continue
        if max_value is not None and value > max_value:
            print(f"Value must be at most {max_value}.")
            continue
        return value


# Get the feature values of the house whose price should be predicted.
# Location is restricted to the 1-5 scale named in the prompt and the dataset
# column; the other quantities cannot be negative.
House_size = _read_number("Enter House_size (sq ft): ", min_value=0)
Number_of_Bedrooms = _read_number("Enter Number of Bedrooms: ", min_value=0)
Location = _read_number("Enter Location (1-5): ", min_value=1, max_value=5)
Age = _read_number("Enter Age of House (years): ", min_value=0)

Enter House_size (sq ft): 1000
Enter Number of Bedrooms: 10
Enter Location (1-5): 4
Enter Age of House (years): 2


In [None]:
# Predict the price for the user-supplied house.
# The values are listed in the same order as the feature columns of X.
new_data = [House_size, Number_of_Bedrooms, Location, Age]  # new data
# The scaler was fitted on a DataFrame, so give it a one-row DataFrame with the
# same column names. Passing a bare list makes scikit-learn warn that
# "X does not have valid feature names" and bypasses its name/order check.
new_data_frame = pd.DataFrame([new_data], columns=X.columns)
new_data_scaled = scaler.transform(new_data_frame)
# The model itself was fitted on the scaled NumPy array, so it takes the
# scaled array directly.
y_pred_new = model.predict(new_data_scaled)
print("Predicted price:", y_pred_new)

Predicted price: [408075.6355318]


