In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

In [3]:
# Load the housing dataset from the CSV file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="/content/Housing.csv")
df.head(n=5)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.693578
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


In [6]:
# Label encoder that maps categorical labels to integer codes.
encoder = LabelEncoder()

In [7]:
# Categorical (string-valued) columns that must be converted to integer codes
# before they can be fed to the scaler and the regression model.
encoding_col = ['mainroad','guestroom','basement','hotwaterheating','airconditioning','prefarea','furnishingstatus']

# One fitted encoder per column. Re-fitting a single shared LabelEncoder would
# overwrite its `classes_` on every iteration, so only the last column's
# label <-> code mapping would survive; keeping them all makes it possible to
# decode the codes later (inverse_transform) or to encode new rows consistently.
label_encoders = {}
for col in encoding_col:
  # A fresh encoder per column; after the loop `encoder` still refers to the
  # encoder fitted on the last column, exactly as before.
  encoder = LabelEncoder()
  df[col] = encoder.fit_transform(df[col])
  label_encoders[col] = encoder

In [8]:
# Show the frame after label encoding; the categorical columns now hold integer codes.
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,1,0,1,0,0,2,0,2
541,1767150,2400,3,1,1,0,0,0,0,0,0,0,1
542,1750000,3620,2,1,1,1,0,0,0,0,0,0,2
543,1750000,2910,3,1,1,0,0,0,0,0,0,0,0


In [9]:
# The target is the price column; every remaining column is used as a feature.
y = df['price']
x = df.drop(columns=['price'])

In [10]:
# Scale features and target to the [0, 1] range with *separate* scalers.
# Re-fitting one shared scaler on the target would discard the feature
# min/max it had learned, making it impossible to transform new feature rows
# the same way later on.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

x = x_scaler.fit_transform(x)
# MinMaxScaler expects 2-D input, so the 1-D price Series is reshaped into a
# single column; `y` therefore becomes an (n_rows, 1) array.
y = y_scaler.fit_transform(y.values.reshape(-1,1))

# Backward-compatible alias: `scaler` ends up fitted on the target, as before,
# so `scaler.inverse_transform(...)` maps scaled prices back to price units.
scaler = y_scaler

# NOTE(review): both scalers are fit on the full dataset before the train/test
# split below, so test-set min/max values leak into the preprocessing. Consider
# fitting the scalers on the training split only.

In [11]:
# Inspect the scaled feature matrix (a NumPy array after MinMaxScaler).
x

array([[0.39656357, 0.6       , 0.33333333, ..., 0.66666667, 1.        ,
        0.        ],
       [0.5024055 , 0.6       , 1.        , ..., 1.        , 0.        ,
        0.        ],
       [0.57113402, 0.4       , 0.33333333, ..., 0.66666667, 1.        ,
        0.5       ],
       ...,
       [0.13539519, 0.2       , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.08659794, 0.4       , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.15120275, 0.4       , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Ordinary least-squares regression fitted on the training split.
# The fit call is left as the cell's last expression so the fitted estimator is displayed.
ln_model = LinearRegression()
ln_model.fit(X=x_train, y=y_train)

In [15]:
# Predict the (scaled) target for the held-out test rows.
y_pred = ln_model.predict(X=x_test)

In [16]:
# Inspect the predictions; they are on the scaled target scale, not in original price units.
y_pred

array([[0.29902093],
       [0.47679688],
       [0.11366481],
       [0.24325469],
       [0.13705042],
       [0.15697668],
       [0.33726981],
       [0.40380776],
       [0.08708498],
       [0.07956179],
       [0.67710825],
       [0.09328411],
       [0.12516764],
       [0.13872416],
       [0.17003286],
       [0.30745353],
       [0.10717924],
       [0.26500431],
       [0.22796811],
       [0.15368764],
       [0.3503255 ],
       [0.35411261],
       [0.08746447],
       [0.26083032],
       [0.29911305],
       [0.49918119],
       [0.13027547],
       [0.30183242],
       [0.55658209],
       [0.14581525],
       [0.40640014],
       [0.13818223],
       [0.43223591],
       [0.20821964],
       [0.15923398],
       [0.34962129],
       [0.26133075],
       [0.22871723],
       [0.12706987],
       [0.25006031],
       [0.24001388],
       [0.15508953],
       [0.4751633 ],
       [0.19666803],
       [0.16900249],
       [0.22068221],
       [0.42900468],
       [0.194

In [17]:
# Error metrics on the test split. Both are computed on the min-max-scaled
# target, so they are not expressed in original price units.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [18]:
# Mean absolute error on the test split (scaled target units).
mae

0.08482075249315932

In [19]:
# Mean squared error on the test split (scaled target units).
mse

0.013281243729270737

In [20]:
# Coefficient of determination (R^2) of the linear model on the test split.
# Despite the variable name, this is a goodness-of-fit score, not a classification accuracy.
ln_acc = r2_score(y_true=y_test, y_pred=y_pred)
ln_acc

0.6494754192267794