In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from joblib import dump, load


# End-to-end workflow: load the housing table, fit a linear regression on two
# features, persist the fitted artifacts, evaluate on a hold-out split, and
# demonstrate inference from the reloaded artifacts.

# --- Load data ---------------------------------------------------------------
housing = pd.read_csv("data.csv")

# Features and target are selected by column name; a missing column raises
# KeyError here rather than failing later inside the model.
X = housing[['ZN', 'RM']]
y = housing['MEDV']

# --- Train / test split ------------------------------------------------------
# 20% of the rows are held out; the fixed random_state keeps the split (and
# therefore the reported RMSE) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Preprocessing -----------------------------------------------------------
# Median imputation followed by standardization. The pipeline is fitted on the
# training split only, so no statistics from the test split leak into it.
my_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler())
])

X_train_preprocessed = my_pipeline.fit_transform(X_train)

# --- Model fitting -----------------------------------------------------------
model = LinearRegression()
model.fit(X_train_preprocessed, y_train)

# --- Persistence -------------------------------------------------------------
# The regressor was trained on imputed + scaled inputs, so it is only usable
# together with the *fitted* preprocessing step. Persist both artifacts;
# saving the model alone would leave a fresh session unable to prepare inputs.
dump(model, 'housing_price_prediction_model.joblib')
dump(my_pipeline, 'housing_preprocessing_pipeline.joblib')

# --- Evaluation on the hold-out split ----------------------------------------
# transform (not fit_transform): reuse the statistics learned from training data.
X_test_preprocessed = my_pipeline.transform(X_test)

y_pred = model.predict(X_test_preprocessed)

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", rmse)

# --- Inference from the persisted artifacts ----------------------------------
# Reload both artifacts from disk so this section does not depend on any
# fitted object still living in kernel memory.
loaded_model = load('housing_price_prediction_model.joblib')
loaded_pipeline = load('housing_preprocessing_pipeline.joblib')

# Column names must match those used when the pipeline was fitted.
new_data = pd.DataFrame({'ZN': [20], 'RM': [6]})

new_data_preprocessed = loaded_pipeline.transform(new_data)

predicted_price = loaded_model.predict(new_data_preprocessed)
print("Predicted Price:", predicted_price)


Root Mean Squared Error: 6.478142921667525
Predicted Price: [20.40109094]
