In [11]:
from pathlib import Path

import numpy as np
import pandas as pd
from joblib import dump
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [12]:
# Fetch the Boston Housing data as a CSV hosted on GitHub and parse it
# into a DataFrame.
url = (
    "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
)
df = pd.read_csv(filepath_or_buffer=url)

In [13]:
# 'medv' is the regression target; every other column is kept as a feature.
y = df['medv']
X = df.drop(columns='medv')
# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Fit a linear regression on the training split (fit() returns the
# estimator itself, so `lr` is the fitted model).
lr = LinearRegression().fit(X_train, y_train)
# Predict the target for the held-out rows.
y_pred = lr.predict(X_test)

In [15]:
# Score the held-out predictions: mean squared error, its square root
# (RMSE), and the coefficient of determination (R-squared).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
# Report each metric on its own line, formatted to two decimal places.
print(
    f"Mean Squared Error: {mse:.2f}",
    f"Root Mean Squared Error: {rmse:.2f}",
    f"R-squared: {r2:.2f}",
    sep="\n",
)

Mean Squared Error: 24.29
Root Mean Squared Error: 4.93
R-squared: 0.67


In [8]:
# Persist the fitted model to disk with joblib.
MODEL_PATH = '../models/model_v1.joblib'
# dump() opens the target file directly and does not create missing parent
# directories, so make sure '../models' exists first; otherwise a fresh
# checkout fails here with FileNotFoundError.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
# Pass the plain string (not a Path) so the displayed return value is the
# same list of filenames as before.
dump(lr, MODEL_PATH)

['../models/model_v1.joblib']

In [36]:
# Read the sample input file; its first CSV column is used as the index.
test = pd.read_csv(filepath_or_buffer='../data/test_data.csv', index_col=0)
# Display the model's prediction for the first row of the sample.
lr.predict(test)[0]

29.95339637733573

In [37]:
# Re-read the sample input file and serialise it to a JSON string using
# DataFrame.to_json()'s default layout (column -> {index -> value}).
# NOTE(review): the name `json` shadows the standard-library `json` module;
# consider renaming if no other cell depends on it.
json = (
    pd.read_csv(filepath_or_buffer='../data/test_data.csv', index_col=0)
    .to_json()
)
# Display the resulting JSON string.
json

'{"crim":{"0":0.00632},"zn":{"0":18.0},"indus":{"0":2.31},"chas":{"0":0},"nox":{"0":0.538},"rm":{"0":6.575},"age":{"0":65.2},"dis":{"0":4.09},"rad":{"0":1},"tax":{"0":296},"ptratio":{"0":15.3},"b":{"0":396.9},"lstat":{"0":4.98}}'