In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# --- Load data -------------------------------------------------------------
# Fetch the California housing dataset and unpack the feature matrix and target.
california = fetch_california_housing()
X, y = california.data, california.target

# Wrap the features in a labelled DataFrame purely for a readable preview;
# the model below is trained on the raw arrays X / y.
df = pd.DataFrame(data=X, columns=california.feature_names)
print("First few rows of the dataset:", df.head(), sep="\n")

# --- Train / test split ----------------------------------------------------
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit and predict -------------------------------------------------------
# fit() returns the fitted estimator, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Evaluate on the held-out set ------------------------------------------
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# One "<feature>: <coefficient>" line per input column, two decimals each.
print("\nModel Coefficients:")
for feature, coef in zip(california.feature_names, model.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"\nMean Squared Error: {mse:.2f}", f"R-squared Score: {r2:.2f}", sep="\n")

# --- Persist the fitted model ----------------------------------------------
# Written to the current working directory.
joblib.dump(model, 'housing_model.pkl')
print("\nModel saved as 'housing_model.pkl'")

First few rows of the dataset:
   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  

Model Coefficients:
MedInc: 0.45
HouseAge: 0.01
AveRooms: -0.12
AveBedrms: 0.78
Population: -0.00
AveOccup: -0.00
Latitude: -0.42
Longitude: -0.43

Mean Squared Error: 0.56
R-squared Score: 0.58

Model saved as 'housing_model.pkl'


LEARNING MACHINE LEARNING





