### Ensuring Consistency Across Training & Inference Datasets: Feature Scaling
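**Question**: Load a dataset (e.g., Boston Housing) and perform feature scaling. Ensure the
same scaling is applied during model inference with new data.

*Note*: this notebook uses the California Housing dataset, because `load_boston` was removed
from scikit-learn in version 1.2.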

In [1]:
# write your code from here
# --- Step 1: Import Required Libraries ---
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import joblib

# --- Step 2: Load Dataset with Error Handling ---
def load_data():
    """Load the California Housing data as a feature frame and a target series.

    Returns:
        tuple[pd.DataFrame, pd.Series]: ``(X, y)``. ``X`` has one column per
        entry in the dataset's ``feature_names``; ``y`` is the target, named
        ``"MedHouseValue"``.

        If anything goes wrong while loading, the error is printed and an
        empty DataFrame plus an empty float64 series named ``"MedHouseValue"``
        are returned instead of raising, so callers should check ``X.empty``
        before continuing.
    """
    try:
        data = fetch_california_housing()
        X = pd.DataFrame(data.data, columns=data.feature_names)
        y = pd.Series(data.target, name="MedHouseValue")
        return X, y
    except Exception as e:
        # Best-effort loader: report the failure and hand back empty containers.
        print(f"Error loading dataset: {e}")
        # Give the fallback target an explicit dtype and name. A bare
        # pd.Series() emits a deprecation warning on pandas 1.x and is
        # object-dtype on pandas 2.x, which differs from the success path.
        return pd.DataFrame(), pd.Series(dtype="float64", name="MedHouseValue")

X, y = load_data()

# --- Step 3: Train-Test Split ---
# Split BEFORE computing any imputation statistics so that nothing derived
# from the held-out rows can leak into the training data.
try:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
except ValueError as e:
    print(f"Error in train-test split: {e}")
    # Nothing downstream can work without a valid split (e.g. the dataset
    # failed to load and is empty), so stop here instead of cascading errors.
    raise

# --- Step 4: Handle Missing Values Using Training Statistics Only ---
if X.isnull().sum().sum() > 0 or y.isnull().sum() > 0:
    print("Missing values found. Handling them...")
    # Means come from the training split and are reused for the test split,
    # mirroring how the scaler is fit on train and only applied to test.
    feature_fill = X_train.mean()
    target_fill = y_train.mean()
    # Non-inplace fills keep the raw X / y untouched, so re-running this cell
    # always starts from the same data.
    X_train = X_train.fillna(feature_fill)
    X_test = X_test.fillna(feature_fill)
    # NOTE(review): mean-filling the target is kept from the original logic;
    # dropping unlabeled rows is usually the safer choice.
    y_train = y_train.fillna(target_fill)
    y_test = y_test.fillna(target_fill)

# --- Step 5: Feature Scaling with Error Handling ---
# The scaler learns its mean/scale from the training split only; the very same
# fitted object is then applied to the test split and, later, to new data.
scaler = StandardScaler()
try:
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
except Exception as e:
    print(f"Error during scaling: {e}")
    # Re-raise after reporting: continuing with empty placeholder arrays would
    # leave the scaler unfitted while later steps still use and save it.
    raise

# --- Step 6: Train Model ---
# Fit the regression on the scaled training features.
model = LinearRegression()
try:
    model.fit(X_train_scaled, y_train)
except Exception as e:
    print(f"Error training model: {e}")
    # Re-raise after reporting: an unfitted model must not flow on into the
    # inference and persistence steps below.
    raise

# --- Step 7: Inference on New Data (Simulate One Row) ---
def predict_new_data(new_data):
    """Predict the target for the first row of ``new_data``.

    The input goes through the same preprocessing as the training data, using
    only state stored on the fitted ``scaler``:

    1. Columns are selected and ordered by ``scaler.feature_names_in_`` when
       the scaler recorded feature names during fitting.
    2. Missing values are filled with ``scaler.mean_``, the per-feature means
       the scaler learned when it was fit.
    3. The frame is scaled with ``scaler.transform`` and passed to
       ``model.predict``.

    Args:
        new_data (pd.DataFrame): One or more rows of raw (unscaled) features.
            The frame is never modified.

    Returns:
        The prediction for the first row, or ``None`` if any step fails (the
        error is printed).
    """
    try:
        # Align to the training feature order; a missing column raises
        # KeyError, which is reported below like any other failure.
        feature_names = getattr(scaler, "feature_names_in_", None)
        if feature_names is not None:
            features = new_data[list(feature_names)]
        else:
            features = new_data

        if features.isnull().sum().sum() > 0:
            print("New data has missing values. Filling with mean.")
            # Use the means stored on the fitted scaler rather than a global
            # training frame, so imputation still works after the scaler is
            # reloaded from disk. Non-inplace fill leaves the caller's frame
            # untouched.
            train_means = pd.Series(scaler.mean_, index=features.columns)
            features = features.fillna(train_means)

        new_data_scaled = scaler.transform(features)
        prediction = model.predict(new_data_scaled)
        return prediction[0]
    except Exception as e:
        print(f"Prediction error: {e}")
        return None

# Simulate one new data row. Take an explicit copy so inference-time
# preprocessing can never write back into the training frame.
if len(X) > 0:
    new_data = X.iloc[[0]].copy()
    predicted_value = predict_new_data(new_data)
else:
    # X.iloc[[0]] raises IndexError on an empty frame; report it instead.
    print("No data available to simulate an inference request.")
    new_data = X
    predicted_value = None

if predicted_value is not None:
    print(f"Predicted Median House Value: {predicted_value:.2f}")

# --- Step 8: Save Model & Scaler ---
# Persist the model together with the exact scaler it was trained with, so
# inference can reload both and apply identical scaling. Only fitted objects
# are written: `coef_` / `mean_` exist only after a successful fit, and dumping
# unfitted estimators would overwrite any good artifacts already on disk.
if hasattr(model, "coef_") and hasattr(scaler, "mean_"):
    try:
        joblib.dump(model, 'california_model.pkl')
        joblib.dump(scaler, 'california_scaler.pkl')
    except Exception as e:
        print(f"Error saving model or scaler: {e}")
else:
    print("Model or scaler is not fitted; skipping save.")

Predicted Median House Value: 4.15
