# ML Model Inference

This notebook performs inference (predictions) using the trained linear regression model.

## Workflow:
1. Load configuration
2. Load trained model
3. Load new data for prediction
4. Make predictions
5. Evaluate predictions
6. Save results and inference metrics

import os
import sys
import json
import joblib
import pandas as pd
import numpy as np
from datetime import datetime

# Put the dev workspace's src directory at the front of the import path
src_dir = '/Workspace/ml-regression-model-dev/src'
sys.path.insert(0, src_dir)

# To point at the production workspace instead, use:
# sys.path.insert(0, '/Workspace/ml-regression-model-prod/src')

print("Libraries imported successfully")

## Load Configuration

# Read the YAML model configuration from the dev workspace
import yaml

config_path = '/Workspace/ml-regression-model-dev/config/model_config.yaml'
with open(config_path, 'r') as config_file:
    # safe_load builds plain Python objects only (no arbitrary object construction)
    config = yaml.safe_load(config_file)

print("Configuration loaded")

# Echo the model section so the run log records what was configured
model_settings = config['model']
print(f"Model Type: {model_settings['type']}")
print(f"Features: {model_settings['features']}")

## Load Trained Model

# Path to trained model
model_path = '/Workspace/ml-regression-model-dev/models/trained_model.pkl'

# Load the persisted estimator. Only the load itself is guarded, so that a
# failure while reporting on the model is never mistaken for a missing file.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts from a trusted location.
try:
    model = joblib.load(model_path)
except FileNotFoundError:
    print(f" Model not found at {model_path}")
    print("Please run the training notebook first.")
    raise

# Report what was loaded (coef_/intercept_ are read directly off the model)
print(f" Model loaded from {model_path}")
print(f"Model type: {type(model).__name__}")
print(f"Model coefficients: {model.coef_}")
print(f"Model intercept: {model.intercept_}")

## Load Inference Data

# For demo, we'll use the Boston Housing data.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. When the loader is unavailable, fall back to
# rebuilding the same arrays from the original source, following the recipe
# given in scikit-learn's removal notice.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    def load_boston():
        """Rebuild the Boston Housing arrays from the CMU StatLib copy.

        Requires network access. Returns a Bunch exposing ``data``,
        ``target`` and ``feature_names`` like the removed loader did.
        """
        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record spans two physical rows in the raw file: the even rows
        # carry the first 11 features, the odd rows the last 2 plus the target.
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        return Bunch(data=data, target=target, feature_names=feature_names)

# Load the Boston Housing dataset
boston = load_boston()
X_data = pd.DataFrame(boston.data, columns=boston.feature_names)
y_data = pd.Series(boston.target, name='target')

# Use last 10 samples for inference; copies keep later edits off the full frames
X_inference = X_data.iloc[-10:].copy()
y_actual = y_data.iloc[-10:].copy()

print(f" Inference data loaded")
print(f"Shape: {X_inference.shape}")
print(f"\nFirst few rows:")
print(X_inference.head())

## Make Predictions

# Run the loaded model over the inference features
predictions = model.predict(X_inference)

# Report how many predictions were produced, then the values themselves
n_predictions = len(predictions)
print(f" Predictions completed")
print(f"Number of predictions: {n_predictions}")
print(f"\nPredictions:\n{predictions}")

## Evaluate Predictions

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Score the predictions against the known targets
mse = mean_squared_error(y_actual, predictions)
mae = mean_absolute_error(y_actual, predictions)
r2 = r2_score(y_actual, predictions)
rmse = np.sqrt(mse)  # RMSE is derived from MSE

# One print call, newline-separated, emits the same five lines
print(
    f" Inference Metrics:",
    f"  MSE:  {mse:.4f}",
    f"  RMSE: {rmse:.4f}",
    f"  MAE:  {mae:.4f}",
    f"  R²:   {r2:.4f}",
    sep="\n",
)

## Save Predictions

# Create results dataframe: one row per sample with absolute and percentage error
actual_values = y_actual.values
residuals = actual_values - predictions
results = pd.DataFrame({
    'actual': actual_values,
    'predicted': predictions,
    'error': np.abs(residuals),
    # Percentage error relative to the actual value.
    # NOTE: an actual value of exactly 0 yields inf/NaN in this column.
    'relative_error': np.abs(residuals / actual_values * 100)
})

print(" Results DataFrame:")
print(results)

# Save predictions
output_path = '/Workspace/ml-regression-model-dev/outputs/inference_results.csv'
# to_csv does not create missing parent directories, so make sure it exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results.to_csv(output_path, index=False)
print(f"\n Predictions saved to {output_path}")

## Save Inference Metrics

# Save inference metrics as JSON
# Metric values are cast to built-in float so json can serialize them;
# only the first five predictions are kept as a sample.
inference_metrics = {
    'timestamp': datetime.now().isoformat(),
    'num_samples': len(predictions),
    'mse': float(mse),
    'rmse': float(rmse),
    'mae': float(mae),
    'r2': float(r2),
    'predictions_sample': predictions[:5].tolist()
}

metrics_path = '/Workspace/ml-regression-model-dev/outputs/inference_metrics.json'
# open(..., 'w') fails if the parent directory is missing, so create it first
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(inference_metrics, f, indent=2)

print(f" Inference metrics saved to {metrics_path}")
print(f"\nMetrics:")
print(json.dumps(inference_metrics, indent=2))

## Inference Summary

# Final recap: artifacts written and headline metrics, framed by divider rules
divider = "="*60

print(divider)
print("INFERENCE SUMMARY")
print(divider)

# Where things were read from and written to
print(f" Model loaded: {model_path}")
print(f" Samples processed: {len(predictions)}")
print(f" Predictions saved: {output_path}")
print(f" Metrics saved: {metrics_path}")

# Headline performance numbers
print(f"\nPerformance Metrics:")
print(f"  RMSE: {rmse:.4f}")
print(f"  MAE:  {mae:.4f}")
print(f"  R²:   {r2:.4f}")

print(divider)
print(" Inference completed successfully!")
print(divider)