# Locally Weighted Linear Regression (LWLR)

**Purpose:** Implement non-parametric regression using Gaussian kernel weighting.

**Key Concepts:**
- Non-parametric regression (no fixed model form)
- Gaussian kernel weighting
- Bandwidth selection (k parameter)
- Bias-variance tradeoff

**When to use LWLR:**
- Non-linear relationships
- Local patterns in data
- When global linear model fails

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns

# Apply one plotting look to every figure created later in the notebook:
# a dark-grid matplotlib style sheet plus seaborn's "husl" colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Status line confirming the import/setup cell ran to completion.
print("‚úÖ Libraries imported")

## 1. Core LWLR Functions

In [None]:
def lwlr(testPoint, xArr, yArr, k=1.0):
    """
    Locally Weighted Linear Regression for a single point

    Algorithm:
        1. Compute the squared Euclidean distance from testPoint to every
           training row
        2. Turn each distance into a Gaussian kernel weight:
           w = exp(-distance^2 / (2 * k^2))
        3. Solve the weighted normal equations (X^T W X) ws = X^T W y
        4. Return the dot product of testPoint and ws

    Parameters:
        testPoint: Point to predict (1D array with the same columns as xArr)
        xArr: Training features, one row per sample
        yArr: Training targets, one value per sample
        k: Bandwidth parameter (must be non-zero)
           - Small k (e.g., 0.01): Very local fit, high variance
           - Medium k (e.g., 1.0): Balanced
           - Large k (e.g., 10): Global fit, high bias

    Returns:
        Predicted value for testPoint as a float, or None (after printing a
        warning) when X^T W X is singular and the system cannot be solved

    Raises:
        ValueError: If k is zero (the kernel would divide by zero)
    """
    if k == 0:
        raise ValueError("k must be non-zero")

    # Plain ndarrays instead of np.mat: the matrix alias was removed in
    # NumPy 2.0 and the matrix class is discouraged for new code.
    features = np.atleast_2d(np.asarray(xArr, dtype=float))
    targets = np.asarray(yArr, dtype=float).ravel()
    query = np.asarray(testPoint, dtype=float).ravel()

    # Gaussian kernel weights for all training rows at once. Only the
    # diagonal of W is ever non-zero, so it is kept as a length-m vector
    # rather than a dense m x m matrix.
    sq_dist = np.sum((features - query) ** 2, axis=1)
    weights = np.exp(sq_dist / (-2.0 * k**2))

    # Weighted least squares: ws = (X^T W X)^-1 X^T W y
    weighted_features = weights[:, np.newaxis] * features
    xTx = features.T @ weighted_features
    xTy = weighted_features.T @ targets

    singular = np.linalg.det(xTx) == 0.0
    if not singular:
        try:
            # solve() avoids forming the explicit inverse.
            ws = np.linalg.solve(xTx, xTy)
        except np.linalg.LinAlgError:
            singular = True

    if singular:
        print("‚ö†Ô∏è  Matrix is singular, cannot compute inverse")
        return None

    return float(query @ ws)


def lwlr_test(testArr, xArr, yArr, k=1.0):
    """
    Apply LWLR to multiple test points

    Calls lwlr once per row of testArr and collects the predictions.

    Note: Can be slow for large datasets since it fits
          a new model for EACH test point

    Parameters:
        testArr: Points to predict, one row per point
        xArr: Training features
        yArr: Training targets
        k: Bandwidth parameter passed through to lwlr

    Returns:
        1D float array with one prediction per row of testArr. An entry is
        NaN when lwlr returned None for that point (singular local fit).
    """
    m = np.shape(testArr)[0]
    # Start from NaN so points whose local fit fails stay visibly missing
    # instead of raising a TypeError when None is assigned into the array.
    yHat = np.full(m, np.nan)

    for i in range(m):
        prediction = lwlr(testArr[i], xArr, yArr, k)
        if prediction is not None:
            # .item() accepts both a plain scalar and a 1x1 array/matrix.
            yHat[i] = np.asarray(prediction, dtype=float).item()

    return yHat


def rss_error(yArr, yHatArr):
    """
    Residual Sum of Squares

    Parameters:
        yArr: Observed values (any array-like)
        yHatArr: Predicted values (any array-like with the same number
                 of elements)

    Returns:
        Sum over all elements of (observed - predicted)^2
    """
    # Coerce and flatten both inputs so plain lists work, and so a (m,) vs
    # (m, 1) pair is compared element by element instead of being
    # broadcast into an m x m matrix.
    observed = np.asarray(yArr, dtype=float).ravel()
    predicted = np.asarray(yHatArr, dtype=float).ravel()
    return ((observed - predicted) ** 2).sum()


# Status line confirming the function-definition cell ran to completion.
print("‚úÖ LWLR functions defined")

## 2. Generate Non-Linear Test Data

In [None]:
def generate_nonlinear_data(n_samples=200, noise=0.3, random_state=42):
    """
    Generate non-linear data where LWLR should outperform linear regression

    Parameters:
        n_samples: Number of evenly spaced x values between 0 and 10
        noise: Standard deviation of the Gaussian noise added to y
        random_state: Seed for the noise generator

    Returns:
        X: (n_samples, 2) design matrix: a column of ones, then x
        y: Noisy targets, 2*sin(x) + 0.5*x^2 - 3*x + noise
        x: The raw x values (same as X[:, 1])
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randn, without reseeding the global generator
    # as a side effect.
    rng = np.random.RandomState(random_state)

    # Generate X from 0 to 10
    x = np.linspace(0, 10, n_samples)

    # Non-linear function: combination of sine and quadratic
    y = 2 * np.sin(x) + 0.5 * x**2 - 3 * x + rng.randn(n_samples) * noise

    # Add intercept term
    X = np.column_stack([np.ones(n_samples), x])

    return X, y, x


# Build the demo dataset: 200 samples with noise=0.5 (overriding the
# function's default of 0.3). X carries the intercept column, x_raw is the
# bare x axis used for plotting.
print("üîÑ Generating non-linear data...")
X, y, x_raw = generate_nonlinear_data(n_samples=200, noise=0.5)

print(f"‚úÖ Data generated: {X.shape[0]} samples")

# Scatter plot of the raw data before any model is fitted.
plt.figure(figsize=(10, 6))
plt.scatter(x_raw, y, alpha=0.5, s=30)
plt.xlabel('X', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('Non-Linear Data: LWLR vs. Linear Regression', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Train/Test Split

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Column 1 of the design matrix is the raw x value (column 0 is the
# intercept), kept separately for use as the plotting axis.
x_train_raw = X_train[:, 1]
x_test_raw = X_test[:, 1]

print(f"‚úÖ Data split:")
print(f"   Training: {len(X_train)} samples")
print(f"   Test: {len(X_test)} samples")

## 4. Bandwidth (k) Selection Experiment

In [None]:
# Section banner.
print("\n" + "="*70)
print("BANDWIDTH (k) SELECTION EXPERIMENT")
print("="*70)

# Candidate bandwidths, from very local (0.01) to nearly global (10.0).
k_values = [0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]

# Header of the results table printed row by row in the loop below.
print("\nüîÑ Testing different bandwidth values...\n")
print(f"{'k Value':<12} | {'Train RSS':>12} | {'Test RSS':>12} | {'Test R¬≤':>10}")
print("-" * 60)

# One dict per bandwidth; later cells read 'k', 'train_rss', 'test_rss'
# and 'test_r2' from these entries.
results = []

for k in k_values:
    # In-sample fit: predict the training points from the training set.
    yHat_train = lwlr_test(X_train, X_train, y_train, k)
    rss_train = rss_error(y_train, yHat_train)

    # Out-of-sample fit: predict the held-out points from the training set.
    yHat_test = lwlr_test(X_test, X_train, y_train, k)
    rss_test = rss_error(y_test, yHat_test)
    r2_test = r2_score(y_test, yHat_test)

    results.append({
        'k': k,
        'train_rss': rss_train,
        'test_rss': rss_test,
        'test_r2': r2_test,
        'predictions': yHat_test
    })

    print(f"{k:<12.2f} | {rss_train:>12.2f} | {rss_test:>12.2f} | {r2_test:>10.4f}")

# Pick the bandwidth with the lowest test RSS.
# NOTE(review): k is selected on the same test set that is later used to
# report performance, so the reported test metrics are optimistic; a
# separate validation split or cross-validation would avoid this.
optimal_result = min(results, key=lambda x: x['test_rss'])
optimal_k = optimal_result['k']

print("\n" + "="*70)
print(f"‚úÖ Optimal k: {optimal_k:.2f} (lowest test RSS)")
print(f"   Test RSS: {optimal_result['test_rss']:.2f}")
print(f"   Test R¬≤: {optimal_result['test_r2']:.4f}")
print("="*70)

## 5. Visualize Effect of Bandwidth

In [None]:
print("\nüìä Creating bandwidth comparison visualization...")

# Evenly spaced grid over the observed x range, with an intercept column so
# it has the same layout as the training design matrix.
x_grid = np.linspace(x_raw.min(), x_raw.max(), 200)
X_grid = np.column_stack([np.ones(200), x_grid])

# 2 x 3 panel figure, flattened so panels can be indexed 0..5.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# Six bandwidths, one per panel. Each must also appear in `results`,
# otherwise the lookup inside the loop raises IndexError.
k_to_plot = [0.01, 0.1, 1.0, 2.0, 5.0, 10.0]

for idx, k in enumerate(k_to_plot):
    ax = axes[idx]

    # Look up the sweep entry for this k to show its test R^2 in the title.
    result = [r for r in results if r['k'] == k][0]

    # Refit on the dense grid to draw a smooth curve.
    y_grid = lwlr_test(X_grid, X_train, y_train, k)

    # Training/test points with the fitted curve drawn over them.
    ax.scatter(x_train_raw, y_train, alpha=0.3, s=20, label='Training data')
    ax.scatter(x_test_raw, y_test, alpha=0.3, s=20, color='red', label='Test data')
    ax.plot(x_grid, y_grid, 'g-', linewidth=2, label='LWLR fit')

    ax.set_title(f'k = {k:.2f}\nTest R¬≤ = {result["test_r2"]:.3f}',
                fontsize=11, fontweight='bold')
    ax.set_xlabel('X', fontsize=10)
    ax.set_ylabel('y', fontsize=10)
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

    # Tint the panel and outline it in red when it shows the selected k.
    if k == optimal_k:
        ax.set_facecolor('#ffe6e6')
        for spine in ax.spines.values():
            spine.set_edgecolor('red')
            spine.set_linewidth(2)

plt.tight_layout()
plt.show()

# Fixed explanatory text; it is not derived from the computed results.
print("\nüìä Interpretation:")
print("   ‚Ä¢ Small k (0.01): Overfitting - very wiggly, high variance")
print("   ‚Ä¢ Medium k (1.0-2.0): Balanced - captures non-linearity, generalizes well")
print("   ‚Ä¢ Large k (10.0): Underfitting - too smooth, approaches linear regression")

## 6. Comparison: LWLR vs. Standard Linear Regression

In [None]:
# Section banner.
print("\n" + "="*70)
print("COMPARISON: LWLR vs. STANDARD LINEAR REGRESSION")
print("="*70)

# Standard linear regression (ordinary least squares) on plain ndarrays.
# np.mat was removed in NumPy 2.0, so the matrix class is avoided here; the
# variable names are kept for any cell that still refers to them.
xMat_train = np.asarray(X_train, dtype=float)
yMat_train = np.asarray(y_train, dtype=float).reshape(-1, 1)
xMat_test = np.asarray(X_test, dtype=float)

# Normal equations: (X^T X) ws = X^T y. solve() avoids the explicit
# inverse. ws_standard keeps its (n_features, 1) column shape so later
# cells can multiply a design matrix by it.
xTx = xMat_train.T @ xMat_train
ws_standard = np.linalg.solve(xTx, xMat_train.T @ yMat_train)

# Predictions, flattened to 1D to match y_train / y_test.
y_train_pred_std = (xMat_train @ ws_standard).ravel()
y_test_pred_std = (xMat_test @ ws_standard).ravel()

# LWLR with the bandwidth chosen in the sweep.
y_train_pred_lwlr = lwlr_test(X_train, X_train, y_train, optimal_k)
y_test_pred_lwlr = lwlr_test(X_test, X_train, y_train, optimal_k)

# Results table header.
print("\nüìä Performance Comparison:\n")
print(f"{'Method':<35} | {'Train RSS':>12} | {'Test RSS':>12} | {'Test R¬≤':>10}")
print("-" * 75)

# Standard regression
train_rss_std = rss_error(y_train, y_train_pred_std)
test_rss_std = rss_error(y_test, y_test_pred_std)
test_r2_std = r2_score(y_test, y_test_pred_std)
print(f"{'Standard Linear Regression':<35} | {train_rss_std:>12.2f} | {test_rss_std:>12.2f} | {test_r2_std:>10.4f}")

# LWLR
train_rss_lwlr = rss_error(y_train, y_train_pred_lwlr)
test_rss_lwlr = rss_error(y_test, y_test_pred_lwlr)
test_r2_lwlr = r2_score(y_test, y_test_pred_lwlr)
print(f"{'LWLR (k=' + f'{optimal_k:.2f}' + ')':<35} | {train_rss_lwlr:>12.2f} | {test_rss_lwlr:>12.2f} | {test_r2_lwlr:>10.4f}")

# Relative reduction in test RSS, in percent (negative if LWLR is worse).
improvement = (test_rss_std - test_rss_lwlr) / test_rss_std * 100
print("\n" + "="*75)
print(f"‚úÖ LWLR improves test RSS by {improvement:.2f}%")
print(f"‚úÖ LWLR improves test R¬≤ from {test_r2_std:.4f} to {test_r2_lwlr:.4f}")
print("="*75)

## 7. Side-by-Side Visualization

In [None]:
print("\nüìä Creating side-by-side comparison...")

# Dense grid over the observed x range, with an intercept column so it has
# the same layout as the training design matrix.
x_grid = np.linspace(x_raw.min(), x_raw.max(), 300)
X_grid = np.column_stack([np.ones(300), x_grid])

# Standard regression predictions on the grid. Plain ndarray maths is used
# because np.mat was removed in NumPy 2.0; np.asarray(...).ravel() yields a
# 1D array whether ws_standard is an ndarray column or an np.matrix.
xMat_grid = np.asarray(X_grid, dtype=float)
y_grid_std = np.asarray(xMat_grid @ ws_standard).ravel()

# LWLR predictions on the grid
y_grid_lwlr = lwlr_test(X_grid, X_train, y_train, optimal_k)

# Two panels: global linear fit on the left, LWLR fit on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Standard Linear Regression
ax1.scatter(x_train_raw, y_train, alpha=0.4, s=30, label='Training data', color='blue')
ax1.scatter(x_test_raw, y_test, alpha=0.4, s=30, label='Test data', color='red')
ax1.plot(x_grid, y_grid_std, 'g-', linewidth=3, label='Linear fit')
ax1.set_xlabel('X', fontsize=12, fontweight='bold')
ax1.set_ylabel('y', fontsize=12, fontweight='bold')
ax1.set_title(f'Standard Linear Regression\nTest R¬≤ = {test_r2_std:.4f}',
             fontsize=14, fontweight='bold')
ax1.legend(fontsize=11)
ax1.grid(True, alpha=0.3)

# LWLR
ax2.scatter(x_train_raw, y_train, alpha=0.4, s=30, label='Training data', color='blue')
ax2.scatter(x_test_raw, y_test, alpha=0.4, s=30, label='Test data', color='red')
ax2.plot(x_grid, y_grid_lwlr, 'purple', linewidth=3, label=f'LWLR fit (k={optimal_k:.2f})')
ax2.set_xlabel('X', fontsize=12, fontweight='bold')
ax2.set_ylabel('y', fontsize=12, fontweight='bold')
ax2.set_title(f'Locally Weighted Linear Regression\nTest R¬≤ = {test_r2_lwlr:.4f}',
             fontsize=14, fontweight='bold')
ax2.legend(fontsize=11)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úÖ Visualization complete!")
print("\nüéì Key Observation:")
print("   LWLR captures the non-linear pattern, while standard regression")
print("   is constrained to a straight line.")

## 8. Bias-Variance Analysis

In [None]:
# Section banner.
print("\n" + "="*70)
print("BIAS-VARIANCE TRADEOFF ANALYSIS")
print("="*70)

# Pull the per-bandwidth train/test errors out of the sweep results, in the
# order the sweep produced them.
k_values_plot = [r['k'] for r in results]
train_errors = [r['train_rss'] for r in results]
test_errors = [r['test_rss'] for r in results]

fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(k_values_plot, train_errors, 'b-o', linewidth=2, markersize=8, label='Training RSS')
ax.plot(k_values_plot, test_errors, 'r-s', linewidth=2, markersize=8, label='Test RSS')

# Vertical marker at the selected bandwidth.
ax.axvline(x=optimal_k, color='green', linestyle='--', linewidth=2,
          label=f'Optimal k = {optimal_k:.2f}')

ax.set_xlabel('Bandwidth (k)', fontsize=13, fontweight='bold')
ax.set_ylabel('RSS Error', fontsize=13, fontweight='bold')
ax.set_title('Bias-Variance Tradeoff: Training vs. Test Error',
            fontsize=15, fontweight='bold')
# Log-scaled x axis.
ax.set_xscale('log')
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3)

# Annotate the two ends of the curve. The arrow x positions (0.01 and 10)
# are hard-coded and paired with the first and last error entries, so they
# only line up if the sweep starts at k=0.01 and ends at k=10.
ax.annotate('High Variance\n(Overfitting)', xy=(0.01, train_errors[0]),
           xytext=(0.02, train_errors[0]*1.5),
           fontsize=10, ha='center',
           bbox=dict(boxstyle='round,pad=0.5', facecolor='yellow', alpha=0.3),
           arrowprops=dict(arrowstyle='->', color='red', lw=1.5))

ax.annotate('High Bias\n(Underfitting)', xy=(10, test_errors[-1]),
           xytext=(5, test_errors[-1]*1.2),
           fontsize=10, ha='center',
           bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.3),
           arrowprops=dict(arrowstyle='->', color='blue', lw=1.5))

plt.tight_layout()
plt.show()

# Fixed explanatory text; only the last line interpolates a computed value.
print("\nüéì Interpretation:")
print("   ‚Ä¢ Small k: Model is too flexible ‚Üí high variance ‚Üí overfitting")
print("   ‚Ä¢ Large k: Model is too simple ‚Üí high bias ‚Üí underfitting")
print(f"   ‚Ä¢ Optimal k = {optimal_k:.2f}: Best balance between bias and variance")

## 9. Summary and Conclusions

In [None]:
# Final text report. Only the "Key Findings" lines interpolate values
# computed by earlier cells (optimal_k, test_r2_lwlr, test_r2_std,
# improvement); everything else is fixed guidance text.
print("\n" + "="*70)
print("SUMMARY: LOCALLY WEIGHTED LINEAR REGRESSION")
print("="*70)

print("\n‚úÖ Key Findings:")
print(f"   ‚Ä¢ Optimal bandwidth: k = {optimal_k:.2f}")
print(f"   ‚Ä¢ Test R¬≤ (LWLR): {test_r2_lwlr:.4f}")
print(f"   ‚Ä¢ Test R¬≤ (Linear): {test_r2_std:.4f}")
print(f"   ‚Ä¢ Improvement: {improvement:.2f}% reduction in test error")

print("\nüéì When to Use LWLR:")
print("   ‚úì Data has non-linear relationships")
print("   ‚úì Local patterns are important")
print("   ‚úì You have moderate dataset size (not huge)")
print("   ‚úì Interpretability is not the main concern")

print("\n‚ö†Ô∏è  When NOT to Use LWLR:")
print("   ‚úó Very large datasets (slow prediction)")
print("   ‚úó High-dimensional data (curse of dimensionality)")
print("   ‚úó Need fast real-time predictions")
print("   ‚úó Need interpretable coefficients")

print("\nüí° Advantages:")
print("   + No assumptions about functional form")
print("   + Captures non-linear patterns automatically")
print("   + Single hyperparameter (k) to tune")
print("   + Straightforward implementation")

print("\n‚ö° Disadvantages:")
print("   - Computationally expensive (O(n) per prediction)")
print("   - Must store all training data")
print("   - Suffers from curse of dimensionality")
print("   - Bandwidth selection can be tricky")

print("\nüöÄ Next Steps:")
print("   1. Try LWLR on your own data")
print("   2. Implement cross-validation for k selection")
print("   3. Compare with polynomial regression")
print("   4. Explore other kernel functions (Epanechnikov, Tricube)")
print("   5. Combine with feature engineering")

print("\n" + "="*70)

## 10. Export Function for Your Own Data

In [None]:
def apply_lwlr(X_train, y_train, X_test, y_test, k_values=None):
    """
    Complete LWLR workflow for your own data

    Runs LWLR on the test set once per candidate bandwidth and keeps the
    bandwidth with the lowest test RSS. Because k is chosen on the same
    test data the metrics are computed from, the returned metrics are
    optimistic; use a separate validation split for an unbiased estimate.

    Parameters:
        X_train, y_train: Training data
        X_test, y_test: Test data
        k_values: Optional iterable of k values to test
                  (defaults to [0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0])

    Returns:
        optimal_k: Best bandwidth parameter
        predictions: Predictions on test set for optimal_k
        metrics: Dict with RSS, MSE, RMSE and R^2 of those predictions

    Raises:
        ValueError: If k_values is empty, or if no candidate bandwidth
                    yields a finite RSS (e.g. every prediction is NaN)
    """
    if k_values is None:
        k_values = [0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]

    # Materialise once so any iterable (including an ndarray) can be
    # checked for emptiness and then iterated.
    k_values = list(k_values)
    if not k_values:
        raise ValueError("k_values must contain at least one bandwidth")

    best_rss = float('inf')
    optimal_k = None
    best_predictions = None

    for k in k_values:
        y_pred = lwlr_test(X_test, X_train, y_train, k)
        rss = rss_error(y_test, y_pred)

        # A NaN or infinite RSS never compares below best_rss, so such
        # candidates are skipped.
        if rss < best_rss:
            best_rss = rss
            optimal_k = k
            best_predictions = y_pred

    if best_predictions is None:
        raise ValueError("no bandwidth in k_values produced a finite RSS")

    mse = mean_squared_error(y_test, best_predictions)
    metrics = {
        'RSS': best_rss,
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'R¬≤': r2_score(y_test, best_predictions)
    }

    return optimal_k, best_predictions, metrics


# Status line plus a one-line usage example for the helper defined above.
print("\n‚úÖ Helper function 'apply_lwlr' ready for your data!")
print("\nUsage:")
print("  optimal_k, predictions, metrics = apply_lwlr(X_train, y_train, X_test, y_test)")

---

## Ready for Your Own Data!

To use this with your own dataset:

```python
# Load your data
X = your_features
y = your_target

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Apply LWLR
optimal_k, predictions, metrics = apply_lwlr(X_train, y_train, X_test, y_test)

print(f"Optimal k: {optimal_k}")
print(f"Test R¬≤: {metrics['R¬≤']:.4f}")
```

**Repository:** https://github.com/enzodata3-blip/Task4  
**Based on:** https://github.com/Jack-Cherish/Machine-Learning  
**Created:** 2026-02-09