Lab 8: Simple Linear Regression
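This script demonstrates simple linear regression (one feature, one target) with scikit-learn: fitting and evaluating a model, residual analysis, a manual least-squares solution, prediction intervals, and a house-price example.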

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [None]:
def generate_sample_data(n_samples=100, slope=2.5, intercept=5.0,
                         noise_std=2.0, x_scale=10.0, seed=42):
    """Generate a noisy straight-line dataset for the regression demos.

    The data follow ``y = slope * x + intercept + noise`` where ``x`` is drawn
    uniformly from ``[0, x_scale)`` and ``noise`` is Gaussian with standard
    deviation ``noise_std``. With the default arguments the returned arrays
    are identical to the original hard-coded dataset.

    Args:
        n_samples: Number of observations to generate.
        slope: True slope of the underlying line.
        intercept: True intercept of the underlying line.
        noise_std: Standard deviation of the additive Gaussian noise.
        x_scale: Upper bound (exclusive) of the uniform feature range.
        seed: Seed for the private random generator; ``None`` gives a
            non-reproducible dataset.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 1)`` and ``y``
        has shape ``(n_samples,)``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level functions, but leaves the process-global
    # NumPy RNG untouched for every other caller.
    rng = np.random.RandomState(seed)
    X = rng.rand(n_samples, 1) * x_scale
    y = slope * X + intercept + rng.randn(n_samples, 1) * noise_std
    return X, y.ravel()


In [None]:
def basic_linear_regression():
    """Fit, score and plot a one-feature linear model on the sample data.

    The sample dataset is split 80/20 into train and test parts, a
    ``LinearRegression`` is fitted on the training part, and the learned
    parameters plus MSE, RMSE, MAE and R² on the test part are printed.
    A two-panel figure (training fit, test predictions) is written to
    ``lab8_linear_regression.png``.
    """
    banner = "=" * 50
    print(banner)
    print("Basic Simple Linear Regression")
    print(banner)

    # Data summary
    features, targets = generate_sample_data()
    print(f"\nDataset shape: {features.shape}")
    print(f"X range: [{features.min():.2f}, {features.max():.2f}]")
    print(f"y range: [{targets.min():.2f}, {targets.max():.2f}]")

    # Hold out 20% of the rows for evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain
    regressor = LinearRegression().fit(X_train, y_train)
    slope = regressor.coef_[0]
    offset = regressor.intercept_

    print("\nModel Parameters:")
    print(f"Coefficient (slope): {slope:.4f}")
    print(f"Intercept: {offset:.4f}")
    print(f"Equation: y = {slope:.4f}x + {offset:.4f}")

    # Score on the held-out rows
    test_predictions = regressor.predict(X_test)
    mse = mean_squared_error(y_test, test_predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, test_predictions)
    r2 = r2_score(y_test, test_predictions)

    print("\nModel Evaluation:")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R² Score: {r2:.4f}")

    # Two side-by-side panels
    fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: training points with the fitted line
    train_ax.scatter(X_train, y_train, alpha=0.6, label='Training Data')
    train_ax.plot(X_train, regressor.predict(X_train), 'r-', linewidth=2,
                  label='Regression Line')
    train_ax.set_xlabel('X')
    train_ax.set_ylabel('y')
    train_ax.set_title('Linear Regression - Training Data')
    train_ax.legend()
    train_ax.grid(True, alpha=0.3)

    # Right panel: actual versus predicted values on the test rows
    test_ax.scatter(X_test, y_test, alpha=0.6, label='Actual')
    test_ax.scatter(X_test, test_predictions, alpha=0.6, label='Predicted')
    test_ax.set_xlabel('X')
    test_ax.set_ylabel('y')
    test_ax.set_title('Linear Regression - Test Predictions')
    test_ax.legend()
    test_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig('lab8_linear_regression.png')
    plt.close(fig)
    print("\nLinear regression plot saved as 'lab8_linear_regression.png'")


In [None]:
def residual_analysis():
    """Fit on the full sample dataset and inspect the residuals.

    Prints the mean, standard deviation, minimum and maximum of the
    residuals (observed minus fitted) and saves a residual-vs-fitted scatter
    plus a residual histogram to ``lab8_residuals.png``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("Residual Analysis")
    print(divider)

    features, targets = generate_sample_data()

    # Fit on every row: the residuals describe the in-sample fit
    fitted = LinearRegression().fit(features, targets).predict(features)
    residuals = targets - fitted

    print("\nResidual Statistics:")
    print(f"Mean: {residuals.mean():.6f}")
    print(f"Std Dev: {residuals.std():.4f}")
    print(f"Min: {residuals.min():.4f}")
    print(f"Max: {residuals.max():.4f}")

    fig, (scatter_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Residuals against fitted values, with a zero reference line
    scatter_ax.scatter(fitted, residuals, alpha=0.6)
    scatter_ax.axhline(y=0, color='r', linestyle='--')
    scatter_ax.set_xlabel('Predicted Values')
    scatter_ax.set_ylabel('Residuals')
    scatter_ax.set_title('Residual Plot')
    scatter_ax.grid(True, alpha=0.3)

    # Distribution of the residuals
    hist_ax.hist(residuals, bins=20, edgecolor='black')
    hist_ax.set_xlabel('Residuals')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Residual Distribution')
    hist_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig('lab8_residuals.png')
    plt.close(fig)
    print("\nResidual plots saved as 'lab8_residuals.png'")


In [None]:
def manual_implementation():
    """Compare a hand-computed least-squares fit with scikit-learn's.

    The slope is computed as the sum of cross-deviations of x and y divided
    by the sum of squared x-deviations, and the intercept as
    ``mean(y) - slope * mean(x)``. Both values are printed next to the
    parameters of a ``LinearRegression`` fitted on the same data, followed
    by their absolute differences.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Manual Linear Regression Implementation")
    print(rule)

    X, y = generate_sample_data()

    # Closed-form ordinary least squares for a single feature
    x_bar = X.mean()
    y_bar = y.mean()
    x_centered = X.ravel() - x_bar
    cross_deviation = np.sum(x_centered * (y - y_bar))
    x_sum_of_squares = np.sum(x_centered ** 2)

    slope = cross_deviation / x_sum_of_squares
    intercept = y_bar - slope * x_bar

    print("\nManual Calculation:")
    print(f"Slope: {slope:.4f}")
    print(f"Intercept: {intercept:.4f}")

    # Reference fit from scikit-learn on the same data
    reference = LinearRegression().fit(X, y)
    ref_slope = reference.coef_[0]
    ref_intercept = reference.intercept_

    print("\nSklearn Calculation:")
    print(f"Slope: {ref_slope:.4f}")
    print(f"Intercept: {ref_intercept:.4f}")

    print("\nDifference:")
    print(f"Slope difference: {abs(slope - ref_slope):.10f}")
    print(f"Intercept difference: {abs(intercept - ref_intercept):.10f}")


In [None]:
def prediction_intervals():
    """Compute and plot 95% prediction intervals for the fitted line.

    A ``LinearRegression`` is fitted on the full sample dataset. The residual
    standard error is estimated with ``n - 2`` degrees of freedom (two fitted
    parameters), and the interval for a new observation at ``x0`` is

        y_hat(x0) +/- t * s * sqrt(1 + 1/n + (x0 - x_mean)^2 / Sxx)

    where ``t`` is the 97.5% Student-t quantile with ``n - 2`` degrees of
    freedom and ``Sxx`` is the sum of squared x-deviations. The band is
    therefore slightly wider away from the mean of ``x``. The residual
    standard error is printed and the figure is saved to
    ``lab8_prediction_intervals.png``.
    """
    # Local import keeps the file's import block unchanged; SciPy is already
    # a hard dependency of scikit-learn, so this adds no new requirement.
    from scipy import stats

    print("\n" + "=" * 50)
    print("Prediction Intervals")
    print("=" * 50)

    # Generate data
    X, y = generate_sample_data()

    # Train model
    model = LinearRegression()
    model.fit(X, y)

    # Make predictions
    y_pred = model.predict(X)

    # Residual standard error: divide by n - 2 because slope and intercept
    # were both estimated from the same data.
    x = X.ravel()
    n_obs = x.size
    dof = n_obs - 2
    residuals = y - y_pred
    std_error = np.sqrt(np.sum(residuals ** 2) / dof)

    print(f"\nStandard Error: {std_error:.4f}")

    # 95% prediction interval for a new observation at each x. The
    # sqrt(...) factor combines the noise of a new point (1), the
    # uncertainty of the intercept (1/n) and of the slope (leverage term).
    x_dev_sq = (x - x.mean()) ** 2
    sxx = x_dev_sq.sum()
    t_crit = stats.t.ppf(0.975, dof)  # two-sided 95% -> 97.5% quantile
    margin = t_crit * std_error * np.sqrt(1.0 + 1.0 / n_obs + x_dev_sq / sxx)
    upper_bound = y_pred + margin
    lower_bound = y_pred - margin

    # Visualize (sorted by x so the line and band are drawn left to right)
    X_sorted_idx = x.argsort()
    plt.figure(figsize=(10, 6))
    plt.scatter(X, y, alpha=0.6, label='Data')
    plt.plot(X[X_sorted_idx], y_pred[X_sorted_idx], 'r-', linewidth=2, label='Prediction')
    plt.fill_between(x[X_sorted_idx],
                     lower_bound[X_sorted_idx],
                     upper_bound[X_sorted_idx],
                     alpha=0.2, label='95% Prediction Interval')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('Linear Regression with Prediction Intervals')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('lab8_prediction_intervals.png')
    plt.close()
    print("\nPrediction intervals plot saved as 'lab8_prediction_intervals.png'")


In [None]:
def real_world_example():
    """Predict synthetic house prices (in $1000s) from floor area (sq ft).

    Builds 100 synthetic (area, price) pairs with a fixed seed, trains a
    ``LinearRegression`` on an 80/20 split, prints the fitted equation, the
    test R² and RMSE, and predictions for 1000, 1500 and 2000 sq ft. A
    scatter of train/test points with the fitted line is saved to
    ``lab8_real_world_example.png``.
    """
    separator = "=" * 50
    print("\n" + separator)
    print("Real-World Example: House Price Prediction")
    print(separator)

    # Synthetic data: price = 50 + 0.1 * area + Gaussian noise (sd 20)
    np.random.seed(42)
    area = np.random.uniform(500, 3000, 100).reshape(-1, 1)
    noise = np.random.normal(0, 20, (100, 1))
    price = (50 + 0.1 * area + noise).ravel()

    print("\nPredicting house price from area")
    print(f"Sample size: {len(area)}")

    X_train, X_test, y_train, y_test = train_test_split(
        area, price, test_size=0.2, random_state=42
    )

    estimator = LinearRegression().fit(X_train, y_train)
    print(f"\nModel: Price = {estimator.coef_[0]:.4f} × Area + {estimator.intercept_:.4f}")

    # Held-out performance
    test_pred = estimator.predict(X_test)
    r2 = r2_score(y_test, test_pred)
    rmse = np.sqrt(mean_squared_error(y_test, test_pred))

    print(f"\nR² Score: {r2:.4f}")
    print(f"RMSE: ${rmse:.2f}k")

    # Predictions for a few hand-picked areas
    sample_areas = np.array([[1000], [1500], [2000]])
    sample_prices = estimator.predict(sample_areas)

    print("\nSample Predictions:")
    for area_val, price_val in zip(sample_areas.ravel(), sample_prices):
        print(f"Area: {area_val:.0f} sq ft → Predicted Price: ${price_val:.2f}k")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(X_train, y_train, alpha=0.6, label='Training Data')
    ax.scatter(X_test, y_test, alpha=0.6, label='Test Data')

    # Fitted line evaluated on an even grid across the observed area range
    grid = np.linspace(area.min(), area.max(), 100).reshape(-1, 1)
    ax.plot(grid, estimator.predict(grid), 'r-', linewidth=2, label='Regression Line')

    ax.set_xlabel('Area (sq ft)')
    ax.set_ylabel('Price ($1000s)')
    ax.set_title('House Price Prediction')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig('lab8_real_world_example.png')
    plt.close(fig)
    print("\nReal-world example plot saved as 'lab8_real_world_example.png'")


In [None]:
def main():
    """Run every Lab 8 demonstration in order, framed by banner lines."""
    rule = "=" * 50
    print("\n" + rule)
    print("Lab 8: Simple Linear Regression")
    print(rule)

    # The demos are independent; they run in the order the lab presents them.
    demos = (
        basic_linear_regression,
        residual_analysis,
        manual_implementation,
        prediction_intervals,
        real_world_example,
    )
    for run_demo in demos:
        run_demo()

    print("\n" + rule)
    print("Lab 8 Complete!")
    print(rule)


In [None]:
# Run the full lab only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
