# **5 EXPERIMENTS OF LINEAR REGRESSION**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Function to generate synthetic data
def generate_data(size, noise_level, features=1, seed=42):
    """Generate a synthetic dataset with a noisy linear relationship.

    Feature values are drawn uniformly from [0, 10), the true coefficients
    uniformly from [0, 10), and the target is their dot product plus
    Gaussian noise scaled by ``noise_level``. No intercept term is added.

    Args:
        size: Number of samples (rows) to generate.
        noise_level: Standard deviation of the Gaussian noise added to ``y``.
        features: Number of feature columns.
        seed: Seed for the private random generator. The default of 42
            reproduces the values this function has always returned.
            Pass ``None`` to get a different dataset on every call.

    Returns:
        A tuple ``(X, y, true_coefficients)`` where ``X`` has shape
        ``(size, features)``, ``y`` has shape ``(size,)`` and
        ``true_coefficients`` has shape ``(features,)``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the np.random.* calls, but leaves the global NumPy RNG
    # untouched, so calling this function no longer reseeds unrelated code.
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: X first, then the
    # coefficients, then the noise.
    X = rng.rand(size, features) * 10
    true_coefficients = rng.rand(features) * 10
    y = X.dot(true_coefficients) + rng.randn(size) * noise_level
    return X, y, true_coefficients

# Experiment 1: how the fit changes as the number of samples grows.
# The noise level and the feature count stay fixed; only the size varies.
features = 1
noise_level = 2
dataset_sizes = [10, 50, 100, 500, 1000]

print("=== Experiment 1: Effect of Dataset Size ===")
for size in dataset_sizes:
    X, y, true_coeffs = generate_data(size=size, noise_level=noise_level, features=features)
    # fit() hands back the estimator, so construction and training chain.
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    # The error is measured on the same samples the model was trained on.
    mse = mean_squared_error(y_true=y, y_pred=y_pred)
    print(f"Size: {size}, MSE: {mse:.2f}, Coefficients: {model.coef_}, True: {true_coeffs}")

# Experiment 2: how the fit degrades as the noise grows, at a fixed size.
# `features` is not assigned here; it keeps the value set earlier in the file.
noise_levels = [0.5, 2, 5, 10]
size = 100

print("\n=== Experiment 2: Effect of Noise ===")
for noise in noise_levels:
    X, y, true_coeffs = generate_data(size=size, noise_level=noise, features=features)
    # fit() hands back the estimator, so construction and training chain.
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    # The error is measured on the same samples the model was trained on.
    mse = mean_squared_error(y_true=y, y_pred=y_pred)
    print(f"Noise: {noise}, MSE: {mse:.2f}, Coefficients: {model.coef_}, True: {true_coeffs}")

# Experiment 3: regression with a growing number of input features.
# The sample count and the noise level stay fixed across runs.
size = 100
noise_level = 2
features_list = [1, 2, 5, 10]

print("\n=== Experiment 3: Multivariate Regression ===")
# The loop variable reuses the name `features`, so after the loop that
# name holds the last entry of features_list.
for features in features_list:
    X, y, true_coeffs = generate_data(size=size, noise_level=noise_level, features=features)
    # fit() hands back the estimator, so construction and training chain.
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    # The error is measured on the same samples the model was trained on.
    mse = mean_squared_error(y_true=y, y_pred=y_pred)
    print(f"Features: {features}, MSE: {mse:.2f}, Coefficients: {model.coef_}, True: {true_coeffs}")

In [None]:
from sklearn.linear_model import Ridge, Lasso

# Experiment 4 data: 100 samples, noise level 2, 20 features.
X, y, true_coeffs = generate_data(100, 2, 20)

# Fit both regularized models on the same data, Ridge first and then Lasso.
# fit() hands back the estimator, so each model is built and trained in one step.
ridge_model = Ridge(alpha=1.0).fit(X, y)  # L2 penalty
lasso_model = Lasso(alpha=0.1).fit(X, y)  # L1 penalty

# Show the learned coefficients next to the ones used to generate the data.
print("=== Experiment 4: Regularization ===")
print(f"True Coefficients: {true_coeffs}")
print(f"Ridge Coefficients: {ridge_model.coef_}")
print(f"Lasso Coefficients: {lasso_model.coef_}")

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Generate a nonlinear dataset: y = 0.5*x^2 - 2*x + 5 plus Gaussian noise.
# X and y are both column vectors of shape (100, 1).
np.random.seed(42)
X = np.random.rand(100, 1) * 10
y = 0.5 * X**2 - 2 * X + 5 + np.random.randn(100, 1) * 2  # Quadratic relationship

# Expand the single input feature into polynomial terms up to degree 2 so
# that a linear model can represent the quadratic relationship.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

# Train the model on the expanded features
model = LinearRegression()
model.fit(X_poly, y)

# Predict on the training inputs
y_pred = model.predict(X_poly)

# X was drawn in random order. plt.plot joins consecutive points with line
# segments, so the samples must be ordered by x first; otherwise the fitted
# curve is drawn as a zig-zag of chords instead of a single smooth line.
order = np.argsort(X[:, 0])
plt.scatter(X, y, color="blue", label="Data")
plt.plot(X[order], y_pred[order], color="red", label="Polynomial Fit")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()

print("=== Experiment 5: Polynomial Regression ===")
print(f"Coefficients: {model.coef_}, Intercept: {model.intercept_}")

# **The End**