In [1]:
import numpy as np

def linear_regression(X, y):
  """
  Fit an ordinary least-squares line/hyperplane  y ≈ beta_0 + X @ beta_1.

  The data are mean-centered, the centered least-squares problem is solved
  for the slope(s), and the intercept is recovered from the means.

  Args:
      X: Independent features. Either a 2D array of shape (n_samples, n_features)
         (each row is a data point) or a 1D array of n_samples values of a
         single feature.
      y: Dependent variable (target values): a 1D array of length n_samples,
         or an (n_samples, 1) column.

  Returns:
      A tuple ``(beta_0, beta_1)`` -- intercept FIRST, then slope(s):
        * 2D ``X``: ``beta_0`` has shape (1, 1) and ``beta_1`` has shape
          (n_features, 1), so predictions are ``beta_0 + X_new @ beta_1``.
          With a single feature both are (1, 1) arrays.
        * 1D ``X``: both are returned as scalars.

  Raises:
      ValueError: if X is not 1D/2D, if there are no samples, or if X and y
          disagree on the number of samples.
  """
  X = np.asarray(X, dtype=float)
  y = np.asarray(y, dtype=float)

  # A 1D X means "one feature"; remember it so scalars can be handed back.
  scalar_result = X.ndim == 1
  if scalar_result:
    X = X.reshape(-1, 1)
  if X.ndim != 2:
    raise ValueError("X must be a 1D or 2D array, got %d dimensions" % X.ndim)

  # Work with y as a column so every product below is a plain matrix product.
  y = y.reshape(-1, 1)
  if X.shape[0] == 0:
    raise ValueError("at least one sample is required")
  if X.shape[0] != y.shape[0]:
    raise ValueError(
        "X and y have inconsistent numbers of samples: %d vs %d"
        % (X.shape[0], y.shape[0]))

  # Calculate the mean of features and target
  X_mean = X.mean(axis=0)
  y_mean = y.mean()

  # Center the data: removes the intercept from the problem and improves
  # numerical stability
  X_centered = X - X_mean
  y_centered = y - y_mean

  # Slope(s): least-squares solution of X_centered @ beta_1 = y_centered.
  # An element-wise ratio of X_c.T @ y_c to X_c.T @ X_c is only valid for one
  # feature; lstsq handles any number of features, and for a constant or
  # collinear feature it returns the minimum-norm solution instead of NaN.
  beta_1, _, _, _ = np.linalg.lstsq(X_centered, y_centered, rcond=None)

  # Intercept: the fitted hyperplane passes through the point of means.
  beta_0 = y_mean - np.dot(X_mean.reshape(1, -1), beta_1)

  if scalar_result:
    return beta_0[0, 0], beta_1[0, 0]
  return beta_0, beta_1

# Toy dataset: five observations of one feature (swap in your own data here)
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
y = np.array([2, 4, 5, 4, 5])

# Fit the line; linear_regression hands back the intercept first, then the slope
beta_0, beta_1 = linear_regression(X, y)

# Report the fitted parameters
print("Slope (beta_1):", beta_1)
print("Intercept (beta_0):", beta_0)

# Evaluate the fitted line at an unseen point, x = 6
new_x = 6
predicted_y = beta_1 * new_x + beta_0
print("Predicted y for x =", new_x, ":", predicted_y)


Slope (beta_1): [[0.6]]
Intercept (beta_0): [[2.2]]
Predicted y for x = 6 : [[5.8]]


In [3]:
# Predict at a second point, x = 2. The value lives in its own variable so the
# printed label always matches the x that was actually used (new_x is still 6
# from the earlier cell, which previously made the label wrong).
query_x = 2
predicted_y = beta_0 + beta_1 * query_x
print("Predicted y for x =", query_x, ":", predicted_y)


Predicted y for x = 6 : [[3.4]]


<H1>Linear Regression Using sklearn</H1>

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_iris

# Load the Iris dataset bundled with scikit-learn
IRIS = load_iris()
# Features and target. Per the scikit-learn docs the Iris target is an
# integer-coded species label, not a continuous quantity.
# NOTE(review): regressing on class labels is only a demonstration -- confirm
# this is intended, or switch to a dataset with a continuous target.
X, y = IRIS.data, IRIS.target

# Create the linear regression model and train it in one step
# (fit returns the fitted estimator itself)
model = LinearRegression().fit(X, y)

# Make predictions on new data (optional)
# ... (replace with your new data for prediction)

# Show the fitted parameters: the intercept and one coefficient per feature
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)



Intercept: 0.186495247206249
Coefficients: [-0.11190585 -0.04007949  0.22864503  0.60925205]


R-squared ($R^2$) is a statistical metric that represents the proportion of variance in the dependent variable (y) that is explained by the independent variables (X) in the linear regression model.
For an ordinary least-squares fit with an intercept, evaluated on the data it was trained on, it ranges from 0 to 1, where:
- 0 indicates the model explains none of the variance (a terrible fit, no better than always predicting the mean of y).
- 1 indicates the model explains all of the variance (a perfect fit, which is rare in real-world data).

Generally, a higher R-squared value signifies a better fit, but it's important to consider the context of your problem and the complexity of the data. Note that `r2_score` can be negative when a model predicts worse than the mean of y, for example on held-out data.

In [6]:
from sklearn.metrics import r2_score

# Relies on `model`, `X` and `y` from the training cell above still being in the kernel.

# Score the fitted model on the same data it was trained on
y_fitted = model.predict(X)
r_squared = r2_score(y_true=y, y_pred=y_fitted)
print("R-squared:", r_squared)


R-squared: 0.9303939218549564
