<a href="https://colab.research.google.com/github/Yashmaini30/My_Own_Linear_Regression/blob/main/Multiple_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error

In [14]:
# Seed the global NumPy RNG so every run generates the same dataset
np.random.seed(42)

num_samples = 10000
num_features = 100

# Random feature matrix (X): entries drawn uniformly from [0, 10)
X = 10 * np.random.rand(num_samples, num_features)

# True weights: one standard-normal coefficient per feature, as a column vector
true_theta = np.random.randn(num_features, 1)

# Target variable (y): linear combination of the features plus standard-normal noise
y = X.dot(true_theta) + np.random.randn(num_samples, 1)  # y = X * theta + noise

# Convert to DataFrame
df = pd.DataFrame(X, columns=[f"Feature_{i+1}" for i in range(num_features)])
# y has shape (num_samples, 1); flatten it so the column is assigned from a 1-D array
df["Target"] = y.ravel()

# Save to CSV (no index column, so the file holds only the features and the target)
df.to_csv("generated_data_100_features.csv", index=False)

# Build the summary from the actual sizes so it cannot drift from the settings above
print(f"Dataset Created: {num_features} Features, {num_samples:,} Rows")

Dataset Created: 100 Features, 10,000 Rows


In [15]:
# Load the generated dataset back from the CSV file
df = pd.read_csv("generated_data_100_features.csv")

# Show five randomly selected rows as the cell output
df.sample(n=5)

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,...,Feature_92,Feature_93,Feature_94,Feature_95,Feature_96,Feature_97,Feature_98,Feature_99,Feature_100,Target
730,6.249241,7.35124,3.698223,1.131328,8.052901,1.404068,3.826818,2.622782,2.965349,6.018567,...,6.976744,3.64232,8.319544,0.736731,4.959098,9.563345,1.904631,1.723242,3.786967,17.62843
164,5.819372,7.957277,4.303926,4.538379,8.793265,8.959345,2.386889,1.992419,4.962352,9.00277,...,1.656632,3.951249,3.163339,0.970541,4.326601,3.381158,1.226192,2.961171,8.864361,29.826104
3756,8.459513,1.076949,8.789035,3.104154,2.353225,3.452403,7.103492,1.881563,3.295876,6.118475,...,6.84986,1.294242,2.861808,4.466394,7.241434,3.755656,0.300687,5.654086,3.403308,26.14494
1124,7.953578,0.325157,5.181091,7.202177,7.953812,1.555556,3.904783,3.208549,9.921099,7.351286,...,6.972943,9.576606,8.977332,1.133283,8.763974,3.909686,0.70629,2.077277,0.285567,-1.600432
1432,1.466107,8.331251,6.10403,1.977928,8.550179,6.423464,0.76819,3.164274,1.320192,0.917311,...,6.127399,0.540071,6.988046,3.881553,4.5767,2.254532,1.755811,4.273464,5.841007,31.004665


In [16]:
# Extract features (X: every column but the last) and target (y: last column,
# reshaped into a column vector)
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values.reshape(-1, 1)

# Add bias term: prepend a column of ones so theta[0] acts as the intercept
X_b = np.c_[np.ones((X.shape[0], 1)), X]

# Closed-form least squares: theta solves the normal equations
# (X^T X) theta = X^T y. Solve it with lstsq rather than forming
# inv(X^T X) explicitly; the explicit inverse raises on a singular matrix and
# loses precision when features are nearly collinear, whereas lstsq still
# returns a least-squares solution in those cases.
theta_best = np.linalg.lstsq(X_b, y, rcond=None)[0]

# Predicted values on the training data
y_pred = X_b @ theta_best

# Evaluate model
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)

print(f"Model Evaluation:\nR-squared: {r2:.4f}\nMean Squared Error: {mse:.4f}")

Model Evaluation:
R-squared: 0.9986
Mean Squared Error: 0.9995


In [17]:
# theta_best is a column vector: row 0 is the bias term, the remaining rows
# are the per-feature coefficients
intercept = theta_best[0, 0]
weights = theta_best[1:, 0]

print(f"Intercept (Bias): {intercept:.4f}")
print("Feature Weights:")
# Number the features from 1 to match the Feature_<k> column names
for position, coefficient in enumerate(weights, start=1):
    print(f"Feature {position}: {coefficient:.4f}")

Intercept (Bias): -0.1211
Feature Weights:
Feature 1: -1.7231
Feature 2: 1.2158
Feature 3: 0.0120
Feature 4: -0.1171
Feature 5: 0.0775
Feature 6: 0.9531
Feature 7: -0.5517
Feature 8: 0.4648
Feature 9: -0.6395
Feature 10: 1.1707
Feature 11: 1.0369
Feature 12: 0.0435
Feature 13: -0.3900
Feature 14: 1.0487
Feature 15: 0.4825
Feature 16: -0.3591
Feature 17: 0.7774
Feature 18: -0.5553
Feature 19: -1.3203
Feature 20: -0.7540
Feature 21: -1.6847
Feature 22: 1.1908
Feature 23: 0.6052
Feature 24: 0.8057
Feature 25: -1.1122
Feature 26: 1.8722
Feature 27: -0.5036
Feature 28: 0.8778
Feature 29: -0.5422
Feature 30: -0.2663
Feature 31: 1.7857
Feature 32: 0.5767
Feature 33: 0.7319
Feature 34: 0.4287
Feature 35: -0.4091
Feature 36: 0.0551
Feature 37: 0.2388
Feature 38: -1.2655
Feature 39: 0.8084
Feature 40: -0.4072
Feature 41: -0.7511
Feature 42: -0.8326
Feature 43: 0.2745
Feature 44: 1.1598
Feature 45: 0.5190
Feature 46: -0.2310
Feature 47: -0.8154
Feature 48: -0.3637
Feature 49: -0.1757
Feature 50: 

In [30]:
df = pd.read_csv("generated_data_100_features.csv")

# Hyperparameters
learning_rate = 0.015
n_iterations = 100000  # upper bound on gradient steps
tolerance = 1e-8       # stop once the gradient's L2 norm falls below this

# Insert bias column (all ones) as the first column so theta[0] is the intercept
df.insert(0, "Bias", 1)

# Mean normalisation: centre each feature on its mean and divide by its range
# (max - min). The bias column (first) and the target (last) are left untouched.
feature_columns = df.iloc[:, 1:-1]
X_mean = feature_columns.mean()
X_range = feature_columns.max() - feature_columns.min()
# A constant feature has zero range; divide by 1 so it scales to 0 instead of NaN
X_range = X_range.replace(0, 1)
df.iloc[:, 1:-1] = (feature_columns - X_mean) / X_range

# Extract features (X, including the bias column) and target (y, as a column vector)
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values.reshape(-1, 1)

# Initialize theta with small random values (drawn from the global NumPy RNG)
m, n = X.shape
theta = np.random.uniform(-0.01, 0.01, size=(n, 1))

# Gradient Descent Loop
for iteration in range(n_iterations):
    # Gradient of the mean-squared-error cost: (2/m) * X^T (X theta - y).
    # The product is parenthesised so the scalar multiplies the small (n, 1)
    # result; `(2/m) * X.T @ r` evaluates left to right and would rescale the
    # whole transposed design matrix on every iteration.
    gradients = (2 / m) * (X.T @ (X @ theta - y))

    # Test for convergence before stepping, so `iteration` equals the number
    # of updates actually applied to theta.
    if np.linalg.norm(gradients) < tolerance:
        print(f"Converged after {iteration} iterations.")
        break

    theta -= learning_rate * gradients
else:
    # The loop ran out of iterations without meeting the tolerance
    print(f"Reached the maximum of {n_iterations} iterations without converging.")

# Predictions
y_pred = X @ theta

# Evaluate model
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)

# Extract feature weights.
# NOTE: theta was fitted on the normalised features, so these coefficients are
# expressed per unit of the scaled features, not per unit of the raw columns.
# Dividing a weight by that feature's X_range gives its raw-unit equivalent.
intercept = theta[0, 0]
weights = theta[1:, 0]

# Display results
print(f"Model Evaluation:\nR-squared: {r2:.4f}\nMean Squared Error: {mse:.4f}\n")
print(f"Intercept (Bias): {intercept:.4f}")
print("Feature Weights:")
for i, w in enumerate(weights):
    print(f"Feature {i+1}: {w:.4f}")


Converged after 9611 iterations.
Model Evaluation:
R-squared: 0.9986
Mean Squared Error: 0.9995

Intercept (Bias): 10.6807
Feature Weights:
Feature 1: -17.2297
Feature 2: 12.1574
Feature 3: 0.1202
Feature 4: -1.1705
Feature 5: 0.7750
Feature 6: 9.5291
Feature 7: -5.5148
Feature 8: 4.6464
Feature 9: -6.3945
Feature 10: 11.7067
Feature 11: 10.3681
Feature 12: 0.4349
Feature 13: -3.8992
Feature 14: 10.4856
Feature 15: 4.8250
Feature 16: -3.5898
Feature 17: 7.7730
Feature 18: -5.5524
Feature 19: -13.1980
Feature 20: -7.5389
Feature 21: -16.8453
Feature 22: 11.9056
Feature 23: 6.0508
Feature 24: 8.0553
Feature 25: -11.1175
Feature 26: 18.7196
Feature 27: -5.0360
Feature 28: 8.7764
Feature 29: -5.4202
Feature 30: -2.6632
Feature 31: 17.8512
Feature 32: 5.7663
Feature 33: 7.3173
Feature 34: 4.2866
Feature 35: -4.0907
Feature 36: 0.5510
Feature 37: 2.3872
Feature 38: -12.6532
Feature 39: 8.0807
Feature 40: -4.0711
Feature 41: -7.5112
Feature 42: -8.3244
Feature 43: 2.7443
Feature 44: 11.5975
F