In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
data = pd.read_csv("multiple linear regression dataset/Student_Performance.csv")

In [4]:
data = data.drop(['Extracurricular Activities'], axis=1)

In [5]:
X = data.drop(['Performance Index'], axis=1).to_numpy()
Y = data['Performance Index'].to_numpy()

In [6]:
def mean_squared_error(y_true, y_predicted):
    # Calculate the mean squared error
    cost = np.sum((y_predicted - y_true) ** 2) / len(y_true)
    return cost

In [18]:
def linear_regression(X, y, learning_rate=0.01, num_iterations=20000, stopping_threshold=1e-6, verbose=True):
    weights = np.zeros((X.shape[1], 1))
    bias = 0
    curr_cost = 0
    prev_cost = 0
    
    # Number of samples
    m = len(y)
    
    # Gradient Descent
    for i in range(num_iterations):

        y_pred = (np.dot(X, weights) + bias).reshape(-1,1)
        curr_cost = mean_squared_error(y, y_pred)
        
        if i>0 and abs(prev_cost - curr_cost) <= stopping_threshold:
            break
        
        if i>0 and curr_cost > prev_cost:
            if verbose: print("Learning rate too large....Objective function is increasing....\nReducing learning rate....")
            learning_rate = learning_rate*0.5
        prev_cost = curr_cost
        
        # Calculate gradients
        dW = (2/m) * np.dot(X.T, (y_pred - y))
        db = (2/m) * np.sum(y_pred - y)
        
        # Update weights and bias
        weights -= learning_rate * dW
        bias -= learning_rate * db
        
        if verbose:
            print(f"\nIteration {i}: Cost {curr_cost}  \nWeigths:\n{weights}  \nBias: {bias}")
    
    return weights, bias

In [19]:
# Reshape x to (1000, 2)
X1 = X.reshape(-1, 4)
# Reshape y to (1000, 1)
Y1 = Y.reshape(-1, 1)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X1 = sc.fit_transform(X1)

# Estimate weight and bias using gradient descent
estimated_weight, estimated_bias = linear_regression(X1, Y1)

print(f"\n\nEstimated Weight: \n{estimated_weight}\nEstimated Bias: {estimated_bias}")

# Make predictions using estimated parameters
Y_pred = np.dot(X1,estimated_weight) + estimated_bias

mse = mean_squared_error(Y1, Y_pred)
print(f"Mean Squared Error: {mse}")


Iteration 0: Cost 3418.864  
Weigths:
[[0.14359914]
 [0.3516449 ]
 [0.0184838 ]
 [0.01662507]]  
Bias: 1.1044960000000001

Iteration 1: Cost 3283.7473842136583  
Weigths:
[[0.28440716]
 [0.69628767]
 [0.03655121]
 [0.03281054]]  
Bias: 2.1869020800000003

Iteration 2: Cost 3153.9775428330718  
Weigths:
[[0.42247833]
 [1.03406777]
 [0.05421151]
 [0.04856731]]  
Bias: 3.2476600384000003

Iteration 3: Cost 3029.3428877531314  
Weigths:
[[0.55786583]
 [1.3651219 ]
 [0.07147375]
 [0.06390602]]  
Bias: 4.287202837632

Iteration 4: Cost 2909.640204361445  
Weigths:
[[0.69062183]
 [1.68958402]
 [0.08834678]
 [0.07883706]]  
Bias: 5.30595478087936

Iteration 5: Cost 2794.6743201495738  
Weigths:
[[0.82079747]
 [2.00758544]
 [0.10483929]
 [0.09337058]]  
Bias: 6.304331685261773

Iteration 6: Cost 2684.257786439753  
Weigths:
[[0.94844291]
 [2.31925483]
 [0.12095974]
 [0.10751649]]  
Bias: 7.282741051556537

Iteration 7: Cost 2578.210572708001  
Weigths:
[[1.07360734]
 [2.62471832]
 [0.13671642]


Iteration 274: Cost 4.298887592162883  
Weigths:
[[ 7.35459601]
 [17.5941633 ]
 [ 0.80714114]
 [ 0.56365006]]  
Bias: 55.01134567093145

Iteration 275: Cost 4.296763237915275  
Weigths:
[[ 7.35524607]
 [17.59556251]
 [ 0.8071623 ]
 [ 0.56359337]]  
Bias: 55.01561475751282

Iteration 276: Cost 4.294722908365354  
Weigths:
[[ 7.35588349]
 [17.59693391]
 [ 0.80718286]
 [ 0.56353735]]  
Bias: 55.019798462362566

Iteration 277: Cost 4.292763279927747  
Weigths:
[[ 7.35650853]
 [17.59827805]
 [ 0.80720284]
 [ 0.56348202]]  
Bias: 55.02389849311531

Iteration 278: Cost 4.290881160486946  
Weigths:
[[ 7.35712141]
 [17.59959546]
 [ 0.80722225]
 [ 0.56342736]]  
Bias: 55.027916523253005

Iteration 279: Cost 4.289073484196571  
Weigths:
[[ 7.35772239]
 [17.60088668]
 [ 0.8072411 ]
 [ 0.56337338]]  
Bias: 55.03185419278795

Iteration 280: Cost 4.28733730648439  
Weigths:
[[ 7.35831168]
 [17.60215223]
 [ 0.80725941]
 [ 0.56332005]]  
Bias: 55.035713108932185

Iteration 281: Cost 4.28566979925491  

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Assuming X and Y are already defined

# Create a linear regression model
model = LinearRegression()

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit the model to the standardized dataset
model.fit(X_scaled, Y)

# Make predictions on the same standardized dataset
predictions_scaled = model.predict(X_scaled)

# Evaluate the model
mse_scaled = mean_squared_error(Y, predictions_scaled)
print(f'Mean Squared Error (scaled): {mse_scaled}')

# Print the coefficients and intercept of the model
print('Coefficients (scaled):', model.coef_)
print('Intercept (scaled):', model.intercept_)

Mean Squared Error (scaled): 4.245176108662532
Coefficients (scaled): [ 7.38803993 17.66456609  0.80775508  0.55967342]
Intercept (scaled): 55.2248
