In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the raw dataset; the file has no header row, so the columns get default integer labels.
FileData = pd.read_csv(filepath_or_buffer="data2.txt", header=None)

# Plotting the data (i.e., x-axis for the 1st column, y-axis for the 2nd column)

In [None]:
# Give the two columns readable names, then draw the raw observations in red.
FileData.columns = ["x", "y"]
FileData.plot.scatter(x="x", y="y", color="red")

In [None]:
# Preview only the first rows instead of dumping the whole frame into the output.
FileData.head()

# Normal Equation to find coefficients

In [None]:
# Feature (first column) and target (second column) as 1-D NumPy arrays.
X = FileData.iloc[:, 0].values
Y = FileData.iloc[:, 1].values
# Prepend a column of ones so the first coefficient acts as the intercept.
# The column height is taken from the data instead of a hard-coded row count,
# so the cell keeps working if the dataset size changes.
Xbias = np.c_[np.ones((len(X), 1)), X]

In [None]:
# Normal equation: (X^T X) theta = X^T y.
# Solve the linear system directly rather than forming the explicit inverse;
# the solution is the same but the computation is cheaper and numerically more stable.
thetaCoefficients = np.linalg.solve(Xbias.T @ Xbias, Xbias.T @ Y)
print(thetaCoefficients)

In [None]:
# Two x-values (2 and 30) at which the fitted line will be evaluated.
XTest = np.array([2, 30]).reshape(-1, 1)
# Same bias-column layout as the training design matrix.
XTestBias = np.c_[np.ones((XTest.shape[0], 1)), XTest]

Plotting the normal equation line, on the dataset, using the calculated parameters

In [None]:
# Evaluate the fitted coefficients at the probe points, then draw the resulting
# line (blue) together with the observations (green).
YPredict = XTestBias.dot(thetaCoefficients)
plt.plot(XTest, YPredict, "b")
plt.scatter(X, Y, color="#88c999")

# Splitting the dataset into 80% training data and 20% testing data

In [None]:
# Draw 80% of the rows (fixed random_state for a repeatable split) as training
# data; every row that was not drawn becomes testing data.
trainingData = FileData.sample(frac=0.8, random_state=30)
testingData = FileData.drop(index=trainingData.index)
print(len(trainingData.index), len(testingData.index))

In [None]:
# Feature / target columns of each split.
XTrain, YTraining = trainingData["x"], trainingData["y"]
XTest, YTesting = testingData["x"], testingData["y"]

# Design matrices: a leading column of ones (intercept term) next to the feature.
XTrainBias = np.c_[np.ones((XTrain.shape[0], 1)), XTrain]
XTestBias = np.c_[np.ones((XTest.shape[0], 1)), XTest]

The function below performs batch gradient descent. In each iteration, the gradient used to update theta is computed from the full training data; the MSE on both the training data and the testing data is then evaluated with the updated theta.

In [None]:
def BatchGradientDescent(LearningRate, theta, iterations, XTrain, XTest, YTraining, YTesting):
    """Fit linear-regression coefficients with batch gradient descent.

    Each iteration computes the MSE gradient over the whole training set, takes
    one step, and records the training and testing MSE obtained with the
    updated coefficients.

    The function is self-contained: it starts from the ``theta`` argument,
    derives the sample counts from the inputs, and returns freshly created
    history lists, so repeated calls do not share or accumulate state.

    Parameters
    ----------
    LearningRate : float
        Step size applied to the gradient.
    theta : array-like
        Initial coefficients, one per column of ``XTrain``. Not modified.
    iterations : int
        Number of gradient steps to take.
    XTrain, XTest : array-like, shape (n_samples, n_coefficients)
        Design matrices; any bias column must already be included.
    YTraining, YTesting : array-like
        Targets, one value per row of the matching design matrix.

    Returns
    -------
    theta : numpy.ndarray, shape (n_coefficients, 1)
        Coefficients after the last step.
    MSEListTraining : list of float
        Training MSE after each iteration.
    MSEListTesting : list of float
        Testing MSE after each iteration.
    """
    XTrain = np.asarray(XTrain, dtype=float)
    XTest = np.asarray(XTest, dtype=float)
    # Force column vectors so that subtraction never broadcasts (n, 1) against (n,).
    YTraining = np.asarray(YTraining, dtype=float).reshape(-1, 1)
    YTesting = np.asarray(YTesting, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    gradient_scale = 2 / XTrain.shape[0]  # loop-invariant factor of the MSE gradient
    MSEListTraining = []
    MSEListTesting = []

    for _ in range(iterations):
        gradTrain = gradient_scale * XTrain.T @ (XTrain @ theta - YTraining)
        # Out-of-place update: the caller's initial theta is left untouched.
        theta = theta - LearningRate * gradTrain

        # np.mean yields a true scalar, avoiding float() on a (1, 1) array.
        MSEListTraining.append(float(np.mean((YTraining - XTrain @ theta) ** 2)))
        MSEListTesting.append(float(np.mean((YTesting - XTest @ theta) ** 2)))

    return theta, MSEListTraining, MSEListTesting

In [None]:
# Turn the targets into (n, 1) column vectors. np.asarray accepts both a pandas
# Series and an ndarray, so re-running this cell after YTraining / YTesting have
# already been converted no longer raises.
YTraining = np.asarray(YTraining).reshape(-1, 1)
YTesting = np.asarray(YTesting).reshape(-1, 1)

# Hyper-parameters for batch gradient descent.
LearningRate = 0.01
iterations = 500

# Sample counts and per-iteration MSE histories.
numberOfTrainSamples = len(XTrain)
numberOfTestSamples = len(XTest)
MSEListTraining = []
MSEListTesting = []

# Seed NumPy's global generator so the random starting point (and the later
# random sampling) is reproducible across kernel restarts.
np.random.seed(30)
random_theta = np.random.randn(2, 1)

theta, MSEListTraining, MSEListTesting = BatchGradientDescent(LearningRate, random_theta, iterations, XTrainBias, XTestBias, YTraining, YTesting)

MSE vs. iterations of batch gradient descent on the training data

In [None]:
# Training MSE recorded after each batch gradient-descent iteration.
plt.gca().plot(MSEListTraining)
plt.gca().set_title("MSE vs Iterations for training data")

MSE vs. iterations of batch gradient descent on the testing data

In [None]:
# Testing MSE recorded after each batch gradient-descent iteration.
plt.gca().plot(MSEListTesting)
plt.gca().set_title("MSE vs Iterations for testing data")

Below is the function that performs Stochastic gradient descent

In [None]:
def StochasticGradientDescent(LearningRate, theta, epochs, XTrain, XTest, YTraining, YTesting):
    """Fit linear-regression coefficients with stochastic gradient descent.

    In every epoch, as many single-sample updates are performed as there are
    training rows. Each update draws one row at random (with replacement, via
    ``np.random.randint``), steps along that row's squared-error gradient and
    adds the row's squared error under the updated coefficients to a running
    sum. At the end of the epoch the mean of that sum is recorded as the
    training MSE, and the testing MSE is evaluated on the full test set.

    The function is self-contained: it starts from the ``theta`` argument,
    derives the sample counts from the inputs, and returns freshly created
    history lists, so repeated calls do not share or accumulate state.

    Parameters
    ----------
    LearningRate : float
        Step size applied to each single-sample gradient.
    theta : array-like
        Initial coefficients, one per column of ``XTrain``. Not modified.
    epochs : int
        Number of passes (each of ``n_train`` random draws).
    XTrain, XTest : array-like, shape (n_samples, n_coefficients)
        Design matrices; any bias column must already be included.
    YTraining, YTesting : array-like
        Targets, one value per row of the matching design matrix.

    Returns
    -------
    theta : numpy.ndarray, shape (n_coefficients, 1)
        Coefficients after the last update.
    MSEListTraining : list of float
        Per-epoch mean squared error over the sampled training rows.
    MSEListTesting : list of float
        Per-epoch MSE on the test set.
    """
    XTrain = np.asarray(XTrain, dtype=float)
    XTest = np.asarray(XTest, dtype=float)
    # Force column vectors so that subtraction never broadcasts (n, 1) against (n,).
    YTraining = np.asarray(YTraining, dtype=float).reshape(-1, 1)
    YTesting = np.asarray(YTesting, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    num_train = XTrain.shape[0]
    MSEListTraining = []
    MSEListTesting = []

    for _ in range(epochs):
        squared_error_sum = 0.0
        for _ in range(num_train):
            random_index = np.random.randint(num_train)
            # Length-1 slices keep the sample two-dimensional for the matrix products.
            trainXi = XTrain[random_index : random_index + 1]
            trainYi = YTraining[random_index : random_index + 1]

            gradients = 2 * (trainXi.T @ (trainXi @ theta - trainYi))
            # Out-of-place update: the caller's initial theta is left untouched.
            theta = theta - LearningRate * gradients

            # .item() extracts the scalar without float() on a (1, 1) array.
            squared_error_sum += ((trainYi - trainXi @ theta) ** 2).item()

        MSEListTraining.append(squared_error_sum / num_train)
        MSEListTesting.append(float(np.mean((YTesting - XTest @ theta) ** 2)))

    return theta, MSEListTraining, MSEListTesting

In [None]:
# Hyper-parameters for stochastic gradient descent.
epochs, LearningRate = 30, 0.01

# Sample counts of each split.
num_samples_train, num_samples_test = len(XTrain), len(XTest)

# Start from empty MSE histories for this run.
MSEListTraining, MSEListTesting = [], []

theta, MSEListTraining, MSEListTesting = StochasticGradientDescent(
    LearningRate, random_theta, epochs, XTrainBias, XTestBias, YTraining, YTesting
)

In [None]:
# Training MSE history returned by the stochastic gradient-descent run.
plt.gca().plot(MSEListTraining)
plt.gca().set_title("MSE vs Iterations for training data in Stochastic", color="green")

In [None]:
# Testing MSE history returned by the stochastic gradient-descent run.
plt.gca().plot(MSEListTesting)
plt.gca().set_title("MSE vs Iterations for testing data in Stochastic", color="green")

In [None]:
# Candidate learning rates 0.001, 0.002, ..., 0.01.
LearningRate_list = [step / 1000 for step in range(1, 11)]
# Final training MSE per learning rate, one list per optimiser.
MSEListBatch, MSEListStochastic = [], []


In [None]:
# Final training MSE reached by each optimiser for every candidate learning rate.
# The result lists are rebuilt inside this cell, so re-running it cannot append
# duplicate entries and leave the lists longer than LearningRate_list.
# A dedicated loop variable keeps the notebook-level LearningRate untouched.
MSEListBatch = []
for rate in LearningRate_list:
    _, MSEListTraining_batch, _ = BatchGradientDescent(rate, random_theta, iterations, XTrainBias, XTestBias, YTraining, YTesting)
    # Only the MSE after the last iteration is of interest here.
    MSEListBatch.append(MSEListTraining_batch[-1])

MSEListStochastic = []
for rate in LearningRate_list:
    _, MSEListTraining_stochastic, _ = StochasticGradientDescent(rate, random_theta, epochs, XTrainBias, XTestBias, YTraining, YTesting)
    # Only the MSE after the last epoch is of interest here.
    MSEListStochastic.append(MSEListTraining_stochastic[-1])

In [None]:
# Final batch-gradient-descent training MSE as a function of the learning rate.
plt.gca().plot(LearningRate_list, MSEListBatch)
plt.gca().set_title("Learning rate vs MSE (Batch)", color="green")

In [None]:
# Final stochastic-gradient-descent training MSE as a function of the learning rate.
plt.gca().plot(LearningRate_list, MSEListStochastic)
plt.gca().set_title("Learning rate vs MSE (Stochastic)", color="green")

### 3B) We can observe that batch gradient descent is more accurate, as its error decreases smoothly, whereas stochastic gradient descent is less reliable but converges faster than batch gradient descent. Regarding the termination condition: for batch gradient descent, the error curve smooths out and becomes flat after a certain point, so we can terminate there. For stochastic gradient descent, the error drops sharply at the start; after that point we can terminate.