In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import datasets
import seaborn as sns
from sklearn.metrics import mean_squared_error

In [None]:
# Load the iris dataset and keep the feature matrix and the class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Independent copies of the two columns used by the regression further down:
# column 0 is the predictor, column 3 is the response.
sepal_length = np.array(features[:, 0])
petal_width = np.array(features[:, 3])

# Map each numeric class label to a species name; any label other than
# 0 or 1 is treated as 'virginica'.
species_names = [
    'setosa' if label == 0 else 'versicolor' if label == 1 else 'virginica'
    for label in target
]

In [None]:
# Per-step training history of the most recent `sgd` call: the intercept and
# coefficient after each update and the squared error of the sample that
# triggered it. `sgd` refills this frame in place on every call.
sgd_loss = pd.DataFrame(columns=['intercept', 'coefficient', 'loss'])


def predict(intercept, coefficient, x):
    """Evaluate the line ``intercept + coefficient * x``.

    Works for scalars and for anything that supports ``*`` and ``+`` with
    the parameters (e.g. NumPy arrays).
    """
    return intercept + coefficient * x


def sgd(x, y, alpha=0.01):
    """Fit ``y ~ intercept + coefficient * x`` with one pass of per-sample
    gradient updates.

    Both parameters start at 0.0. Samples are visited once, in the order
    given (no shuffling, no repeated epochs), and after each sample the
    parameters are moved against the gradient of that sample's squared error.

    Side effect: the module-level ``sgd_loss`` frame is emptied and refilled
    in place with one row per update step (labels ``0 .. len(x) - 1``), so it
    always describes the latest call only.

    Parameters
    ----------
    x, y : sequences indexable by integer position
        Predictor and response values; ``y`` must have at least ``len(x)``
        entries.
    alpha : float, default 0.01
        Learning rate applied to every update.

    Returns
    -------
    (intercept, coefficient)
        The parameters after the final update.
    """
    intercept, coefficient = 0.0, 0.0

    # Collect the history in a plain list; enlarging a DataFrame one row at a
    # time re-allocates it on every step.
    history = []
    for i in range(len(x)):
        error = predict(intercept, coefficient, x[i]) - y[i]
        intercept = intercept - alpha * error
        coefficient = coefficient - alpha * error * x[i]
        history.append((intercept, coefficient, error ** 2))

    # Empty the shared frame in place (other cells hold a reference to this
    # exact object) so rows from an earlier, longer run cannot survive.
    sgd_loss.drop(sgd_loss.index, inplace=True)
    for position, column in enumerate(('intercept', 'coefficient', 'loss')):
        sgd_loss[column] = [step[position] for step in history]

    return intercept, coefficient

# Fit feature column 3 as a linear function of feature column 0 using the
# default learning rate.
sgd_intercept, sgd_coefficient = sgd(
    x=features[:, 0],
    y=features[:, 3],
)

In [None]:
# Squared error of each sample as it was seen during training. The x-axis is
# sized from the recorded history itself, so it cannot drift out of step with
# the number of rows in `sgd_loss`.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(sgd_loss) + 1), sgd_loss['loss'], color='r')
ax.set_title('SGD Loss Function Over Time')
ax.set_xlabel('Update step')
ax.set_ylabel('Squared error')
plt.show()

In [None]:
# Scatter of the raw samples coloured by species, with the SGD-fitted line
# drawn on the same axes.
fig, ax = plt.subplots()

sns.scatterplot(
    x=sepal_length,
    y=petal_width,
    hue=species_names,
    ax=ax,
)

ax.plot(
    sepal_length,
    sgd_intercept + sgd_coefficient * sepal_length,
    color='r',
    label='SGD fit',
)

ax.set_xlabel('Sepal length (cm)')
ax.set_ylabel('Petal width (cm)')
ax.set_title('Petal width vs. sepal length with SGD fit')
ax.legend()  # rebuild the legend so it also lists the fitted line
plt.show()

In [None]:
# Evaluate the fitted line on every sample in one vectorised expression;
# kept as a list so the variable has the same type as before.
sgd_predictions = list(sgd_intercept + sgd_coefficient * sepal_length)

# Pass the arguments by keyword so observed values and predictions cannot be
# swapped: the first positional slot of mean_squared_error is y_true.
sgd_mse = mean_squared_error(
    y_true=petal_width,
    y_pred=sgd_predictions,
)

print(f"SGD's MSE is {sgd_mse}")