In [2]:
!pip install numpy

Collecting numpy
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.0/61.0 kB 1.2 MB/s eta 0:00:00
Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.2/18.2 MB 10.2 MB/s eta 0:00:00
Installing collected packages: numpy
Successfully installed numpy-1.26.4


In [4]:
import numpy as np

# Source text for the toy next-character dataset
sentence = "The rewards of all your hard work in the garden are easy to see."

# Normalise the text: lowercase it and drop the spaces
# (other characters, such as the final ".", are kept)
sentence_cleaned = "".join(sentence.lower().split(" "))

# Vocabulary in sorted order, plus lookup tables in both directions
chars = sorted(set(sentence_cleaned))
idx_to_char = dict(enumerate(chars))
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

# Encode every character of the cleaned sentence as its vocabulary index
sentence_indices = list(map(char_to_idx.__getitem__, sentence_cleaned))

# Sliding window: each row of X holds 3 consecutive character indices and the
# matching row of y holds the index of the character that follows them
X = np.array(
    [sentence_indices[start:start + 3] for start in range(len(sentence_indices) - 3)]
)
y = np.array(sentence_indices[3:]).reshape(-1, 1)

# Prepend a column of ones to X so the first weight acts as the bias term
X_b = np.hstack((np.ones((len(X), 1)), X))

# Mean squared error (MSE) loss
def mse_loss(y_true, y_pred):
    """Return the mean of the squared element-wise differences.

    Parameters
    ----------
    y_true : array-like supporting subtraction
        Reference values.
    y_pred : array-like supporting subtraction
        Predicted values; subtracted from ``y_true`` element-wise.

    Returns
    -------
    The average of ``(y_true - y_pred) ** 2`` over all elements.
    """
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

# Gradient of the MSE loss with respect to theta
def mse_gradient(X, y, theta):
    """Return ``2/n * X^T (X theta - y)``, where ``n = len(y)``.

    Parameters
    ----------
    X : ndarray
        Input matrix, one sample per row.
    y : ndarray
        Targets; ``len(y)`` is used as the sample count.
    theta : ndarray
        Current weights, multiplied by ``X`` to form the predictions.

    Returns
    -------
    ndarray
        The gradient, computed as ``X.T`` times the residuals and scaled by ``2/n``.
    """
    residuals = np.dot(X, theta) - y
    scale = 2 / len(y)
    return scale * np.dot(X.T, residuals)

# Gradient Descent algorithm
def gradient_descent(X, y, learning_rate=0.001, n_iterations=1000):
    """Fit linear weights by full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Input matrix; a bias column, if wanted, must already be included.
    y : ndarray of shape (m, 1)
        Target column vector.
    learning_rate : float, default 0.001
        Step size multiplied with the gradient at every update.
    n_iterations : int, default 1000
        Number of updates, each computed on the whole data set.

    Returns
    -------
    ndarray of shape (n_features, 1)
        The weights after the last update.
    """
    # Size theta from X rather than hard-coding 4 rows, so the function works
    # for any number of columns (for 4 columns the random draw is unchanged).
    theta = np.random.randn(X.shape[1], 1)
    for _ in range(n_iterations):
        theta -= learning_rate * mse_gradient(X, y, theta)
    return theta

# Stochastic Gradient Descent algorithm
def stochastic_gradient_descent(X, y, learning_rate=0.001, n_epochs=50):
    """Fit linear weights by stochastic gradient descent on the MSE loss.

    Every update uses one sample picked uniformly at random (with replacement),
    and each epoch performs ``len(y)`` such updates.

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Input matrix; a bias column, if wanted, must already be included.
    y : ndarray of shape (m, 1)
        Target column vector.
    learning_rate : float, default 0.001
        Step size multiplied with the gradient at every update.
    n_epochs : int, default 50
        Number of passes of ``m`` single-sample updates.

    Returns
    -------
    ndarray of shape (n_features, 1)
        The weights after the last update.
    """
    # Size theta from X rather than hard-coding 4 rows, so the function works
    # for any number of columns (for 4 columns the random draw is unchanged).
    theta = np.random.randn(X.shape[1], 1)
    m = len(y)
    for _ in range(n_epochs):
        for _ in range(m):
            random_index = np.random.randint(m)
            # Slice (instead of indexing) to keep the sample two-dimensional.
            xi = X[random_index:random_index + 1]
            yi = y[random_index:random_index + 1]
            theta -= learning_rate * mse_gradient(xi, yi, theta)
    return theta

# Mini-Batch Gradient Descent algorithm
def mini_batch_gradient_descent(X, y, learning_rate=0.001, n_epochs=50, batch_size=20):
    """Fit linear weights by mini-batch gradient descent on the MSE loss.

    At the start of every epoch the samples are shuffled, then consumed in
    consecutive batches of ``batch_size`` rows (the last batch may be smaller).

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Input matrix; a bias column, if wanted, must already be included.
    y : ndarray of shape (m, 1)
        Target column vector.
    learning_rate : float, default 0.001
        Step size multiplied with the gradient at every update.
    n_epochs : int, default 50
        Number of shuffled passes over the data.
    batch_size : int, default 20
        Number of rows per update.

    Returns
    -------
    ndarray of shape (n_features, 1)
        The weights after the last update.
    """
    # Size theta from X rather than hard-coding 4 rows, so the function works
    # for any number of columns (for 4 columns the random draw is unchanged).
    theta = np.random.randn(X.shape[1], 1)
    m = len(y)
    for _ in range(n_epochs):
        # Apply the same permutation to X and y so rows stay aligned.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        y_shuffled = y[shuffled_indices]
        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            yi = y_shuffled[start:start + batch_size]
            theta -= learning_rate * mse_gradient(xi, yi, theta)
    return theta

# Seed NumPy's global random generator before running the optimizers: each of
# them draws its initial theta with np.random.randn, and the stochastic and
# mini-batch variants also sample/shuffle with np.random, so without a seed the
# printed values change on every run.
np.random.seed(42)

# Run Gradient Descent
theta_gd = gradient_descent(X_b, y)
print("Theta using Gradient Descent:\n", theta_gd)

# Run Stochastic Gradient Descent
theta_sgd = stochastic_gradient_descent(X_b, y)
print("Theta using Stochastic Gradient Descent:\n", theta_sgd)

# Run Mini-Batch Gradient Descent
theta_mbgd = mini_batch_gradient_descent(X_b, y)
print("Theta using Mini-Batch Gradient Descent:\n", theta_mbgd)


Theta using Gradient Descent:
 [[ 2.17374796]
 [-0.05705186]
 [ 0.47123164]
 [ 0.22120924]]
Theta using Stochastic Gradient Descent:
 [[ 3.37094979]
 [-0.1574335 ]
 [ 0.57592458]
 [ 0.36825206]]
Theta using Mini-Batch Gradient Descent:
 [[-1.53322242]
 [ 0.07631192]
 [ 0.61436968]
 [ 0.30174171]]
