In [None]:
def my_function(x):
    """Evaluate the quadratic x**2 + 2*x + 1 at ``x`` and return the value."""
    squared_term = x**2
    linear_term = 2*x
    return squared_term + linear_term + 1

# Evaluate the polynomial at x = 3 and display the value.
result = my_function(x=3)
print(result)

16


In [None]:
import sympy as sp

# Build the polynomial x**2 + 2*x + 1 symbolically in the symbol x.
x = sp.Symbol('x')
expression = x**2 + 2*x + 1
print("Expression:", expression)

Expression: x**2 + 2*x + 1


In [None]:
# Symbolic derivative of `expression` with respect to x
derivative = expression.diff(x)
print("Derivative:", derivative)

Derivative: 2*x + 2


In [None]:
import numpy as np

def f(x):
    """Return x**2 + 2*x + 1 for the given ``x``."""
    quadratic = x**2
    return quadratic + 2*x + 1

# Forward-difference estimate of f'(x0): (f(x0 + h) - f(x0)) / h
h = 1e-5   # step size
x0 = 3.0   # point at which the slope is estimated
rise = f(x0 + h) - f(x0)
numerical_derivative = rise / h
print("Numerical Derivative:", numerical_derivative)

Numerical Derivative: 8.000009999875601


In [None]:
import jax.numpy as jnp
from jax import grad

def f(x):
    """Return x**2 + 2*x + 1; uses only arithmetic operators on ``x``."""
    square = x**2
    return square + 2*x + 1

# grad(f) returns a new function that evaluates df/dx
dfdx = grad(f)
slope_at_3 = dfdx(3.0)
print("Automatic Differentiation with JAX:", slope_at_3)

Automatic Differentiation with JAX: 8.0


In [None]:
import numpy as np

# Objective to minimise: f(x) = x^2
def f(x):
    """Return the square of ``x``."""
    return pow(x, 2)

# Analytic slope of the objective: f'(x) = 2x
def df(x):
    """Return the derivative of x^2 at ``x``, i.e. 2*x."""
    slope = 2*x
    return slope

# Gradient-descent settings
learning_rate = 0.1
tolerance = 1e-6       # stop once a step moves x by less than this
max_iterations = 1000  # hard cap on the number of updates
x = 10                 # Starting point

# Repeat x <- x - learning_rate * f'(x) until the step becomes tiny
for i in range(max_iterations):
    slope = df(x)
    new_x = x - learning_rate * slope
    step_is_tiny = abs(new_x - x) < tolerance
    if step_is_tiny:
        break  # x keeps the iterate from before this final step
    x = new_x

print("Global minimum found at x =", x)

Global minimum found at x = 4.017345110647478e-06


In [None]:
import numpy as np

# Objective: f(x) = cos(x)
def f(x):
    """Return the cosine of ``x`` computed with NumPy."""
    cosine = np.cos(x)
    return cosine

# Analytic slope: f'(x) = -sin(x)
def df(x):
    """Return the derivative of cos(x) at ``x``, i.e. -sin(x)."""
    sine = np.sin(x)
    return -sine

# Gradient-descent settings
learning_rate = 0.1
tolerance = 1e-6       # stop once a step moves x by less than this
max_iterations = 1000  # hard cap on the number of updates
x = 2                  # Starting point near a local minimum

# Repeat x <- x - learning_rate * f'(x) until the step becomes tiny
for i in range(max_iterations):
    slope = df(x)
    new_x = x - learning_rate * slope
    step_is_tiny = abs(new_x - x) < tolerance
    if step_is_tiny:
        break  # x keeps the iterate from before this final step
    x = new_x

print("Local minimum found at x =", x)

Local minimum found at x = 3.141582832390514


In [None]:
import numpy as np

# Objective: f(x, y) = x^2 + y^2
def f(x, y):
    """Return the sum of the squares of ``x`` and ``y``."""
    return pow(x, 2) + pow(y, 2)

# Partial derivatives of x^2 + y^2: df/dx = 2x, df/dy = 2y
def gradient(x, y):
    """Return the gradient [2*x, 2*y] as a NumPy array."""
    df_dx = 2*x
    df_dy = 2*y
    return np.array([df_dx, df_dy])

# Gradient Descent parameters
learning_rate = 0.1
point = np.array([10, 5])  # Initial starting point (x0, y0)
tolerance = 1e-6           # stop once a step is shorter than this (Euclidean norm)
max_iterations = 1000      # hard cap on the number of updates

# Gradient Descent loop
for i in range(max_iterations):
    # Named `grad_vec` rather than `grad` so the `grad` function imported
    # from jax earlier in this notebook is not overwritten in the kernel's
    # shared namespace.
    grad_vec = gradient(point[0], point[1])
    new_point = point - learning_rate * grad_vec  # Update rule

    if np.linalg.norm(new_point - point) < tolerance:  # Convergence check
        break  # `point` keeps the iterate from before this final step
    point = new_point

# The loop can also end by exhausting max_iterations; `point` is then simply
# the last iterate reached.
print("Global minimum found at (x, y) =", point)

Global minimum found at (x, y) = [4.01734511e-06 2.00867256e-06]


In [None]:
import numpy as np

# Rosenbrock function: f(x, y) = (1 - x)^2 + 100(y - x^2)^2
def f(x, y):
    """Evaluate the Rosenbrock function at (``x``, ``y``)."""
    offset_term = (1 - x)**2
    valley_term = 100 * (y - x**2)**2
    return offset_term + valley_term

# Partial derivatives of the Rosenbrock function
def gradient(x, y):
    """Return [df/dx, df/dy] of the Rosenbrock function as a NumPy array."""
    residual = y - x**2  # shared factor of both partial derivatives
    df_dx = -2*(1 - x) - 400*x*residual
    df_dy = 200*residual
    return np.array([df_dx, df_dy])

# Gradient Descent parameters
learning_rate = 0.001
point = np.array([0, 0])  # Initial starting point (x0, y0)
tolerance = 1e-6          # stop once a step is shorter than this (Euclidean norm)
max_iterations = 10000    # hard cap on the number of updates

# Gradient Descent loop
for i in range(max_iterations):
    # Named `grad_vec` rather than `grad` so the `grad` function imported
    # from jax earlier in this notebook is not overwritten in the kernel's
    # shared namespace.
    grad_vec = gradient(point[0], point[1])
    new_point = point - learning_rate * grad_vec  # Update rule

    if np.linalg.norm(new_point - point) < tolerance:  # Convergence check
        break  # `point` keeps the iterate from before this final step
    point = new_point

# The loop can also end by exhausting max_iterations; `point` is then simply
# the last iterate reached, not necessarily a stationary point.
print("Local minimum found at (x, y) =", point)

Local minimum found at (x, y) = [0.99440095 0.98881076]


In [None]:
import numpy as np
# ReLU activation: element-wise max(0, Z)
def relu(Z):
    """Return ``Z`` with every negative entry replaced by zero."""
    rectified = np.maximum(0, Z)
    return rectified

# Slope of ReLU, used during backpropagation
def relu_derivative(Z):
    """Return a boolean mask that is True where ``Z`` is strictly positive."""
    positive_mask = Z > 0
    return positive_mask

# Softmax activation function for the output layer (classification)
def softmax(Z):
    """Turn scores into probabilities along the last axis.

    Args:
        Z: NumPy array of scores. For 2-D input each row is treated as one
            sample and normalised independently.

    Returns:
        Array with the same shape as ``Z`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Shift each row by its OWN maximum. Subtracting the global maximum is
    # mathematically equivalent, but a row lying far below that maximum would
    # underflow to all zeros and then produce 0/0 = NaN in the division.
    shifted = Z - np.max(Z, axis=-1, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / exp_Z.sum(axis=-1, keepdims=True)

In [None]:
# Forward pass through the two-layer network
def forward_propagation(X, W1, b1, W2, b2):
    """Run the input through the hidden layer (ReLU) and output layer (softmax).

    Returns the tuple (Z1, A1, Z2, Y_hat): the hidden pre-activation, the
    hidden activation, the output pre-activation and the softmax output.
    """
    hidden_linear = np.dot(X, W1) + b1
    hidden_activation = relu(hidden_linear)
    output_linear = np.dot(hidden_activation, W2) + b2
    probabilities = softmax(output_linear)
    return hidden_linear, hidden_activation, output_linear, probabilities

In [None]:
# Backward pass: gradients for both layers
def backward_propagation(X, Y, Z1, A1, Z2, Y_hat, W2):
    """Compute the gradients used to update W1, b1, W2 and b2.

    ``Z2`` is accepted but not used in the computation. Every returned
    gradient is divided by the number of rows in ``X``.

    Returns:
        The tuple (dW1, db1, dW2, db2).
    """
    n_samples = X.shape[0]

    # Output layer: error signal at Z2, then its weight and bias gradients
    delta_out = Y_hat - Y
    dW2 = np.dot(A1.T, delta_out) / n_samples
    db2 = np.sum(delta_out, axis=0, keepdims=True) / n_samples

    # Hidden layer: push the error back through W2, then gate it with the
    # ReLU slope so that units with Z1 <= 0 contribute nothing
    upstream = np.dot(delta_out, W2.T)
    delta_hidden = upstream * relu_derivative(Z1)
    dW1 = np.dot(X.T, delta_hidden) / n_samples
    db1 = np.sum(delta_hidden, axis=0, keepdims=True) / n_samples

    return dW1, db1, dW2, db2

In [None]:
# Training function
def train(X, Y, W1, b1, W2, b2, learning_rate, num_epochs, print_every=100):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: input samples, one row per sample.
        Y: one-hot targets with one row per sample.
        W1, b1, W2, b2: initial parameters. They are copied, so the arrays
            passed in are left unchanged.
        learning_rate: step size applied to every gradient.
        num_epochs: number of forward/backward passes over the data.
        print_every: print the cross-entropy loss every this many epochs;
            pass 0 or None to disable logging.

    Returns:
        The tuple (W1, b1, W2, b2) of trained parameters as float arrays.
    """
    # Work on float copies: the in-place `-=` updates below would otherwise
    # silently modify the caller's arrays, and would fail on integer arrays.
    W1, b1, W2, b2 = (np.array(p, dtype=float) for p in (W1, b1, W2, b2))
    n_samples = X.shape[0]

    for i in range(num_epochs):
        # Forward propagation
        Z1, A1, Z2, Y_hat = forward_propagation(X, W1, b1, W2, b2)

        # Cross-entropy loss is only needed for logging; the 1e-8 guards
        # against log(0)
        if print_every and i % print_every == 0:
            loss = -np.sum(Y * np.log(Y_hat + 1e-8)) / n_samples
            print(f'Epoch {i}, Loss: {loss}')

        # Backward propagation
        dW1, db1, dW2, db2 = backward_propagation(X, Y, Z1, A1, Z2, Y_hat, W2)

        # Gradient-descent update of the local copies
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

    return W1, b1, W2, b2

In [None]:
# XOR problem dataset: every combination of two binary inputs
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])  # Input (4 samples, 2 features)
Y = np.array([
    [1, 0],
    [0, 1],
    [0, 1],
    [1, 0],
])  # Output (4 samples, 2 classes, one-hot encoded)


In [None]:
# Random initial parameters (seeded so that reruns give the same values)
np.random.seed(42)

# Layer sizes: 2 input features, 3 neurons in hidden layer, 2 output classes
n_inputs, n_hidden, n_classes = 2, 3, 2

# Small random weights, zero biases. W1 is drawn before W2 so the random
# stream is consumed in a fixed order.
W1 = 0.01 * np.random.randn(n_inputs, n_hidden)
b1 = np.zeros((1, n_hidden))
W2 = 0.01 * np.random.randn(n_hidden, n_classes)
b2 = np.zeros((1, n_classes))

# NOTE(review): the recorded training output stays near 0.693 for thousands
# of epochs and one sample ends up misclassified; the 0.01 weight scale and
# the learning rate below are likely candidates to tune - confirm by experiment.
learning_rate = 0.01
num_epochs = 10000

In [None]:
# Fit the network on the XOR data; the returned parameters replace the
# initial ones.
trained_params = train(X, Y, W1, b1, W2, b2, learning_rate, num_epochs)
W1, b1, W2, b2 = trained_params

Epoch 0, Loss: 0.6931471686966064
Epoch 100, Loss: 0.6931459426633895
Epoch 200, Loss: 0.6931445641746916
Epoch 300, Loss: 0.6931428589320954
Epoch 400, Loss: 0.6931406117127277
Epoch 500, Loss: 0.6931381335181085
Epoch 600, Loss: 0.6931353477214024
Epoch 700, Loss: 0.6931316969565677
Epoch 800, Loss: 0.6931268983807521
Epoch 900, Loss: 0.6931205747652627
Epoch 1000, Loss: 0.693112552382283
Epoch 1100, Loss: 0.6931025982437519
Epoch 1200, Loss: 0.6930898511937645
Epoch 1300, Loss: 0.6930735125467349
Epoch 1400, Loss: 0.6930553480006876
Epoch 1500, Loss: 0.6930329432957081
Epoch 1600, Loss: 0.69300553801811
Epoch 1700, Loss: 0.6929712522908049
Epoch 1800, Loss: 0.6929288512136436
Epoch 1900, Loss: 0.6928755963554181
Epoch 2000, Loss: 0.6928099640021175
Epoch 2100, Loss: 0.6927280922801112
Epoch 2200, Loss: 0.6926267088683247
Epoch 2300, Loss: 0.6924989128430565
Epoch 2400, Loss: 0.6923410733283064
Epoch 2500, Loss: 0.6921443259407292
Epoch 2600, Loss: 0.691900370473917
Epoch 2700, Loss:

In [None]:
# Class prediction from the trained parameters
def predict(X, W1, b1, W2, b2):
    """Return, for each row of ``X``, the index of the largest network output."""
    Z1, A1, Z2, probabilities = forward_propagation(X, W1, b1, W2, b2)
    return np.argmax(probabilities, axis=1)

# Predicted class for every training sample, alongside the labels recovered
# from the one-hot targets
predictions = predict(X, W1, b1, W2, b2)
true_labels = Y.argmax(axis=1)

print("Predictions:", predictions)
print("True Labels:", true_labels)

Predictions: [0 0 1 0]
True Labels: [0 1 1 0]
