PyTorch implementation: defining a simple layer and trying it out with different activation functions

In [None]:
import pandas as pd
import io
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# Dataset: ten daily observations of humidity and temperature with a Yes/No rain label.
data = """Day No.,Humidity  (%),Temperature (degree Celsius),Rain
1,80,20,Yes
2,20,15,No
3,70,25,Yes
4,30,24,Yes
5,55,28,No
6,68,19,Yes
7,45,21,No
8,73,17,Yes
9,44,16,No
10,56,22,No"""

# One seed for both the noise draw and the train/test split, so that re-running
# the notebook from a fresh kernel reproduces the same tensors. manual_seed
# sets PyTorch's global RNG, so later random draws in the notebook are
# affected as well.
SEED = 90
torch.manual_seed(SEED)

# Parse the CSV text and encode the label as 1 (Yes) / 0 (No).
df = pd.read_csv(io.StringIO(data))
df['Rain'] = df['Rain'].map({'Yes': 1, 'No': 0})

# X: one row per day with the two feature columns; y: one label per row, as a column vector.
X = torch.tensor(df[['Humidity  (%)', 'Temperature (degree Celsius)']].values, dtype=torch.float32)
y = torch.tensor(df['Rain'].values, dtype=torch.float32).unsqueeze(1)

# Add Gaussian noise to features (relates to prob PDF section)
mean = 0
std = 0.1
noise = torch.normal(mean, std, size=X.shape)
X_noisy = X + noise

# Split row indices 80/20, then index the noisy features and the labels with them.
train_idx, test_idx = train_test_split(range(len(X_noisy)), test_size=0.2, random_state=SEED)
X_train = X_noisy[train_idx]
y_train = y[train_idx]
X_test = X_noisy[test_idx]
y_test = y[test_idx]

In [None]:
class ExtendedLayer(nn.Module):
    """Two fully connected layers with a selectable hidden activation.

    The input passes through ``fc1``, then the activation selected by name,
    then ``fc2``. ``forward`` returns the raw ``fc2`` output; no output
    non-linearity is applied inside the module.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.
        activation: Case-insensitive activation name. 'relu', 'silu',
            'sigmoid' and 'tanh' are recognised; any other value (including
            the default 'linear') leaves the hidden output unchanged.
    """

    def __init__(self, input_size=2, hidden_size=4, output_size=1, activation='linear'):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        # Stored lower-cased so the lookup in forward() ignores case.
        self.activation = activation.lower()

    def forward(self, x):
        """Return ``fc2(act(fc1(x)))`` for the currently configured activation."""
        hidden = self.fc1(x)
        known_activations = {
            'relu': torch.relu,
            'silu': nn.functional.silu,
            'sigmoid': torch.sigmoid,
            'tanh': torch.tanh,
        }
        act_fn = known_activations.get(self.activation)
        # Names outside the table (e.g. 'linear') mean "no hidden activation".
        if act_fn is not None:
            hidden = act_fn(hidden)
        return self.fc2(hidden)

def train_and_evaluate_ext(activation, epochs=200, lr=0.01):
    """Train an ``ExtendedLayer`` with the given hidden activation and score it.

    The model is fitted on the module-level ``X_train`` / ``y_train`` tensors
    with Adam and evaluated on ``X_test`` / ``y_test``. As a side effect the
    eigenvalues of the first-layer Gram matrix ``W^T W`` are printed.

    Args:
        activation: Hidden activation name forwarded to ``ExtendedLayer``.
        epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.

    Returns:
        Fraction of test rows whose thresholded prediction equals the label.
    """
    model = ExtendedLayer(activation=activation)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # ExtendedLayer returns the raw fc2 output for every activation, so that
    # output is treated as a logit. BCELoss only accepts values in [0, 1] and
    # therefore cannot be applied to it directly; BCEWithLogitsLoss applies
    # the sigmoid internally. Using one loss for all activations also keeps
    # the runs comparable.
    criterion = nn.BCEWithLogitsLoss()

    # Full-batch training.
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        logits = model(X_train)
        loss = criterion(logits, y_train)
        loss.backward()
        optimizer.step()

    # Evaluate: map logits to probabilities, then threshold at 0.5.
    model.eval()
    with torch.no_grad():
        test_probs = torch.sigmoid(model(X_test))
        preds = (test_probs > 0.5).float()
        acc = (preds == y_test).float().mean().item()

    # Eigenvalue analysis on first layer weights via the Gram matrix W^T W.
    # The Gram matrix is symmetric, so the symmetric solver is used; it
    # returns real eigenvalues in ascending order.
    W = model.fc1.weight.detach()
    gram = W.T @ W
    eigenvalues = torch.linalg.eigvalsh(gram)
    print(f"Eigenvalues for {activation}: {eigenvalues.tolist()}")

    return acc

# Train one model per enabled activation, in a fixed order, and keep each accuracy.
linear_acc, relu_acc, silu_acc = (
    train_and_evaluate_ext(act_name) for act_name in ('linear', 'relu', 'silu')
)
# Sigmoid and tanh runs are currently disabled:
#sigmoid_acc = train_and_evaluate_ext('sigmoid')
#tanh_acc = train_and_evaluate_ext('tanh')

# Report the accuracy returned for each run.
for label, acc in (("Linear", linear_acc), ("ReLU", relu_acc), ("SiLU", silu_acc)):
    print(f"{label} Validation Accuracy: {acc}")
#print(f"Sigmoid Validation Accuracy: {sigmoid_acc}")
#print(f"Tanh Validation Accuracy: {tanh_acc}")

Eigenvalues for linear: [0.5283904671669006, 1.1151251792907715]
Eigenvalues for relu: [0.26024460792541504, 0.767879843711853]
Eigenvalues for silu: [1.0480756759643555, 1.3192983865737915]
Linear Validation Accuracy: 0.5
ReLU Validation Accuracy: 1.0
SiLU Validation Accuracy: 0.5


Probabilistic Implementations

a. Expectations and Variance Computation

In [None]:
# Work on the two feature columns of X separately.
humidity_col = X[:, 0]
temperature_col = X[:, 1]

# Sample means as estimates of E(X).
exp_hum, exp_temp = humidity_col.mean().item(), temperature_col.mean().item()
print(f"E(Humidity): {exp_hum}, E(Temperature): {exp_temp}")

# Sample variances as estimates of Var(X).
var_hum, var_temp = humidity_col.var().item(), temperature_col.var().item()
print(f"Var(Humidity): {var_hum}, Var(Temperature): {var_temp}")

# torch.cov expects variables in rows, hence the transpose of the (rows, features) matrix.
cov_matrix = torch.cov(X.t())
print("Covariance matrix:\n", cov_matrix.tolist())

E(Humidity): 54.099998474121094, E(Temperature): 20.700000762939453
Var(Humidity): 378.54443359375, Var(Temperature): 17.344444274902344
Covariance matrix:
 [[378.54443359375, 13.922221183776855], [13.922221183776855, 17.344444274902344]]


b. Conditional Distributions and Independence Check

In [None]:
# Mean rain label over the rows whose humidity exceeds the humidity mean:
# an estimate of E(Rain | Humidity > mean).
high_hum_mask = X[:, 0].gt(exp_hum)
cond_exp_rain_high_hum = y[high_hum_mask].mean().item()
print(f"E(Rain|Humidity > mean): {cond_exp_rain_high_hum}")

# Covariance between humidity and the rain label. A value near zero is
# consistent with independence but does not by itself prove it.
hum_and_rain = torch.stack((X[:, 0], y[:, 0]), dim=0)
cov_hum_rain = torch.cov(hum_and_rain)[0, 1].item()
print(f"Cov(Humidity, Rain) ~0 for independence: {cov_hum_rain}")

E(Rain|Humidity > mean): 0.6666666865348816
Cov(Humidity, Rain) ~0 for independence: 5.611111164093018


c. Bayes Rule Approximation

In [None]:
# Marginals estimated from the data: P(Rain) and P(Humidity > mean).
p_rain = y.mean().item()
p_hum_high = X[:, 0].gt(exp_hum).float().mean().item()

# P(Rain | Humidity high): mean label over the high-humidity rows.
p_rain_given_high_hum = y[high_hum_mask].mean().item()

# Bayes' rule: P(High | Rain) = P(Rain | High) * P(High) / P(Rain).
joint_rain_and_high = p_rain_given_high_hum * p_hum_high
p_high_hum_given_rain = joint_rain_and_high / p_rain
print(f"P(High Humidity|Rain) via Bayes: {p_high_hum_given_rain}")

P(High Humidity|Rain) via Bayes: 0.8000000556310027


d. Sampling from Distributions (Bernoulli, Poisson, Gaussian)

In [None]:
# Bernoulli for simulated labels. The success probability is taken from the
# data (mean of the 0/1 rain labels) instead of a hard-coded constant, so it
# always matches the dataset it is described as coming from.
bern_p = y.mean().item()
bern_samples = torch.distributions.Bernoulli(probs=bern_p).sample((10, 1))
print("Bernoulli samples (sim rain):\n", bern_samples.tolist())

# Poisson for simulated event counts (e.g., rain days); the rate is an illustrative choice.
pois_lambda = 3.0
pois_samples = torch.distributions.Poisson(rate=pois_lambda).sample((10,))
print("Poisson samples:\n", pois_samples.tolist())

# Multivariate Gaussian parameterised by the feature means and the feature
# covariance matrix computed earlier in the notebook.
gauss_mean = torch.tensor([exp_hum, exp_temp])
gauss_cov = cov_matrix
gauss_dist = torch.distributions.MultivariateNormal(gauss_mean, gauss_cov)
gauss_samples = gauss_dist.sample((5,))
print("Gaussian samples:\n", gauss_samples.tolist())

Bernoulli samples (sim rain):
 [[0.0], [0.0], [0.0], [0.0], [0.0], [1.0], [0.0], [1.0], [0.0], [1.0]]
Poisson samples:
 [2.0, 3.0, 4.0, 3.0, 5.0, 1.0, 3.0, 2.0, 2.0, 7.0]
Gaussian samples:
 [[38.422908782958984, 19.886516571044922], [62.206932067871094, 17.692588806152344], [57.98236083984375, 22.325788497924805], [72.23552703857422, 22.396703720092773], [49.999847412109375, 18.946880340576172]]


e. Jensen's Inequality Demo

In [None]:
# Jensen's inequality for the convex map f(x) = x^2 on the humidity column:
# the mean of the squares is compared with the square of the mean.
hum_vals = X[:, 0]
exp_f = hum_vals.pow(2).mean().item()
f_exp = hum_vals.mean().pow(2).item()
print(f"Jensen: E(f(X))={exp_f} >= f(E(X))={f_exp}")

Jensen: E(f(X))=3267.5 >= f(E(X))=2926.809814453125


Linear Algebra Implementations

a. Trace, Determinant, and Inverse

In [None]:
# Trace of the covariance matrix computed in the probability section.
cov_trace = cov_matrix.trace().item()
print(f"Trace of cov: {cov_trace}")

# Determinant; a non-zero value means the matrix can be inverted.
det_cov = cov_matrix.det().item()
print(f"Det of cov: {det_cov}")

# Invert only when the determinant is not exactly zero.
if det_cov != 0:
    inv_cov = cov_matrix.inverse()
    print("Inverse cov:\n", inv_cov.tolist())

Trace of cov: 395.8888854980469
Det of cov: 6371.814453125
Inverse cov:
 [[0.002722057281062007, -0.0021849696058779955], [-0.0021849696058779955, 0.05940920487046242]]


b. Norms and Rank

In [None]:
# L2 norm of the first sample (row 0 of X). torch.linalg.vector_norm replaces
# the deprecated torch.norm and defaults to the 2-norm.
l2_norm = torch.linalg.vector_norm(X[0]).item()
print(f"L2 norm of first sample: {l2_norm}")

# L1 norm of the same sample.
l1_norm = torch.linalg.vector_norm(X[0], ord=1).item()
print(f"L1 norm: {l1_norm}")

# Rank of the data matrix itself. X^T X has the same rank in exact arithmetic,
# but forming it squares the singular values, which makes the numerical rank
# test less reliable rather than more.
rank_X = torch.linalg.matrix_rank(X).item()
print(f"Rank of data matrix: {rank_X}")

L2 norm of first sample: 82.46211242675781
L1 norm: 100.0
Rank of data matrix: 2


c. Eigenvalues/Eigenvectors and Quadratic Form

In [None]:
# Eigen-decomposition of the covariance matrix; eigh is the solver for
# symmetric matrices and returns real eigenvalues in ascending order.
eigvals, eigvecs = torch.linalg.eigh(cov_matrix)
print("Eigenvalues:\n", eigvals.tolist())
print("Eigenvectors:\n", eigvecs.tolist())

# Quadratic form x^T A x (A=cov, x=first sample). For a 1-D tensor, `@`
# already contracts it as a row vector on the left and a column vector on the
# right, so no transpose is needed; calling .T on a 1-D tensor is deprecated
# and emits a warning.
first_sample = X[0]
quad_form = (first_sample @ cov_matrix @ first_sample).item()
print(f"Quadratic form: {quad_form}")

Eigenvalues:
 [16.808616638183594, 379.08026123046875]
Eigenvectors:
 [[0.03845878317952156, -0.9992601275444031], [-0.9992601275444031, -0.03845878317952156]]
Quadratic form: 2474173.25


  quad_form = (X[0].T @ cov_matrix @ X[0]).item()


d. Matrix Transpose and Hadamard Product

In [None]:
# Swap the two axes of X so that features become rows.
X_transpose = X.t()
print("X transpose shape:", X_transpose.shape)

# Hadamard (element-wise) product of the first two samples.
hadamard_prod = torch.mul(X[0], X[1])
print("Hadamard product of rows 0 and 1:\n", hadamard_prod.tolist())

X transpose shape: torch.Size([2, 10])
Hadamard product of rows 0 and 1:
 [1600.0, 300.0]


e. Orthogonal Matrix Construction

In [None]:
# Start from the covariance matrix and add standard-normal noise to each entry.
perturbation = torch.randn(2, 2)
random_mat = cov_matrix + perturbation

# The Q factor of a QR decomposition has orthonormal columns.
Q, R = torch.linalg.qr(random_mat)
print("Orthogonal Matrix Q:\n", Q.tolist())

# Orthogonality check: Q^T Q should equal the identity up to rounding error.
Q_t_Q = torch.matmul(Q.t(), Q)
print("Q^T Q (should be identity):\n", Q_t_Q.tolist())

Orthogonal Matrix Q:
 [[-0.9992358684539795, -0.039084240794181824], [-0.039084240794181824, 0.9992359280586243]]
Q^T Q (should be identity):
 [[0.9999999403953552, -3.725290298461914e-09], [-3.725290298461914e-09, 1.0]]
