In [13]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [14]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the data from the Excel file
file_path = "C:/Users/rileybla/Desktop/CIE500_SP2025/week 10/BSA Analysis Results 2023-2025.xlsx"
iris = pd.read_excel(file_path, sheet_name="BSA Master Log")

# Select columns of interest
columns_of_interest = ["Turbidity (NTU)", "TSS (mg solids/L)", "Calc. FC (CFU/100 mL)"]
iris = iris.dropna(subset=columns_of_interest)


# Log-transform target variable
iris["Calc. FC (CFU/100 mL)"] = np.log1p(iris["Calc. FC (CFU/100 mL)"])

# Normalize features and target using StandardScaler
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X = scaler_X.fit_transform(iris[["Turbidity (NTU)", "TSS (mg solids/L)"]])  # Features
y = scaler_y.fit_transform(iris[["Calc. FC (CFU/100 mL)"]])  # Target

# Convert to PyTorch tensors with consistent dtype (float32)
x = torch.tensor(X, dtype=torch.float32, requires_grad=False)  # Features
y = torch.tensor(y.flatten(), dtype=torch.float32, requires_grad=False)  # Target

# Initialize weights and bias with consistent dtype
w = torch.tensor([1.0, 1.0], dtype=torch.float32, requires_grad=True)  # Weights for two features
b = torch.tensor([0.0], dtype=torch.float32, requires_grad=True)  # Bias

print(f'Before optimization, w is {w.tolist()}')
print(f'Before optimization, b is {b.item()}')

# Define optimizer
optimizer = torch.optim.Adam([w, b], lr=0.01)

# Training loop
epochs = 500
for epoch in range(epochs):
    optimizer.zero_grad()  # Reset gradients
    
    # Forward pass: calculate predictions using matrix multiplication
    y_pred = torch.matmul(x, w) + b
    
    # Compute loss (Mean Squared Error)
    loss = ((y_pred - y) ** 2).mean()
    
    # Backward pass: compute gradients
    loss.backward()
    
    # Update weights and bias using optimizer
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Print final values of weights and bias
print(f'After optimization, w is {w.tolist()}')
print(f'After optimization, b is {b.item()}')

# Predictions using optimized parameters
with torch.no_grad():
    y_pred_final = torch.matmul(x, w) + b

print("First few predictions:", y_pred_final[:5].numpy())
print("First few actual values:", y[:5].numpy())


Before optimization, w is [1.0, 1.0]
Before optimization, b is 0.0
Epoch [10/500], Loss: 2.0397
Epoch [20/500], Loss: 1.6624
Epoch [30/500], Loss: 1.3642
Epoch [40/500], Loss: 1.1407
Epoch [50/500], Loss: 0.9818
Epoch [60/500], Loss: 0.8747
Epoch [70/500], Loss: 0.8064
Epoch [80/500], Loss: 0.7652
Epoch [90/500], Loss: 0.7415
Epoch [100/500], Loss: 0.7286
Epoch [110/500], Loss: 0.7217
Epoch [120/500], Loss: 0.7180
Epoch [130/500], Loss: 0.7157
Epoch [140/500], Loss: 0.7142
Epoch [150/500], Loss: 0.7130
Epoch [160/500], Loss: 0.7120
Epoch [170/500], Loss: 0.7111
Epoch [180/500], Loss: 0.7103
Epoch [190/500], Loss: 0.7095
Epoch [200/500], Loss: 0.7089
Epoch [210/500], Loss: 0.7083
Epoch [220/500], Loss: 0.7078
Epoch [230/500], Loss: 0.7074
Epoch [240/500], Loss: 0.7070
Epoch [250/500], Loss: 0.7067
Epoch [260/500], Loss: 0.7064
Epoch [270/500], Loss: 0.7062
Epoch [280/500], Loss: 0.7060
Epoch [290/500], Loss: 0.7058
Epoch [300/500], Loss: 0.7057
Epoch [310/500], Loss: 0.7056
Epoch [320/5

In [None]:
# are there more related variables to improve prediction
# Load the data from the Excel file
file_path = "C:/Users/rileybla/Desktop/CIE500_SP2025/week 10/BSA Analysis Results 2023-2025.xlsx"
iris = pd.read_excel(file_path, sheet_name="BSA Master Log")

# Select columns of interest
columns_of_interest = ["Turbidity (NTU)", "TSS (mg solids/L)", "Calc. FC (CFU/100 mL)"]
iris = iris.dropna(subset=columns_of_interest)

# Log-transform target variable
iris["Calc. FC (CFU/100 mL)"] = np.log1p(iris["Calc. FC (CFU/100 mL)"]) # test if need to do this beause fit_transform may take care of this

# Normalize features and target using StandardScaler
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X = scaler_X.fit_transform(iris[["Turbidity (NTU)", "TSS (mg solids/L)"]])  # Features    reverse function of this check online
y = scaler_y.fit_transform(iris[["Calc. FC (CFU/100 mL)"]])  # Target


In [16]:

# This format scaler_y.inverse_transform(output) and include log undo
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
x_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.flatten(), dtype=torch.float32)
x_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test.flatten(), dtype=torch.float32)


In [None]:
#increase number of neurons and layers for more accuracy
# Define a complex neural network model
class ComplexNN(torch.nn.Module):
    def __init__(self):
        super(ComplexNN, self).__init__()
        self.hidden1 = torch.nn.Linear(2, 30)  # First hidden layer with 20 neurons 
        self.hidden2 = torch.nn.Linear(30, 10)  # Second hidden layer with 10 neurons
        self.output = torch.nn.Linear(10, 1)   # Output layer

    def forward(self, x):
        x = torch.relu(self.hidden1(x))       # ReLU activation for first hidden layer
        x = torch.relu(self.hidden2(x))       # ReLU activation for second hidden layer
        x = self.output(x)                    # Output layer
        return x

# Initialize model and optimizer
model = ComplexNN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()  # Mean Squared Error Loss RMSE


In [None]:
# also split into multiple batches. train iteratively use batch size: number of training example in one forward/backward pass
# Training loop
model.train()  # Set model to training mode all the model can be trained

epochs = 600
for epoch in range(epochs):
    
    optimizer.zero_grad()  # Reset gradients
    
    # Forward pass: calculate predictions on training set
    y_pred_train = model(x_train)
    
    # Compute loss on training set
    loss_train = criterion(y_pred_train.flatten(), y_train)
    
    # Backward pass: compute gradients
    loss_train.backward()
    
    # Update weights using optimizer
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Training Loss: {loss_train.item():.4f}')


Epoch [10/600], Training Loss: 0.2179
Epoch [20/600], Training Loss: 0.2064
Epoch [30/600], Training Loss: 0.2063
Epoch [40/600], Training Loss: 0.2054
Epoch [50/600], Training Loss: 0.2040
Epoch [60/600], Training Loss: 0.2032
Epoch [70/600], Training Loss: 0.2031
Epoch [80/600], Training Loss: 0.2031
Epoch [90/600], Training Loss: 0.2011
Epoch [100/600], Training Loss: 0.2008
Epoch [110/600], Training Loss: 0.1990
Epoch [120/600], Training Loss: 0.1986
Epoch [130/600], Training Loss: 0.1980
Epoch [140/600], Training Loss: 0.2016
Epoch [150/600], Training Loss: 0.1979
Epoch [160/600], Training Loss: 0.1991
Epoch [170/600], Training Loss: 0.1987
Epoch [180/600], Training Loss: 0.1978
Epoch [190/600], Training Loss: 0.1973
Epoch [200/600], Training Loss: 0.1948
Epoch [210/600], Training Loss: 0.1961
Epoch [220/600], Training Loss: 0.1948
Epoch [230/600], Training Loss: 0.1966
Epoch [240/600], Training Loss: 0.1933
Epoch [250/600], Training Loss: 0.1930
Epoch [260/600], Training Loss: 0.

In [19]:

# Evaluate model on test set
model.eval()  # Set model to evaluation mode

with torch.no_grad():
    y_pred_test = model(x_test).flatten()  # Predictions on test set
    
    # Compute loss on test set
    loss_test = criterion(y_pred_test, y_test)
    print(f"Test Loss: {loss_test.item():.4f}")


Test Loss: 1.9675


In [20]:

# Print first few predictions and actual values from test set (scaled values)
print("First few predictions:", y_pred_test[:5].numpy())
print("First few actual values:", y_test[:5].numpy())


First few predictions: [ 4.2992525  -0.38058853  0.20363145  0.94705623 -0.75729764]
First few actual values: [ 1.2027607  -0.30847684  1.3158773   1.2296234  -1.3362627 ]


In [21]:

# Reverse scaling for predictions and actual values to interpret results in original scale
y_pred_scaled = scaler_y.inverse_transform(y_pred_test.numpy().reshape(-1, 1))
y_actual_scaled = scaler_y.inverse_transform(y_test.numpy().reshape(-1, 1))

# Reverse log1p transform using exponential
y_pred_original = np.expm1(y_pred_scaled)  # Undo log1p: e^(x) - 1
y_actual_original = np.expm1(y_actual_scaled)

print("First few predictions (original scale):", y_pred_original[:5].flatten())
print("First few actual values (original scale):", y_actual_original[:5].flatten())


First few predictions (original scale): [1.1171509e+13 2.1245959e+03 3.4751078e+04 1.2166596e+06 3.4975797e+02]
First few actual values (original scale): [4.1333348e+06 2.9999990e+03 7.0999980e+06 4.7000005e+06 2.0999996e+01]
