In [27]:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_squared_error, r2_score




In [43]:


df = pd.read_csv("Penicillin.csv")
df = df.drop(columns=[col for col in df.columns if "Unnamed" in col])

target_col = 'Penicillin concentration(P:g/L)'
batch_col = 'Batch ID'
time_col = 'Time (h)'
input_cols = [col for col in df.columns if col not in [target_col, batch_col, time_col]]


df



Unnamed: 0,Time (h),Aeration rate(Fg:L/h),Agitator RPM(RPM:RPM),Sugar feed rate(Fs:L/h),Acid flow rate(Fa:L/h),Base flow rate(Fb:L/h),Heating/cooling water flow rate(Fc:L/h),Heating water flow rate(Fh:L/h),Water for injection/dilution(Fw:L/h),Air head pressure(pressure:bar),...,Oxygen in percent in off-gas(O2:O2 (%)),Carbon evolution rate(CER:g/h),Ammonia shots(NH3_shots:kgs),Fault reference(Fault_ref:Fault ref),0 - Recipe driven 1 - Operator controlled(Control_ref:Control ref),1- No Raman spec,1-Raman spec recorded,2-PAT control(PAT_ref:PAT ref),Batch reference(Batch_ref:Batch ref),Batch ID
0,0.2,30,100,8,0.0000,30.1180,9.8335,0.00010,0,0.6,...,0.19595,0.034045,0,0,0,1,1,1,0,0.0
1,0.4,30,100,8,0.0000,51.2210,18.1550,0.00010,0,0.6,...,0.20390,0.038702,0,0,0,1,1,1,0,0.0
2,0.6,30,100,8,0.0000,54.3020,9.5982,0.00010,0,0.6,...,0.20575,0.040240,0,0,0,1,1,1,0,0.0
3,0.8,30,100,8,0.0000,37.8160,4.3395,0.00010,0,0.6,...,0.20602,0.041149,0,0,0,1,1,1,0,0.0
4,1.0,30,100,8,0.5181,18.9080,1.1045,0.00010,0,0.6,...,0.20589,0.041951,0,0,0,1,1,1,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113930,229.2,65,100,80,0.0000,12.9000,33.5860,5.93590,250,0.9,...,0.19996,0.995500,0,0,0,1,100,100,1,43854.0
113931,229.4,65,100,80,0.0000,10.0470,10.0760,98.83400,250,0.9,...,0.20002,0.995220,0,0,0,1,100,100,1,44814.0
113932,229.6,65,100,80,0.0000,7.3609,69.3450,9.88340,250,0.9,...,0.20006,0.994940,0,0,0,1,100,100,1,44273.0
113933,229.8,65,100,80,0.0000,6.2794,38.6100,0.98834,250,0.9,...,0.20006,0.994660,0,0,0,1,100,100,1,44733.0


In [29]:
# Extracting & Simulate Batches
first_batch_id = df[batch_col].value_counts().idxmax()
batch_df = df[df[batch_col] == first_batch_id].sort_values(by=time_col).head(1000)

X_base = batch_df[input_cols].values
y_base = batch_df[target_col].values

def simulate_batches(X, y, n_batches=50, noise=0.10):
    X_sim = np.array([X * (1 + np.random.normal(0, noise, X.shape)) for _ in range(n_batches)])
    y_sim = np.array([y * (1 + np.random.normal(0, noise, y.shape)) for _ in range(n_batches)])
    return X_sim, y_sim

X_batches, y_batches = simulate_batches(X_base, y_base)

In [30]:
#  Time-Slice Unfolding
def time_slice_unfold(X_batches, y_batches, window=4):
    X_unfolded, y_unfolded = [], []
    for b in range(X_batches.shape[0]):
        for t in range(window - 1, X_batches.shape[1]):
            X_win = X_batches[b, t - window + 1: t + 1].flatten()
            y_val = y_batches[b, t]
            X_unfolded.append(X_win)
            y_unfolded.append(y_val)
    return np.array(X_unfolded), np.array(y_unfolded)

X_unfolded, y_unfolded = time_slice_unfold(X_batches, y_batches)

In [32]:
#  Normalize Target Features & Feature Selection
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y_unfolded.reshape(-1, 1)).flatten()

# Standardizing inputs
X_scaler = StandardScaler()
X_unfolded_scaled = X_scaler.fit_transform(X_unfolded)

# Feature selection
selector = SelectKBest(score_func=f_regression, k=30)
X_selected = selector.fit_transform(X_unfolded_scaled, y_scaled)

In [41]:

# DQRRL Model

class DQRRLModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.Tanh(),
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 16),
            nn.Tanh(),
            nn.Linear(16, 1)  # regression head
        )

    def forward(self, x):
        return self.net(x)



In [42]:
 ###10 Fold Cross-Validation

kf = KFold(n_splits=10, shuffle=True, random_state=42)
rmse_scores, r2_scores = [], []

for fold, (train_idx, test_idx) in enumerate(kf.split(X_selected)):
    print(f"\n Fold {fold + 1}/10")

    X_train, X_test = X_selected[train_idx], X_selected[test_idx]
    y_train, y_test = y_scaled[train_idx], y_scaled[test_idx]

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test.reshape(-1, 1), dtype=torch.float32)

    model = DQRRLModel(X_train.shape[1])
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()

    # Training
    for epoch in range(50):
        model.train()
        preds = model(X_train_tensor)
        loss = loss_fn(preds, y_train_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Model Evaluation
    model.eval()
    with torch.no_grad():
        y_pred_scaled = model(X_test_tensor).numpy()
        y_true_scaled = y_test_tensor.numpy()

    y_pred_unscaled = y_scaler.inverse_transform(y_pred_scaled)
    y_true_unscaled = y_scaler.inverse_transform(y_true_scaled)

    rmse = np.sqrt(mean_squared_error(y_true_unscaled, y_pred_unscaled))
    r2 = r2_score(y_true_unscaled, y_pred_unscaled)
    rmse_scores.append(rmse)
    r2_scores.append(r2)

    print(f" RMSE: {rmse:.6f}, R²: {r2:.4f}")



print("\n Final 10-Fold DQRRL Results")
print(f" Average RMSE: {np.mean(rmse_scores):.6f}")
print(f"Average R²:   {np.mean(r2_scores):.4f}")





 Fold 1/10
 RMSE: 0.000134, R²: 0.0847

 Fold 2/10
 RMSE: 0.000136, R²: 0.1092

 Fold 3/10
 RMSE: 0.000142, R²: 0.1000

 Fold 4/10
 RMSE: 0.000143, R²: 0.0991

 Fold 5/10
 RMSE: 0.000140, R²: 0.1000

 Fold 6/10
 RMSE: 0.000139, R²: 0.0973

 Fold 7/10
 RMSE: 0.000136, R²: 0.0889

 Fold 8/10
 RMSE: 0.000148, R²: 0.1015

 Fold 9/10
 RMSE: 0.000128, R²: 0.0753

 Fold 10/10
 RMSE: 0.000133, R²: 0.0891

 Final 10-Fold DQRRL Results
 Average RMSE: 0.000138
Average R²:   0.0945
