In [24]:
# Load Kedro's IPython extension. `catalog` is not defined anywhere in this
# notebook, so it is presumably injected by the extension -- TODO confirm.
%load_ext kedro.ipython
# Fetch the dataset registered under the name "s3_conc_aligned_df".
df = catalog.load("s3_conc_aligned_df")

The kedro.ipython extension is already loaded. To reload it, use:
  %reload_ext kedro.ipython


In [25]:
# Preview the first rows of the loaded frame to check its columns and values.
df.head()

Unnamed: 0,exp_no,timestamp_bin,A1_Resistance,A1_Resistance_diff,A1_Resistance_norm,A1_Sensor,A1_Sensor_diff,A1_Sensor_norm,SHT40_Humidity,SHT40_temp,index,resistance_ratio,ace_conc,expo_time
0,0,0.0,720650.75,0.0,1.0,4518.0,0.0,1.0,42.835,29.43,4481.5,1.768473,3.033e-07,3.0
1,0,1.0,720361.875,0.0,0.999599,4519.5,0.0,1.000332,42.84,29.435,4483.5,1.768473,3.033e-07,3.0
2,0,2.0,719688.47,-673.405,0.998665,4523.0,3.5,1.001107,42.845,29.445,4485.5,1.768473,3.033e-07,3.0
3,0,3.0,719015.94,-1634.81,0.997731,4526.5,8.5,1.001881,42.83,29.435,4487.5,1.768473,3.033e-07,3.0
4,0,4.0,718727.905,-1345.095,0.997332,4528.0,7.0,1.002213,42.845,29.44,4489.5,1.768473,3.033e-07,3.0


In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler
import torch


# Define the features, target and grouping key
features = ['timestamp_bin', 'A1_Resistance']
target = 'resistance_ratio'
group_col = 'exp_no'  # rows sharing this value are kept in the same split

# Extract the feature matrix, target vector and group labels.
# These are only used to drive the split below.
X = df[features]
y = df[target]
groups = df[group_col]

# Initialize the GroupShuffleSplit object for the training / held-out split:
# 20% of the groups are held out, the rest are used for training.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# Split into training and temporary (validation + testing) sets
train_idx, temp_idx = next(gss.split(X, y, groups=groups))

# Handle missing values AFTER splitting, using column means computed from the
# training rows only. Taking the means over the full frame before splitting
# would leak validation/test statistics into the training data.
# `numeric_only=True` keeps this working if a non-numeric column is present.
# New frames are built instead of filling in place, so `df` is left untouched
# and this cell gives the same result every time it is re-run.
train_df = df.iloc[train_idx]
train_means = train_df.mean(numeric_only=True)
train_df = train_df.fillna(train_means)
temp_df = df.iloc[temp_idx].fillna(train_means)

# Split the temporary set into validation and testing sets (half of the
# held-out groups each), again keeping every group intact.
gss_temp = GroupShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
val_idx, test_idx = next(
    gss_temp.split(temp_df[features], temp_df[target], groups=temp_df[group_col])
)

# Create DataFrames for validation and testing sets
val_df = temp_df.iloc[val_idx]
test_df = temp_df.iloc[test_idx]

# Sanity check: the three splits partition the original rows.
assert len(train_df) + len(val_df) + len(test_df) == len(df)

# Extract features and targets for each set
X_train = train_df[features]
y_train = train_df[target]
X_val = val_df[features]
y_val = val_df[target]
X_test = test_df[features]
y_test = test_df[target]

# Normalize the features. The scaler is fitted on the training set only and
# then applied unchanged to validation and test. From here on the X_* names
# hold NumPy arrays rather than DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors (float32 to match the default dtype of nn layers)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)


In [27]:
import torch.nn as nn
import torch.optim as optim

class LinearNN(nn.Module):
    """Single fully connected layer mapping ``input_dim`` features to one output."""

    def __init__(self, input_dim: int) -> None:
        """Create the model.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        # The attribute name is kept as `linear` so state-dict keys stay the same.
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

# Define model, loss function, and optimizer.
# Seed PyTorch's RNG first so the random weight initialisation -- and with it
# the whole loss curve -- is reproducible across kernel restarts.
torch.manual_seed(42)
input_dim = X_train.shape[1]
model = LinearNN(input_dim)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# The model outputs one column per sample, so reshape the targets to a single
# column to match. Done once here because the reshape does not change per epoch.
y_train_col = y_train_tensor.view(-1, 1)
y_val_col = y_val_tensor.view(-1, 1)
y_test_col = y_test_tensor.view(-1, 1)

# Training loop: each epoch is one gradient step over the full training set.
num_epochs = 200
for epoch in range(num_epochs):
    # Forward pass on training set
    model.train()
    train_outputs = model(X_train_tensor)
    train_loss = criterion(train_outputs, y_train_col)

    # Backward pass and optimization
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Validation (no gradients are tracked; measured after this epoch's update)
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_col)

    # Report progress every 10th epoch
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

# Testing the model on the held-out test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_col)
    print(f'Test Loss: {test_loss.item():.4f}')


Epoch [10/300], Train Loss: 1.9148, Validation Loss: 1.9173
Epoch [20/300], Train Loss: 1.2878, Validation Loss: 1.2923
Epoch [30/300], Train Loss: 0.8717, Validation Loss: 0.8776
Epoch [40/300], Train Loss: 0.5955, Validation Loss: 0.6022
Epoch [50/300], Train Loss: 0.4120, Validation Loss: 0.4192
Epoch [60/300], Train Loss: 0.2901, Validation Loss: 0.2974
Epoch [70/300], Train Loss: 0.2090, Validation Loss: 0.2164
Epoch [80/300], Train Loss: 0.1551, Validation Loss: 0.1624
Epoch [90/300], Train Loss: 0.1193, Validation Loss: 0.1264
Epoch [100/300], Train Loss: 0.0954, Validation Loss: 0.1023
Epoch [110/300], Train Loss: 0.0796, Validation Loss: 0.0862
Epoch [120/300], Train Loss: 0.0690, Validation Loss: 0.0755
Epoch [130/300], Train Loss: 0.0620, Validation Loss: 0.0682
Epoch [140/300], Train Loss: 0.0573, Validation Loss: 0.0633
Epoch [150/300], Train Loss: 0.0541, Validation Loss: 0.0601
Epoch [160/300], Train Loss: 0.0521, Validation Loss: 0.0578
Epoch [170/300], Train Loss: 0.05