In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from torch.utils.data import random_split
from torch.amp import autocast, GradScaler

In [2]:
print(torch.__version__)              # 2.5.1+cu118
print(torch.cuda.is_available())      # True
print(torch.cuda.get_device_name(0))  # Your GPU name

2.5.1+cu118
True
NVIDIA GeForce RTX 3060 Laptop GPU


# Data Prep

In [3]:
file_paths = {
    "GreenhouseClimate": "D:/Bitchass Agri stupid af shjt/Reference/GreenhouseClimate.csv",
    "GrodanSens": "D:/Bitchass Agri stupid af shjt/Reference/GrodanSens.csv",
    "Resources": "D:/Bitchass Agri stupid af shjt/Reference/Resources.csv",
    "Weather": "D:/Bitchass Agri stupid af shjt/Weather/Weather.csv",
    "CropParameters": "D:/Bitchass Agri stupid af shjt/Reference/CropParameters.csv"
}

In [4]:
dfs = {}
for name, path in file_paths.items():
    df = pd.read_csv(path, low_memory=False)
    df.columns = df.columns.str.strip().str.replace('\t', '')
    if '%time' in df.columns or '%Time' in df.columns:
            time_col = '%time' if '%time' in df.columns else '%Time'
            df[time_col] = pd.to_numeric(df[time_col], errors='coerce')
            df['Date'] = pd.to_datetime(df[time_col], origin='1899-12-30', unit='D')
    elif 'time' in df.columns:
        df['time'] = pd.to_numeric(df['time'], errors='coerce')
        df['Date'] = pd.to_datetime(df['time'], origin='1899-12-30', unit='D')
    elif 'Date' in df.columns:
        if pd.api.types.is_numeric_dtype(df['Date']):
            df['Date'] = pd.to_datetime(df['Date'], origin='1899-12-30', unit='D', errors='coerce')
        else:
            df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    else:
        df['Date'] = pd.NaT
    df.loc[:, df.columns != 'Date'] = df.loc[:, df.columns != 'Date'].apply(pd.to_numeric, errors='coerce')
    dfs[name] = df


In [5]:
print(dfs['Resources']['Date'])

0     2019-12-16
1     2019-12-17
2     2019-12-18
3     2019-12-19
4     2019-12-20
         ...    
161   2020-05-25
162   2020-05-26
163   2020-05-27
164   2020-05-28
165   2020-05-29
Name: Date, Length: 166, dtype: datetime64[ns]


In [6]:
soil_cols = ['EC_slab1', 'EC_slab2', 'WC_slab1', 'WC_slab2', 't_slab1', 't_slab2']
indoor_cols = ['Tair', 'Rhair', 'CO2air', 'HumDef', 'PipeLow', 'VentLee', 'Ventwind', 'Tot_PAR', 'Tot_PAR_Lamps', 'EC_drain_PC']
weather_cols = ['Tout', 'Rhout', 'Iglob', 'PARout', 'Pyrgeo', 'Rain', 'Winddir', 'Windsp']
crop_cols = ['Stem_elong', 'Stem_thick', 'Cum_trusses', 'stem_dens', 'plant_dens']

In [7]:
def reshape_sliding(df, cols, steps, stride=1):
    df = df.copy()
    df[cols] = df[cols].astype(np.float32)
    arr = df[cols].values
    if len(arr) < steps:
        return np.empty((0, steps, len(cols)))
    windows = [arr[i:i + steps] for i in range(0, len(arr) - steps + 1, stride)]
    return np.stack(windows)

In [8]:
def compute_delta(mask):
    B, T, D = mask.shape
    delta = np.zeros((B, T, D), dtype=np.float32)
    for b in range(B):
        for d in range(D):
            last_obs = 0
            for t in range(T):
                if mask[b, t, d] == 1:
                    delta[b, t, d] = 0
                    last_obs = 0
                else:
                    last_obs += 1
                    delta[b, t, d] = last_obs
    return delta

In [9]:
base = dfs['GrodanSens'].copy()
base = base.dropna(subset=['Date']).sort_values('Date').reset_index(drop=True)
merge_partners = ['GreenhouseClimate', 'Weather', 'CropParameters']
for name in merge_partners:
    df = dfs[name].copy().dropna(subset=['Date']).sort_values('Date')
    if '%time' in df.columns:
        df.drop(columns=['%time'], inplace=True)
    try:
        base = pd.merge_asof(base, df, on='Date', direction='nearest', tolerance=pd.Timedelta('1D'))
    except ValueError as e:
        print(f"[ERROR] Skipped {name} during merge: {e}")
resources_df = dfs['Resources'].copy().dropna(subset=['Date']).sort_values('Date').reset_index(drop=True)
resources_df['Energy'] = resources_df[['Heat_cons', 'ElecHigh', 'ElecLow']].astype(np.float32).sum(axis=1)
resources_df = resources_df[['Date', 'Energy']]
resources_df['Date'] = resources_df['Date'].dt.floor('D')
base['Date'] = base['Date'].dt.floor('D')
base = pd.merge_asof(base.sort_values('Date'), resources_df, on='Date', direction='nearest', tolerance=pd.Timedelta('1D'))
base = base.dropna(subset=['Energy']).reset_index(drop=True)

In [10]:
steps = 288  # full day (5 min x 288 = 24 hours)
# --- Sliding window time-series ---
soil_data = reshape_sliding(base, soil_cols, steps=steps, stride=1)
soil_mask = (~np.isnan(soil_data)).astype(np.float32)
soil_delta = compute_delta(soil_mask)
soil_data = np.nan_to_num(soil_data)
indoor_data = reshape_sliding(base, indoor_cols, steps=steps, stride=1)
weather_data = reshape_sliding(base, weather_cols, steps=steps, stride=1)
# --- Crop is static per day, align with sliding windows ---
crop_data_raw = base[crop_cols].astype(np.float32).to_numpy()
crop_data = crop_data_raw[steps - 1:]  # align with window end points

In [11]:
window_end_dates = base['Date'].iloc[steps - 1:].reset_index(drop=True)
targets = []
valid_indices = []
for i in range(len(soil_data)):
    end_date = window_end_dates[i].floor('D')
    match = resources_df[resources_df['Date'] == (end_date + pd.Timedelta(days=1))]
    if not match.empty:
        energy_val = match['Energy'].values[0]
        if not np.isnan(energy_val):
            targets.append(energy_val.astype(np.float32))
            valid_indices.append(i)

In [12]:
print(f"Matched {len(targets)} out of {len(soil_data)} total windows")

Matched 47233 out of 47522 total windows


In [13]:
if valid_indices:
    soil_data = soil_data[valid_indices]
    soil_mask = soil_mask[valid_indices]
    soil_delta = soil_delta[valid_indices]
    indoor_data = indoor_data[valid_indices]
    weather_data = weather_data[valid_indices]
    crop_data = crop_data[valid_indices]
    targets = torch.tensor(np.array(targets), dtype=torch.float32).unsqueeze(1)
    dataset = TensorDataset(
        torch.tensor(soil_data, dtype=torch.float32),
        torch.tensor(soil_mask, dtype=torch.float32),
        torch.tensor(soil_delta, dtype=torch.float32),
        torch.tensor(indoor_data, dtype=torch.float32),
        torch.tensor(weather_data, dtype=torch.float32),
        torch.tensor(crop_data, dtype=torch.float32),
        targets
    )
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
    print(f"Dataloader ready: {len(dataset)} labeled windows with next-day energy targets")
else:
    print("No labeled windows found for energy prediction.")

Dataloader ready: 47233 labeled windows with next-day energy targets


In [14]:
sample_batch = next(iter(dataloader))
soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x, target_y = sample_batch

In [15]:
# Check your full training target tensor
print("Any NaN in y tensor:", torch.isnan(target_y).any())

Any NaN in y tensor: tensor(False)


In [16]:
# Check shapes
print(f"Soil shape:        {soil_x.shape}")
print(f"Soil mask shape:   {soil_mask.shape}")
print(f"Soil delta shape:  {soil_delta.shape}")
print(f"Indoor shape:      {indoor_x.shape}")
print(f"Weather shape:     {weather_x.shape}")
print(f"Crop shape:        {crop_x.shape}")
print(f"Target shape:      {target_y.shape}")

Soil shape:        torch.Size([64, 288, 6])
Soil mask shape:   torch.Size([64, 288, 6])
Soil delta shape:  torch.Size([64, 288, 6])
Indoor shape:      torch.Size([64, 288, 10])
Weather shape:     torch.Size([64, 288, 8])
Crop shape:        torch.Size([64, 5])
Target shape:      torch.Size([64, 1])


In [17]:
# Lengths
total_len = len(dataset)
train_len = int(0.8 * total_len)
val_len = int(0.1 * total_len)
test_len = total_len - train_len - val_len  # handle rounding
# Split dataset
train, val, test = random_split(dataset, [train_len, val_len, test_len])
# DataLoaders
batch_size = 64  # or 64
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
print(f"Split sizes — Train: {len(train)}, Val: {len(val)}, Test: {len(test)}")

Split sizes — Train: 37786, Val: 4723, Test: 4724


# Model

In [18]:
class FeatureNorm(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
    def forward(self, x):
        return self.norm(x)

In [19]:
class GRUD(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=None, device="cpu"):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.device = device
        self.input_embed = nn.Linear(input_size, input_size)
        self.gamma_x = nn.Parameter(torch.ones(input_size) * 0.1)
        self.gamma_h = nn.Parameter(torch.ones(hidden_size) * 0.1)
        self.z_gate = nn.Linear(input_size * 3 + hidden_size, hidden_size)
        self.r_gate = nn.Linear(input_size * 3 + hidden_size, hidden_size)
        self.h_tilde = nn.Linear(input_size * 3 + hidden_size, hidden_size)
        self.output = nn.Sequential(
            nn.LayerNorm(hidden_size),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, output_size) if output_size else nn.Identity()
        )
    def forward(self, x, x_mask, x_delta, x_mean=None):
        B, T, D = x.shape
        x = self.input_embed(x)
        x = torch.nan_to_num(x, nan=0.0, posinf=0.0, neginf=0.0)
        if x_mean is None:
            x_mean = torch.mean(x, dim=1, keepdim=True).detach()
        x_mean = torch.nan_to_num(x_mean, nan=0.0, posinf=0.0, neginf=0.0)
        h = torch.zeros(B, self.hidden_size, device=self.device)
        outputs = []
        gamma_h = torch.exp(-F.relu(self.gamma_h)).unsqueeze(0).expand(B, -1)
        gamma_h = torch.nan_to_num(gamma_h, nan=0.0, posinf=0.0, neginf=0.0)
        for t in range(T):
            x_t = x[:, t, :]
            m_t = x_mask[:, t, :]
            d_t = x_delta[:, t, :]
            d_t = torch.clamp(d_t, 0.0, 100.0)
            d_t = torch.nan_to_num(d_t, nan=0.0, posinf=100.0, neginf=0.0)
            gamma_x = torch.exp(-F.relu(self.gamma_x) * d_t)
            gamma_x = torch.nan_to_num(gamma_x, nan=1.0, posinf=1.0, neginf=0.0)
            x_t_hat = m_t * x_t + (1 - m_t) * (gamma_x * x_mean.squeeze(1))
            x_t_hat = torch.nan_to_num(x_t_hat, nan=0.0, posinf=0.0, neginf=0.0)
            h = gamma_h * h
            h = torch.nan_to_num(h, nan=0.0, posinf=0.0, neginf=0.0)
            inputs = torch.cat([x_t_hat, m_t, d_t, h], dim=1)
            inputs = torch.nan_to_num(inputs, nan=0.0, posinf=0.0, neginf=0.0)
            z = torch.sigmoid(self.z_gate(inputs))
            r = torch.sigmoid(self.r_gate(inputs))
            h_tilde = torch.tanh(self.h_tilde(torch.cat([x_t_hat, m_t, d_t, r * h], dim=1)))
            h = (1 - z) * h + z * h_tilde
            h = torch.nan_to_num(h, nan=0.0, posinf=0.0, neginf=0.0)
            outputs.append(h.unsqueeze(1))
        outputs = torch.cat(outputs, dim=1)
        pooled = outputs[:, -1, :]
        pooled = torch.nan_to_num(pooled, nan=0.0, posinf=0.0, neginf=0.0)
        return self.output(pooled)

In [20]:
class MLPEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, output_dim)
        )
    def forward(self, x):
        x = torch.nan_to_num(x, nan=0.0, posinf=0.0, neginf=0.0)
        return self.net(x)

In [21]:
class SimpleGRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)
        self.out = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, output_dim)
        )
    def forward(self, x):
        x = torch.nan_to_num(x, nan=0.0, posinf=0.0, neginf=0.0)
        _, h = self.gru(x)
        h = h[-1]
        h = torch.nan_to_num(h, nan=0.0, posinf=0.0, neginf=0.0)
        return self.out(h)

In [22]:
class AttentionFusion(nn.Module):
    def __init__(self, input_dims, fusion_dim):
        super().__init__()
        self.attn = nn.Sequential(
            nn.Linear(sum(input_dims), fusion_dim),
            nn.Tanh(),
            nn.Linear(fusion_dim, len(input_dims)),
            nn.Softmax(dim=1)
        )
    def forward(self, features):
        features = [torch.nan_to_num(f, nan=0.0, posinf=0.0, neginf=0.0) for f in features]
        all_feat = torch.cat(features, dim=1)
        all_feat = torch.nan_to_num(all_feat, nan=0.0, posinf=0.0, neginf=0.0)
        weights = self.attn(all_feat)  # [B, num_modalities]
        weighted = [f * weights[:, i:i+1] for i, f in enumerate(features)]
        return torch.cat(weighted, dim=1)

In [23]:
class HybridAgriModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.soil_encoder = GRUD(config['soil_in'], config['soil_hidden'], config['branch_out'], device=config['device'])
        self.env_encoder = SimpleGRU(config['indoor_in'] + config['weather_in'], 32, config['branch_out'])
        self.crop_encoder = MLPEncoder(config['crop_in'], 16, config['branch_out'])
        self.fusion = AttentionFusion([config['branch_out']] * 3, config['fusion_dim'])
        self.head = nn.Sequential(
            nn.LayerNorm(config['branch_out'] * 3),
            nn.Dropout(0.3),
            nn.Linear(config['branch_out'] * 3, 64),
            nn.ReLU(),
            nn.Linear(64, config['output_dim']),
            nn.Softplus()
        )
    def forward(self, soil, mask, delta, indoor, weather, crop):
        f1 = self.soil_encoder(soil, mask, delta)
        f2 = self.env_encoder(torch.cat([indoor, weather], dim=2))
        f3 = self.crop_encoder(crop)
        f1 = torch.nan_to_num(f1, nan=0.0, posinf=0.0, neginf=0.0)
        f2 = torch.nan_to_num(f2, nan=0.0, posinf=0.0, neginf=0.0)
        f3 = torch.nan_to_num(f3, nan=0.0, posinf=0.0, neginf=0.0)
        fused = self.fusion([f1, f2, f3])
        fused = torch.nan_to_num(fused, nan=0.0, posinf=0.0, neginf=0.0)
        return self.head(fused)

In [24]:
config_virtual = {
    'soil_in': 6,
    'soil_hidden': 64,
    'indoor_in': 10,
    'weather_in': 8,
    'crop_in': 5,
    'branch_out': 64,
    'fusion_dim': 128,
    'output_dim': 1,
    'device': 'cpu'
}
# === Dummy inputs that reflect your DataLoader ===
B = 8
sample_inputs = (
    torch.rand(B, 20, config_virtual['soil_in']),       # soil_x
    torch.ones(B, 20, config_virtual['soil_in']),       # soil_mask
    torch.zeros(B, 20, config_virtual['soil_in']),      # soil_delta
    torch.rand(B, 20, config_virtual['indoor_in']),     # indoor_x
    torch.rand(B, 20, config_virtual['weather_in']),    # weather_x
    torch.rand(B, config_virtual['crop_in'])            # crop_x
)

In [25]:
def validate_model_structure(model, sample_inputs):
    print("Model Architecture Validation:\n")
    logs = []
    def safe_shape(x):
        if isinstance(x, torch.Tensor):
            return list(x.shape)
        elif isinstance(x, (list, tuple)) and isinstance(x[0], torch.Tensor):
            return [list(t.shape) for t in x]
        return str(type(x))
    def hook_fn(module, input, output):
        logs.append({
            "layer": module.__class__.__name__,
            "input_shape": safe_shape(input),
            "output_shape": safe_shape(output)
        })
    hooks = [m.register_forward_hook(hook_fn) for m in model.modules()
             if not isinstance(m, (nn.Sequential, nn.ModuleList)) and m != model]
    model.eval()
    with torch.no_grad():
        model(*sample_inputs)
    for i, log in enumerate(logs):
        print(f"{i:02d} - {log['layer']:20} | Input: {log['input_shape']} -> Output: {log['output_shape']}")
    for h in hooks:
        h.remove()

In [26]:
model = HybridAgriModel(config_virtual)
validate_model_structure(model, sample_inputs)

Model Architecture Validation:

00 - Linear               | Input: [[8, 20, 6]] -> Output: [8, 20, 6]
01 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
02 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
03 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
04 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
05 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
06 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
07 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
08 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
09 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
10 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
11 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
12 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
13 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
14 - Linear               | Input: [[8, 82]] -> Output: [8, 64]
15

# Train this bitchass

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

config_real = {
    'soil_in': 6,
    'soil_hidden': 64,
    'indoor_in': 10,
    'weather_in': 8,
    'crop_in': 5,
    'branch_out': 64,
    'fusion_dim': 128,
    'output_dim': 1,
    'device': device
}

Using device: cuda


In [28]:
def compute_metrics(preds, targets):
    preds = preds.flatten()
    targets = targets.flatten()
    rmse = np.sqrt(mean_squared_error(targets, preds))
    r2 = r2_score(targets, preds)
    return rmse, r2

In [32]:
def train_and_validate(model, train_loader, val_loader, optimizer, criterion, device, epochs=20):
    scaler = GradScaler(device='cuda')
    for epoch in range(1, epochs + 1):
        model.train()
        train_preds, train_targets = [], []
        train_loss = 0.0
        for batch in train_loader:
            soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x, target_y = [b.to(device) for b in batch]
            optimizer.zero_grad()
            with autocast(device_type='cuda'):
                outputs = model(soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x)
                loss = criterion(outputs, target_y)
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)  # AMP-safe unscale
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item()
            train_preds.append(outputs.detach().cpu().numpy())
            train_targets.append(target_y.cpu().numpy())
        # Metrics
        train_preds = np.concatenate(train_preds)
        train_targets = np.concatenate(train_targets)
        train_rmse, train_r2 = compute_metrics(train_preds, train_targets)
        # Validation
        model.eval()
        val_preds, val_targets = [], []
        with torch.no_grad():
            for batch in val_loader:
                soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x, target_y = [b.to(device) for b in batch]
                with autocast(device_type='cuda'):
                    outputs = model(soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x)
                val_preds.append(outputs.cpu().numpy())
                val_targets.append(target_y.cpu().numpy())
        val_preds = np.concatenate(val_preds)
        val_targets = np.concatenate(val_targets)
        val_rmse, val_r2 = compute_metrics(val_preds, val_targets)
        print(f"[Epoch {epoch:03}] Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f} | Val RMSE: {val_rmse:.4f}, R²: {val_r2:.4f}")

In [33]:
def evaluate(model, test_loader, device):
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for batch in test_loader:
            soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x, target_y = [b.to(device) for b in batch]
            outputs = model(soil_x, soil_mask, soil_delta, indoor_x, weather_x, crop_x)
            preds.append(outputs.cpu().numpy())
            targets.append(target_y.cpu().numpy())
    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    rmse, r2 = compute_metrics(preds, targets)
    print(f"Test RMSE: {rmse:.4f}, R²: {r2:.4f}")
    return rmse, r2

In [None]:
model = HybridAgriModel(config_real).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()
train_and_validate(model, train_loader, val_loader, optimizer, criterion, device, epochs=20)

In [None]:
evaluate(model, test_loader, device)