# Neural Network (PyTorch) - HepG2 Cryoprotectant Optimization

Rede neural simples com PyTorch otimizada para datasets pequenos (regularização + early stopping).

## Importar bibliotecas e configurar constantes

In [1]:
import pandas as pd, numpy as np, torch, torch.nn as nn
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Parent of the current working directory as an absolute path; data paths
# are built relative to it.
BASE_DIR = Path('..').resolve()

# Input columns and regression target, spelled exactly as the DataFrame
# columns they index.
FEATURES = ['% DMSO', 'TREHALOSE']
TARGET = '% QUEDA DA VIABILIDADE'

# All tensors and the model are placed on the CPU.
device = torch.device('cpu')

In [2]:
def safe_float(x):
    """Convert a raw table cell to ``float``, yielding NaN when it is not numeric.

    Percent signs are stripped and a decimal comma is turned into a decimal
    point before parsing, so ``'12,5%'`` becomes ``12.5``.

    Args:
        x: Any cell value; it is stringified before parsing.

    Returns:
        The parsed float, or ``float('nan')`` for blank cells and for any
        text that ``float()`` cannot parse (e.g. ``None``, ``'N/A'``,
        ``'<NA>'``).
    """
    s = str(x).replace('%', '').replace(',', '.').strip()
    try:
        return float(s)
    except ValueError:
        # Treat unparseable cells as missing instead of aborting the whole
        # column conversion; NaN rows can then be dropped by the caller.
        return float('nan')

# Read the raw table, telling pandas that ',' is the decimal mark and '.'
# the thousands separator.
raw_csv = BASE_DIR / 'data/raw/hepg2.csv'
df = pd.read_csv(raw_csv, decimal=',', thousands='.')

# Coerce every modelling column to float; cells that are blank become NaN.
model_columns = FEATURES + [TARGET]
for name in model_columns:
    df[name] = df[name].apply(safe_float)

# Row filters, applied in sequence:
#   1. drop rows with a missing feature or target,
#   2. drop rows where both features are exactly zero,
#   3. keep only rows whose features all lie within [0, 100].
df = df.dropna(subset=model_columns)
both_zero = (df[FEATURES[0]] == 0) & (df[FEATURES[1]] == 0)
df = df[~both_zero]
within_bounds = (df[FEATURES] >= 0).all(axis=1) & (df[FEATURES] <= 100).all(axis=1)
df = df[within_bounds]

# 80/20 hold-out split with a fixed seed.
X = df[FEATURES].values
y = df[TARGET].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features; the scaler is fitted on the training split only
# and then reused for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Float tensors on the target device; targets become column vectors.
X_train_t, X_test_t = (
    torch.FloatTensor(features).to(device) for features in (X_train, X_test)
)
y_train_t, y_test_t = (
    torch.FloatTensor(targets.reshape(-1, 1)).to(device)
    for targets in (y_train, y_test)
)

print(f"Dataset: {len(df)} samples | Train: {len(X_train)} | Test: {len(X_test)}")
print(f"Viability drop: {y.min():.2f}% - {y.max():.2f}% (mean: {y.mean():.2f}%)")

Dataset: 200 samples | Train: 160 | Test: 40
Viability drop: 0.15% - 100.00% (mean: 45.10%)


## Carregar e preparar dados

Leitura do CSV com conversão de valores percentuais e decimais com vírgula. Limpeza de valores ausentes, normalização com StandardScaler e conversão para tensores PyTorch.

In [None]:
class SimpleNN(nn.Module):
    """Small fully connected regressor with dropout after each hidden layer.

    The network is a stack of ``Linear -> ReLU -> Dropout`` groups followed by
    a final ``Linear`` output layer, stored as ``self.net``. With the default
    arguments it is ``2 -> 32 -> 16 -> 1`` with dropout 0.3 and 0.2.

    Args:
        in_features: Number of input features.
        hidden_sizes: Width of each hidden layer, in order.
        dropout_rates: Dropout probability applied after each hidden layer;
            must have the same length as ``hidden_sizes``.
        out_features: Number of outputs.

    Raises:
        ValueError: If ``hidden_sizes`` and ``dropout_rates`` differ in length.
    """

    def __init__(self, in_features=2, hidden_sizes=(32, 16),
                 dropout_rates=(0.3, 0.2), out_features=1):
        super().__init__()
        if len(hidden_sizes) != len(dropout_rates):
            raise ValueError(
                "hidden_sizes and dropout_rates must have the same length"
            )

        layers = []
        width = in_features
        for size, rate in zip(hidden_sizes, dropout_rates):
            layers += [nn.Linear(width, size), nn.ReLU(), nn.Dropout(rate)]
            width = size
        layers.append(nn.Linear(width, out_features))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for a batch ``x`` whose last dimension is ``in_features``."""
        return self.net(x)

# Seed torch so weight initialisation and dropout masks are reproducible,
# in line with the fixed random_state used for the data split.
torch.manual_seed(42)

model = SimpleNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)
loss_fn = nn.MSELoss()

# Early stopping is monitored on a validation subset carved out of the
# training data. The test set is never consulted during training, so the
# test metrics reported below are not biased by model selection.
fit_idx, val_idx = train_test_split(
    np.arange(len(X_train_t)), test_size=0.2, random_state=42
)
fit_idx = torch.as_tensor(fit_idx, dtype=torch.long, device=device)
val_idx = torch.as_tensor(val_idx, dtype=torch.long, device=device)
X_fit_t, y_fit_t = X_train_t[fit_idx], y_train_t[fit_idx]
X_val_t, y_val_t = X_train_t[val_idx], y_train_t[val_idx]

max_epochs = 500
patience, patience_counter = 50, 0
best_val_loss = float('inf')
best_state = None
stopped_early = False

for epoch in range(max_epochs):
    # One full-batch gradient step on the fitting subset.
    model.train()
    optimizer.zero_grad()
    loss = loss_fn(model(X_fit_t), y_fit_t)
    loss.backward()
    optimizer.step()

    # Validate after every epoch so that `patience` counts epochs without
    # improvement and can actually be reached within `max_epochs`.
    model.eval()
    with torch.no_grad():
        val_loss = loss_fn(model(X_val_t), y_val_t).item()

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Snapshot the best weights (cloned, since state_dict tensors are
        # updated in place by later optimizer steps).
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
    else:
        patience_counter += 1
        if patience_counter >= patience:
            stopped_early = True
            break

# Evaluate the best validated weights rather than the last ones.
if best_state is not None:
    model.load_state_dict(best_state)

model.eval()
with torch.no_grad():
    # Flatten the (n, 1) network output to match the 1-D target arrays.
    y_pred_train = model(X_train_t).cpu().numpy().ravel()
    y_pred_test = model(X_test_t).cpu().numpy().ravel()

# The train score covers the whole training split, validation rows included.
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))

stop_reason = (
    f"early stopped, patience={patience}" if stopped_early
    else "reached max epochs"
)

print("="*50)
print("NEURAL NETWORK PERFORMANCE")
print("="*50)
print(f"R² (Train): {r2_train:.4f} | R² (Test): {r2_test:.4f}")
print(f"MAE: {mae:.4f}% | RMSE: {rmse:.4f}%")
print(f"Epochs trained: {epoch+1} ({stop_reason})")

In [None]:
# Every integer pair of feature values from 0 to 100 inclusive, one pair per
# row, with columns in the same order as FEATURES.
conc = np.arange(0, 101, 1)
first_axis, second_axis = np.meshgrid(conc, conc)
grid = np.column_stack((first_axis.ravel(), second_axis.ravel()))

# Apply the scaling fitted on the training data before predicting.
grid_scaled = scaler.transform(grid)
grid_t = torch.FloatTensor(grid_scaled).to(device)

model.eval()
with torch.no_grad():
    y_pred = model(grid_t).cpu().numpy().flatten()

# Exclude the single point where both concentrations are zero.
valid = (grid[:, 0] != 0) | (grid[:, 1] != 0)
valid_positions = np.flatnonzero(valid)
valid_pred = y_pred[valid]

# The model predicts the viability drop, so the smallest prediction is best.
best_idx = np.argmin(valid_pred)
best_global_idx = valid_positions[best_idx]
best_dmso, best_tre = grid[best_global_idx]
best_viab = 100 - y_pred[best_global_idx]

# The 15 lowest predicted drops, mapped back to row positions in `grid`.
top_idx = np.argsort(valid_pred)[:15]
top_global = valid_positions[top_idx]

print("\n" + "=" * 50)
print("OPTIMAL RECOMMENDATION")
print("=" * 50)
print(f"DMSO: {best_dmso:.0f}% | Trehalose: {best_tre:.0f}%")
print(f"Predicted Viability: {best_viab:.2f}%")
print("\nTOP 15 COMBINATIONS:")
print("-" * 60)
print(f"{'Rank':>4} {'DMSO':>7} {'Trehalose':>11} {'Viability':>12}")
print("-" * 60)
for i, idx in enumerate(top_global, start=1):
    (d, t), v = grid[idx], 100 - y_pred[idx]
    print(f"{i:4d} {d:6.0f}% {t:10.0f}% {v:11.2f}%")

## Gerar predições para grid de combinações

Avalia todas as combinações de concentração (0-100%) em passos de 1%. Identifica as top 15 combinações com melhor predição de viabilidade.

In [None]:
# Row with the smallest measured viability drop, i.e. the best observed case.
# Series.argmin() is positional, hence the matching .iloc lookup.
best_obs_idx = df[TARGET].argmin()
best_obs = df.iloc[best_obs_idx]
best_obs_viab = 100 - best_obs[TARGET]

print("\n" + "="*60)
print("COMPARISON: Neural Network vs Dataset Observed")
print("="*60)
print("\nDataset Best Observed:")
print(f"   DMSO: {best_obs[FEATURES[0]]:.0f}% | Trehalose: {best_obs[FEATURES[1]]:.0f}%")
print(f"   Viability: {best_obs_viab:.2f}% (actual measurement)")

print("\nNeural Network Recommendation:")
print(f"   DMSO: {best_dmso:.0f}% | Trehalose: {best_tre:.0f}%")
print(f"   Predicted Viability: {best_viab:.2f}%")

# Ask the model about the best observed point, scaled like the training data.
best_obs_features = np.array([[best_obs[FEATURES[0]], best_obs[FEATURES[1]]]])
test_best = torch.FloatTensor(scaler.transform(best_obs_features)).to(device)

# Force inference mode so dropout is disabled even if this cell runs while
# the model is still in training mode (cells can be executed out of order).
model.eval()
with torch.no_grad():
    nn_pred_for_best = 100 - model(test_best).cpu().item()
print(f"\n   NN prediction for observed best: {nn_pred_for_best:.2f}%")

## Comparar predição com observações do dataset

Valida se a recomendação do modelo está alinhada com os melhores casos realmente observados nos dados experimentais.