## Imports

In [None]:
import pandas as pd
import torch
import os
import json
import numpy as np

## File Paths

In [None]:
# Paths used by the cells below. All of them are relative, so they resolve
# against the working directory the notebook is run from.

# Metadata table to run inference on (read with pd.read_csv).
input_csv = '../data/balanced_data/normalized_metadata_inference.csv'
# Checkpoint file; loaded with torch.load and indexed by 'model_state_dict'.
model_path = 'saved_models/best_model.pth'
# Destination CSV for the per-row predictions ('isic_id', 'target').
output_csv = 'inference_results.csv'
# JSON mapping of column name -> {'min': ..., 'max': ...} used for min-max scaling.
normalization_params_path = 'saved_models/normalization_params.json'

## Data Processing

In [None]:
# Metadata columns that are removed before building the feature matrix.
columns_to_drop = [
    "patient_id", "lesion_id", "iddx_full", "iddx_1", "iddx_2", "iddx_3", "iddx_4", "iddx_5",
    "mel_mitotic_index", "mel_thick_mm", "tbp_lv_dnn_lesion_confidence", "attribution", "copyright_license",
    "image_type", "anatom_site_general", "tbp_tile_type", "tbp_lv_location"
]

# Load the inference CSV
data = pd.read_csv(input_csv)

# Step 1: Drop unnecessary columns.
# errors='ignore' keeps this cell usable when the inference export does not
# contain every column in the list (a missing column used to raise KeyError).
data = data.drop(columns=columns_to_drop, errors='ignore')

# Step 2: Drop rows with missing values.
# NOTE: rows removed here receive no prediction in the output file.
data = data.dropna()

# Step 3: Convert 'sex' to binary
data['sex'] = data['sex'].map({'male': 1, 'female': 0})

# Step 4: One-hot encode 'tbp_lv_location_simple'
data = pd.get_dummies(data, columns=['tbp_lv_location_simple'], prefix='', prefix_sep='', dtype=int)

# Step 5: Rename one-hot encoded columns to match training format
# (lower-case, spaces replaced by underscores).
location_prefixes = ('Torso', 'Left', 'Right', 'Head', 'Unknown')
data = data.rename(
    columns={col: col.lower().replace(' ', '_') for col in data.columns if col.startswith(location_prefixes)}
)

# Step 6: Convert True/False to 1/0.
# Vectorised replacement for the deprecated per-cell DataFrame.applymap:
# infer_objects() turns object columns that hold only booleans into bool
# dtype, then every bool column is cast to integers in one pass.
data = data.infer_objects()
bool_columns = data.select_dtypes(include='bool').columns
data = data.astype({col: int for col in bool_columns})

# Separate ISIC IDs
isic_ids = data['isic_id']

# Step 7: Normalize non-binary features
with open(normalization_params_path, 'r') as f:
    normalization_params = json.load(f)

features = data.drop(columns=['isic_id']).copy()

# Columns that are left unscaled. Not used further in this cell; kept for
# inspection. dict.fromkeys() removes the duplicate 'sex' entry while
# preserving order.
binary_columns = list(dict.fromkeys(
    ['sex'] + [col for col in features.columns if col not in normalization_params]
))

# Fail early, with the full list, if the stored parameters refer to columns
# that the inference data does not have.
missing_columns = [col for col in normalization_params if col not in features.columns]
if missing_columns:
    raise KeyError(
        f"Columns listed in {normalization_params_path} are missing from the inference data: {missing_columns}"
    )

for col, params in normalization_params.items():
    col_min = params['min']
    col_max = params['max']
    col_range = col_max - col_min
    if col_range == 0:
        # A constant column would otherwise divide by zero and inject NaN/inf
        # into the model input; scale by 1 instead so values equal to the
        # stored minimum map to 0.
        # NOTE(review): assumes training treated constant columns the same way - confirm.
        col_range = 1.0
    features[col] = (features[col] - col_min) / col_range

# Convert to tensor
features_tensor = torch.tensor(features.values.astype(np.float32))

## Model Loading

In [None]:
class FeedforwardNN(torch.nn.Module):
    """Fully connected network with two hidden ReLU layers.

    The stack is Linear -> ReLU -> Dropout(0.2) -> Linear -> ReLU -> Linear.
    The final layer has no activation, so ``forward`` returns raw scores of
    width ``output_dim``.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        nn = torch.nn
        # The attribute name ``fc`` and the position of each layer determine
        # the state-dict keys (fc.0.*, fc.3.*, fc.5.*); keep both unchanged so
        # saved checkpoints continue to load.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the output scores."""
        scores = self.fc(x)
        return scores

In [None]:
# Load the model
# Use the GPU when PyTorch can see one, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The input width comes from the prepared feature matrix, so it must match the
# width the checkpoint was trained with or load_state_dict will reject it.
input_dim = features_tensor.size(1)
model = FeedforwardNN(input_dim=input_dim, hidden_dim=128, output_dim=2)

# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

model.to(device)
# eval() switches Dropout off for inference.
model.eval()

## Inference

In [None]:

# Score all rows in one forward pass; gradient tracking is disabled because
# nothing is being trained here.
with torch.no_grad():
    outputs = model(features_tensor.to(device))
    # Softmax across the two output scores, then keep the probability of
    # class index 1 as a NumPy array on the host.
    class_probabilities = torch.softmax(outputs, dim=1)
    probabilities = class_probabilities[:, 1].cpu().numpy()

# One output row per scored input row: its ISIC id and the class-1 probability.
results = pd.DataFrame({'isic_id': isic_ids}).assign(target=probabilities)
results.to_csv(output_csv, index=False)

print(f"Inference results saved to {output_csv}")