In [None]:
# train.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import boto3, io, pandas as pd
from PIL import Image
from botocore.exceptions import ClientError
import time, matplotlib.pyplot as plt, numpy as np
import os


# --- AWS setup ---
REGION = "us-east-1"
BUCKET = "aws-chest-xray-model-hackathon"
IMAGES_PREFIX = "archive"
LABELS_PATH = f"s3://{BUCKET}/archive/effusion.csv"

s3 = boto3.client("s3", region_name=REGION)

test_key = "archive/00001389_004.png"
try:
    s3.head_object(Bucket=BUCKET, Key=test_key)
    print("‚úÖ File exists and is accessible!")
except Exception as e:
    print("‚ùå Error fetching file:", e)

# --- Load CSV ---
labels_df = pd.read_csv(LABELS_PATH)
labels_df.rename(columns={"Image Index": "image_name", "Label": "label"}, inplace=True)
print(f"Loaded {len(labels_df)} rows from CSV")


# --- Transform ---
img_size = 256
transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# --- Dataset ---
class S3ImageDataset(Dataset):
    def __init__(self, df, bucket, prefix, s3_client, transform=None):
        self.df = df.reset_index(drop=True)
        self.bucket = bucket
        self.prefix = prefix
        self.s3 = s3_client
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_name = str(row["image_name"])
        label = torch.tensor(row["label"], dtype=torch.float32)
        key = f"{self.prefix}/{image_name}"

        try:
            self.s3.head_object(Bucket=self.bucket, Key=key)
        except ClientError:
            return None

        obj = self.s3.get_object(Bucket=self.bucket, Key=key)
        img_bytes = obj["Body"].read()
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

def collate_skip_missing(batch):
    batch = [x for x in batch if x is not None]
    if len(batch) == 0:
        return None
    imgs, labels = zip(*batch)
    return torch.stack(imgs, 0), torch.tensor(labels)

# --- Split ---
train_df, val_df = train_test_split(labels_df, test_size=0.2, stratify=labels_df["label"], random_state=42)

trainloader = DataLoader(
    S3ImageDataset(train_df, BUCKET, IMAGES_PREFIX, s3, transform),
    batch_size=32, shuffle=True, num_workers=2, collate_fn=collate_skip_missing)

valloader = DataLoader(
    S3ImageDataset(val_df, BUCKET, IMAGES_PREFIX, s3, transform),
    batch_size=32, shuffle=False, num_workers=2, collate_fn=collate_skip_missing)

# --- Model ---
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * (img_size // 8) * (img_size // 8), 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# --- Training ---
print("training")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)
criterion = nn.BCEWithLogitsLoss()
print(type(model))
print(isinstance(model, nn.Module))
print(hasattr(model, 'parameters'))
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

num_epochs = 2
loss_history = []
epoch_loss_history = []

for epoch in range(num_epochs):
    model.train()
    running_loss, samples = 0.0, 0
    start = time.time()
    print("Epoch ", {epoch+1})
    for batch in trainloader:
        if batch is None: 
            continue
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_history.append(loss.item())
        running_loss += loss.item() * inputs.size(0)
        samples += inputs.size(0)

    if samples == 0:
        print(f" Epoch {epoch+1}: No valid images were loaded. Skipping this epoch.")
        continue
    
    epoch_loss = running_loss / samples
    epoch_loss_history.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Time: {time.time()-start:.1f}s")

# Save model + losses

torch.save(model.state_dict(), "model/effusion1_cnn.pth")
np.save("model/loss_history1.npy", np.array(loss_history))
np.save("model/epoch_loss_history1.npy", np.array(epoch_loss_history))

# Plot
plt.figure(figsize=(10,4))
plt.plot(epoch_loss_history)
plt.title("Epoch Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.savefig("model/train_loss_curve.png")
print("Training complete.")


‚úÖ File exists and is accessible!
Loaded 7725 rows from CSV
training
<class '__main__.ConvNet'>
True
True
Epoch  {1}


In [None]:
# test.py
import torch, numpy as np, matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from train import ConvNet, S3ImageDataset, collate_skip_missing, transform, s3, BUCKET, IMAGES_PREFIX, labels_df
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# --- Load trained model ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)
model.load_state_dict(torch.load("model/effusion1_cnn.pth", map_location=device))
model.eval()

# --- Prepare test data ---
_, val_df = train_test_split(labels_df, test_size=0.2, stratify=labels_df["label"], random_state=42)
valloader = DataLoader(
    S3ImageDataset(val_df, BUCKET, IMAGES_PREFIX, s3, transform),
    batch_size=32, shuffle=False, num_workers=2, collate_fn=collate_skip_missing
)

# --- Metrics containers ---
criterion = torch.nn.BCEWithLogitsLoss()
val_loss, samples = 0.0, 0
all_labels, all_preds, all_probs = [], [], []

with torch.no_grad():
    for batch in valloader:
        if batch is None: continue
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)
        logits = model(inputs)
        loss = criterion(logits, labels)
        val_loss += loss.item() * inputs.size(0)
        samples += inputs.size(0)

        probs = torch.sigmoid(logits).squeeze(1).cpu().numpy()
        preds = (probs > 0.5).astype(int)
        labs = labels.squeeze(1).cpu().numpy().astype(int)

        all_probs.extend(probs.tolist())
        all_preds.extend(preds.tolist())
        all_labels.extend(labs.tolist())

# --- Compute metrics ---
val_loss /= samples
all_labels = np.array(all_labels)
all_preds = np.array(all_preds)
val_acc = (all_preds == all_labels).mean()
val_prec = precision_score(all_labels, all_preds, zero_division=0)
val_rec = recall_score(all_labels, all_preds, zero_division=0)
val_f1 = f1_score(all_labels, all_preds, zero_division=0)

print(f"\nValidation Metrics:")
print(f"Loss={val_loss:.4f} | Acc={val_acc:.4f} | Prec={val_prec:.4f} | Rec={val_rec:.4f} | F1={val_f1:.4f}")

# --- Save metrics and plots ---
np.save("model/val_probs1.npy", all_probs)
np.save("model/val_preds1.npy", all_preds)
np.save("model/val_labels1.npy", all_labels)

plt.figure(figsize=(6,4))
plt.hist(all_probs, bins=30, alpha=0.7)
plt.title("Predicted Probabilities")
plt.xlabel("Probability of Effusion")
plt.ylabel("Count")
plt.savefig("model/test_probs_hist.png")

print("Test results + plots saved in model/")


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import boto3, io, pandas as pd
from PIL import Image
from botocore.exceptions import ClientError
import time, matplotlib.pyplot as plt, numpy as np
import os

# --- AWS setup ---
REGION = "us-east-1"
BUCKET = "aws-chest-xray-model-hackathon"
IMAGES_PREFIX = "archive"
LABELS_PATH = f"s3://{BUCKET}/archive/effusion.csv"

s3 = boto3.client("s3", region_name=REGION)

# --- Load CSV ---
labels_df = pd.read_csv(LABELS_PATH)
labels_df.rename(columns={"Image Index": "image_name", "Label": "label"}, inplace=True)
print(f"‚úÖ Loaded {len(labels_df)} rows from CSV")

# --- Transform ---
img_size = 256
transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# --- Dataset ---
class S3ImageDataset(Dataset):
    def __init__(self, df, bucket, prefix, s3_client, transform=None):
        self.df = df.reset_index(drop=True)
        self.bucket = bucket
        self.prefix = prefix
        self.s3 = s3_client
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_name = str(row["image_name"])
        label = torch.tensor(row["label"], dtype=torch.float32)
        key = f"{self.prefix}/{image_name}"

        try:
            obj = self.s3.get_object(Bucket=self.bucket, Key=key)
        except ClientError:
            return None

        img_bytes = obj["Body"].read()
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

def collate_skip_missing(batch):
    batch = [x for x in batch if x is not None]
    if len(batch) == 0:
        return None
    imgs, labels = zip(*batch)
    return torch.stack(imgs, 0), torch.tensor(labels)

# --- Split dataset ---
train_df, val_df = train_test_split(labels_df, test_size=0.2, stratify=labels_df["label"], random_state=42)

trainloader = DataLoader(
    S3ImageDataset(train_df, BUCKET, IMAGES_PREFIX, s3, transform),
    batch_size=16, shuffle=True, num_workers=2, collate_fn=collate_skip_missing)

valloader = DataLoader(
    S3ImageDataset(val_df, BUCKET, IMAGES_PREFIX, s3, transform),
    batch_size=16, shuffle=False, num_workers=2, collate_fn=collate_skip_missing)

# --- Model ---
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * (img_size // 8) * (img_size // 8), 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# --- Train ---
print("üöÄ Starting training...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 2  # (Keep small for now)
loss_history = []

os.makedirs("model", exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss, samples = 0.0, 0
    start = time.time()
    for batch in trainloader:
        if batch is None: 
            continue
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.unsqueeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        samples += inputs.size(0)

    epoch_loss = running_loss / samples
    loss_history.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Time: {time.time()-start:.1f}s")

# Save model weights
torch.save(model.state_dict(), "model/effusion1_cnn.pth")
print("‚úÖ Model weights saved to model/effusion1_cnn.pth")


‚úÖ Loaded 7725 rows from CSV
üöÄ Starting training...
Epoch 1/2 - Loss: 0.6948 - Time: 49.0s
Epoch 2/2 - Loss: 0.6849 - Time: 47.4s
‚úÖ Model weights saved to model/effusion1_cnn.pth


In [28]:
!ls model/

effusion1_cnn.pth


In [29]:
%%writefile inference.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import io, json

# --- Model Definition (must match your training one) ---
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * (256 // 8) * (256 // 8), 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# --- Required by SageMaker ---
def model_fn(model_dir):
    """Load the model for inference"""
    model = ConvNet()
    model.load_state_dict(torch.load(f"{model_dir}/effusion1_cnn.pth", map_location="cpu"))
    model.eval()
    return model


def input_fn(request_body, content_type):
    """Convert the input image bytes into a tensor"""
    if content_type == "application/x-image":
        image = Image.open(io.BytesIO(request_body)).convert("RGB")
        transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        return transform(image).unsqueeze(0)
    else:
        raise ValueError(f"Unsupported content type: {content_type}")


def predict_fn(input_data, model):
    """Run the model and return the prediction"""
    with torch.no_grad():
        output = model(input_data)
        prob = torch.sigmoid(output).item()
        prediction = "Yes" if prob > 0.5 else "No"
        return {"prediction": prediction, "confidence": prob}


def output_fn(prediction_output, accept):
    """Return the result as JSON"""
    return json.dumps(prediction_output)

Overwriting inference.py


In [30]:
!ls

inference.py  model  model.tar.gz  outputs  test_image.png  xraymodel.ipynb


In [31]:
import tarfile

with tarfile.open("model.tar.gz", "w:gz") as tar:
    tar.add("inference.py")
    tar.add("model/effusion1_cnn.pth", arcname="effusion1_cnn.pth")

print("‚úÖ model.tar.gz created successfully!")
!ls -lh model.tar.gz

‚úÖ model.tar.gz created successfully!
-rw-r--r-- 1 sagemaker-user users 30M Oct 26 15:40 model.tar.gz


In [32]:
import boto3

region = "us-east-1"
bucket = "aws-chest-xray-model-hackathon"
key = "models/model.tar.gz"

s3 = boto3.client("s3", region_name=region)
s3.upload_file("model.tar.gz", bucket, key)

print(f"‚úÖ Uploaded to s3://{bucket}/{key}")


‚úÖ Uploaded to s3://aws-chest-xray-model-hackathon/models/model.tar.gz


In [33]:
import boto3
import sagemaker
import time

# ---- Configuration ----
region = "us-east-1"
bucket = "aws-chest-xray-model-hackathon"
model_artifact = f"s3://{bucket}/models/model.tar.gz"

model_name = "xray-effusion-model"
endpoint_config_name = "xray-effusion-config"
endpoint_name = "xray-effusion-endpoint"

# SageMaker execution role (check this in your AWS console under SageMaker -> Roles)
role = sagemaker.get_execution_role()

print("‚úÖ Region:", region)
print("‚úÖ Model artifact:", model_artifact)
print("‚úÖ Role:", role)


‚úÖ Region: us-east-1
‚úÖ Model artifact: s3://aws-chest-xray-model-hackathon/models/model.tar.gz
‚úÖ Role: arn:aws:iam::902917435242:role/bedrock-workshop-studio-v2-SageMakerExecutionRole-r8PQyktIIzy5


In [37]:
import boto3
from time import sleep

region = "us-east-1"
model_name = "effusion-cnn-model"
role_arn = "arn:aws:iam::902917435242:role/bedrock-workshop-studio-v2-SageMakerExecutionRole-r8PQyktIIzy5"
model_data = "s3://aws-chest-xray-model-hackathon/models/model.tar.gz"

sagemaker = boto3.client("sagemaker", region_name=region)

# ‚úÖ Use the correct PyTorch inference image
container = {
    "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:2.0.0-cpu-py310-ubuntu20.04-sagemaker",
    "ModelDataUrl": model_data,
}

response = sagemaker.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role_arn,
    PrimaryContainer=container,
)

print(f"‚úÖ Model created: {model_name}")

‚úÖ Model created: effusion-cnn-model


In [40]:
import boto3

sagemaker = boto3.client("sagemaker", region_name="us-east-1")

response = sagemaker.create_endpoint_config(
    EndpointConfigName="effusion-cnn-config",
    ProductionVariants=[
        {
            "VariantName": "AllTraffic",
            "ModelName": "effusion-cnn-model",
            "InitialInstanceCount": 1,
            "InstanceType": "ml.m5.large",  # good default for CPU inference
        }
    ],
)

print("‚úÖ Endpoint config created: effusion-cnn-config")

‚úÖ Endpoint config created: effusion-cnn-config


In [43]:
endpoint_name = "effusion-cnn-endpoint"

response = sagemaker.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)

print(f"üöÄ Creating endpoint: {endpoint_name} ... this may take ~10 minutes")

# Wait for deployment to finish
waiter = sagemaker.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=endpoint_name)

print(f"‚úÖ Endpoint is live and ready: {endpoint_name}")


üöÄ Creating endpoint: effusion-cnn-endpoint ... this may take ~10 minutes
‚úÖ Endpoint is live and ready: effusion-cnn-endpoint


In [44]:
import boto3

# S3 setup
bucket = "aws-chest-xray-model-hackathon"
key = "archive/00001338_003.png"  # replace with any image key that exists in your bucket

s3 = boto3.client("s3")
s3.download_file(bucket, key, "test_image.png")

print("‚úÖ Image downloaded from S3 ‚Üí test_image.png")

‚úÖ Image downloaded from S3 ‚Üí test_image.png


In [45]:
import boto3, json

runtime = boto3.client("sagemaker-runtime", region_name="us-east-1")
endpoint_name = "effusion-cnn-endpoint"

# ‚ö†Ô∏è replace this with the actual path to a test image in your Jupyter environment
test_image_path = "test_image.png"

with open(test_image_path, "rb") as f:
    payload = f.read()

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/x-image",
    Body=payload,
)

result = json.loads(response["Body"].read().decode())
print("‚úÖ Prediction result:")
print(json.dumps(result, indent=2))


‚úÖ Prediction result:
{
  "prediction": "Yes",
  "confidence": 0.5406027436256409
}
