In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to them take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pickle
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
from pathlib import Path
import pandas as pd
import pydicom as dcm
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import DatasetFolder
from torchvision.transforms import Compose, ToTensor, Normalize, RandomAffine, RandomResizedCrop
import pneumonia.preprocess as preproc
from pneumonia.evaluate import evaluate_classifier
from pneumonia.classifier import PneumoniaClassifier, train_pneumonia_classifier

# Take a look at the x-ray images

In [None]:
raw_dir = 'data/stage_2_train_images'
label_path = 'data/stage_2_train_labels.csv'

num_images = 9
labels = pd.read_csv(label_path)
# os.listdir() returns entries in arbitrary order; sorting makes the preview
# show the same files on every run.
dicom_files = sorted(file for file in os.listdir(raw_dir) if file.endswith('.dcm'))

# One 'Target' value per patient (the first row listed for that patient), so
# the per-image lookup below is a single index access rather than a scan of
# the whole frame.
target_by_patient = labels.drop_duplicates('patientId').set_index('patientId')['Target']

# Plot the pixel array of the first DICOM images (at most `num_images`)
n_cols = 3
n_rows = max(1, -(-num_images // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)

# Slicing instead of indexing up to `num_images` avoids an IndexError when the
# directory holds fewer files than requested.
for ax, file_name in zip(axes.flat, dicom_files[:num_images]):
    file_path = os.path.join(raw_dir, file_name)
    patient_id = os.path.splitext(file_name)[0]
    # Fall back to 'n/a' when the label file has no row for this image.
    label = target_by_patient.get(patient_id, 'n/a')
    ds = dcm.dcmread(file_path)
    ax.imshow(ds.pixel_array, cmap='bone')
    ax.set_title(f'Label: {label}')

# Hide the axes everywhere, including grid slots that received no image.
for ax in axes.flat:
    ax.axis('off')

plt.show()


# Parameters

In [None]:
# Parameters for preprocessing and data loading.
# `raw_dir` and `label_path` are reused exactly as defined in the image
# preview cell above; re-assigning them to themselves here had no effect.
shape = (224, 224)            # image size handed to the preprocessing helpers
preproc_dir = 'preprocessed'  # directory holding the preprocessed train/val arrays
batch_size = 64               # samples per DataLoader batch
num_workers = 4               # DataLoader worker processes

# Preprocessing

In [None]:
# Preprocessing is skipped when the output directory already exists.
if not os.path.exists(preproc_dir):
    preproc.preprocess(raw_dir, label_path, preproc_dir, shape)

# Parameters later unpacked into torchvision's Normalize(...).
standard_params = preproc.compute_standard_params(preproc_dir, shape)

# Create the target directory first: open(..., 'wb') raises FileNotFoundError
# if '../models' does not exist yet.
standard_params_path = Path('../models/standard_params.pkl')
standard_params_path.parent.mkdir(parents=True, exist_ok=True)
with open(standard_params_path, 'wb') as file:
    pickle.dump(standard_params, file)



# Construct data loader

In [None]:
def load_img(file_name):
    """Read the array stored at ``file_name`` with ``np.load`` and return it as float32.

    Used as the ``loader`` callable of the ``DatasetFolder`` instances below.
    """
    stored_array = np.load(file_name)
    as_float32 = stored_array.astype(np.float32)
    return as_float32

In [None]:
# Training pipeline: tensor conversion, normalisation with the computed
# `standard_params`, then random affine and random-resized-crop augmentation.
train_transforms = Compose([
    ToTensor(),
    Normalize(*standard_params),
    RandomAffine(degrees=5, translate=(0, 0.05), scale=(0.9, 1.1)),
    # Crop back to the configured `shape` rather than a hard-coded 224, so the
    # augmented training images keep the same size as the validation images
    # if `shape` is ever changed.
    RandomResizedCrop(shape, scale=(0.35, 1.))
])
# Validation pipeline: same conversion and normalisation, no augmentation.
val_transforms = Compose([ToTensor(), Normalize(*standard_params)])

In [None]:
# Both splits are folders of .npy files read through load_img; the only
# difference between them is the transform pipeline applied to each sample.
train_dir = os.path.join(preproc_dir, 'train')
val_dir = os.path.join(preproc_dir, 'val')

train_data = DatasetFolder(
    train_dir,
    loader=load_img,
    extensions='.npy',
    transform=train_transforms,
)
val_data = DatasetFolder(
    val_dir,
    loader=load_img,
    extensions='.npy',
    transform=val_transforms,
)

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# Validation batches keep the dataset order, so values collected from this
# loader line up index-for-index with `val_data` and evaluation runs are
# repeatable.
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)


In [None]:
# Distinct target values and how often each occurs, per split.
# The cell displays the pair (train, val).
train_class_counts = np.unique(train_data.targets, return_counts=True)
val_class_counts = np.unique(val_data.targets, return_counts=True)
train_class_counts, val_class_counts

In [None]:
# Show three randomly chosen samples from the first training batch.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    batch_images, batch_targets = first_batch[0], first_batch[1]
    for column in range(3):
        rand_idx = np.random.randint(batch_images.shape[0])
        plt.subplot(1, 3, column + 1)
        # Index 0 on the second axis selects the first plane of the sample.
        plt.imshow(batch_images[rand_idx, 0, :, :], cmap='bone')
        plt.title(f'label: {batch_targets[rand_idx]}')
        plt.axis('off')

# Create and train model

In [None]:
# Train the classifier and keep the returned object as `model`.
# NOTE(review): the call takes no arguments, so `train_loader` / `val_loader`
# built above are not passed in — presumably train_pneumonia_classifier sets
# up its own data pipeline; TODO confirm against its signature.
model = train_pneumonia_classifier()

# Save model

In [None]:
# Checkpoint location and a file name stamped with the current local time
# (year, month, day, hour, minute, second as 14 digits).
model_dir = Path('dev/model')
now = datetime.datetime.now()
current_date = now.strftime("%Y%m%d%H%M%S")
file_name = f'resnet_{current_date}.pth'

In [None]:
# Write the trained weights to model_dir / file_name, creating the directory
# if needed.
os.makedirs(model_dir, exist_ok=True)
# Only the `torch` module is imported (there is no bare `save`), and the
# network produced by the training cell is `model`; `pnm_model` is not
# created until the "Load model" section below.
torch.save(model.state_dict(), model_dir / file_name)


# Load model

In [None]:
# Restore a saved classifier and prepare it for inference.
model_path = '../models/resnet_2024-02-06.pth'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

pnm_model = PneumoniaClassifier()
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU still loads on a CPU-only machine.
pnm_model.load_state_dict(torch.load(model_path, map_location=device))
pnm_model.to(device)
# Inference mode for the prediction/evaluation cells that follow. eval()
# returns the module itself, so the cell still displays the model.
pnm_model.eval()

In [None]:
# Take the first plane of the first validation sample as a NumPy array and
# show it as a tensor with one extra leading axis. `img` is defined here so
# the cell runs top-to-bottom on a fresh kernel.
img = np.array(val_data[0][0][0])
torch.unsqueeze(torch.from_numpy(img), 0)

In [None]:
# Single-image inference on the first validation sample.
img = np.array(val_data[0][0][0])

# Make sure the network is in inference mode before predicting, so any
# train-time-only layer behaviour is switched off.
pnm_model.eval()

# Pass the image through the trained model to obtain the raw model output
with torch.no_grad():
    # [None, None] adds two leading singleton axes in front of the image.
    img_tensor = torch.from_numpy(img)[None, None].to(device)
    output = pnm_model(img_tensor)

# Squash the output with a sigmoid; .item() requires it to hold exactly one value
predicted_probability = torch.sigmoid(output).item()
predicted_probability


# Evaluation

In [None]:
# Run the project's evaluation helper on the loaded model with both loaders
# and unpack the four values it returns.
evaluation_outputs = evaluate_classifier(pnm_model, train_loader, val_loader)
train_preds, train_labels, val_preds, val_labels = evaluation_outputs

In [None]:

# NOTE(review): `show_random_images` is neither imported nor defined in any
# cell visible in this notebook, so this call raises NameError on a fresh
# kernel unless it is provided elsewhere — confirm where it lives and import
# it in the import cell at the top.
show_random_images(val_data, val_preds, val_labels)

In [None]:
# Load and preprocess the image
# NOTE(review): `Image` is not imported in the import cell — presumably this is
# PIL's `Image` module; add the import at the top, otherwise this cell raises
# NameError.
# NOTE(review): the datasets above feed `val_transforms` float32 arrays read
# from .npy files via load_img; an image opened from a .jpg may not match that
# input format — confirm the expected preprocessing before relying on this cell.
image_path = 'path_to_image.jpg'  # Replace with the actual path to your image
image = Image.open(image_path)
preprocessed_image = val_transforms(image)

# Add a leading axis of length 1 to the preprocessed image
tensor_image = torch.unsqueeze(preprocessed_image, 0)

# Move the tensor to the same device the model was moved to
tensor_image = tensor_image.to(device)

# Pass the tensor through the trained model with gradient tracking disabled
with torch.no_grad():
    output = pnm_model(tensor_image)

# Apply a sigmoid to the model output; .item() requires it to hold exactly one value
predicted_probability = torch.sigmoid(output).item()
