In [1]:
import os
from PIL import Image
import pandas as pd
import torch
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

#pid folder
main_folder = '../../../data/train'

# Define the transformation to convert images to tensors
transform = transforms.Compose([
    transforms.Resize((60, 60)),  # Resize the image if necessary
    transforms.ToTensor()           # Convert images to PyTorch tensors
])

# Function to load an image and convert it to a tensor
def load_image(image_path):
    with Image.open(image_path) as img:
        return transform(img)

# List to hold all the tensors
image_tensors = []

# Navigate through the directory structure
for pid in os.listdir(main_folder):
    pid_path = os.path.join(main_folder, pid)
    if os.path.isdir(pid_path):
        for study in os.listdir(pid_path):
            study_path = os.path.join(pid_path, study)
            if os.path.isdir(study_path):
                # Specify the filename you are looking for
                for view in os.listdir(study_path):
                    target_image_path = os.path.join(study_path, view)
                    if os.path.exists(target_image_path):
                        # Load the image and convert to tensor
                        tensor = load_image(target_image_path)
                        image_tensors.append(tensor)

print(f"Loaded {len(image_tensors)} images.")

  warn(


Loaded 12 images.


In [2]:
df = pd.read_csv('../../../data/student_labels/train2023.csv')
outcomes = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Pneumonia', 'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']
dfs = {}
for col in outcomes:
    temp_df = df[['Path', col]].copy()
    temp_df = temp_df.dropna(subset = [col])
    tensors = []
    for path in list(temp_df['Path']):
        tensors.append(load_image(os.path.join('../../../data', path)))
    dfs[col] = (tensors, list(temp_df[col]))

print(dfs)

{'No Finding': ([tensor([[[0.2353, 0.3412, 0.3451,  ..., 0.0000, 0.0000, 0.0000],
         [0.2431, 0.3373, 0.3490,  ..., 0.0078, 0.0078, 0.0039],
         [0.3059, 0.3647, 0.3412,  ..., 0.0196, 0.0157, 0.0118],
         ...,
         [0.4431, 0.5529, 0.6392,  ..., 0.9176, 0.9098, 0.8980],
         [0.5255, 0.6353, 0.7020,  ..., 0.9333, 0.9216, 0.9176],
         [0.6627, 0.7255, 0.7765,  ..., 0.9333, 0.9255, 0.9255]]]), tensor([[[0.0667, 0.0588, 0.0510,  ..., 0.0314, 0.0353, 0.1882],
         [0.0510, 0.0471, 0.0431,  ..., 0.0235, 0.0314, 0.1765],
         [0.0510, 0.0431, 0.0314,  ..., 0.0471, 0.0314, 0.1608],
         ...,
         [0.0745, 0.0824, 0.0784,  ..., 0.0941, 0.1098, 0.1255],
         [0.1137, 0.1137, 0.0980,  ..., 0.1294, 0.1373, 0.1529],
         [0.2039, 0.1961, 0.1804,  ..., 0.2588, 0.2706, 0.2941]]]), tensor([[[0.0902, 0.2627, 0.3647,  ..., 0.4078, 0.3373, 0.2392],
         [0.0863, 0.2549, 0.3765,  ..., 0.4196, 0.3490, 0.2549],
         [0.0863, 0.2431, 0.3843,  ...,

In [4]:
test_df = pd.read_csv('../../../data/student_labels/test_ids.csv')
test_outcomes = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Pneumonia', 'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']
test_dfs = []

for path in list(test_df['Path']):
    print(path)
    test_dfs.append(load_image(os.path.join('../../../data', path)))

test/pid56785/study1/view1_frontal.jpg
test/pid56785/study1/view2_lateral.jpg
test/pid57943/study1/view1_frontal.jpg
test/pid57943/study2/view1_frontal.jpg
test/pid54805/study1/view1_frontal.jpg
test/pid54777/study1/view1_frontal.jpg
test/pid54251/study2/view1_frontal.jpg
test/pid54251/study2/view2_lateral.jpg
test/pid54251/study1/view1_frontal.jpg
test/pid54251/study1/view2_lateral.jpg


In [5]:
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3600, 20),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(20, 3)
)


In [6]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [7]:
def train_nn(data_loader):
    data, target = data_loader

    target = [int(n + 1) for n in target]

    data = torch.stack(data)
    target = torch.tensor(target)
    dataset = TensorDataset(data, target)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

    model.train()

    for epoch in range(10):
        for batch_idx, (data, target) in enumerate(train_loader):
            # Erase accumulated gradients
            optimizer.zero_grad()

            # Forward pass
            output = model(data)

            # Calculate loss
            loss = loss_fn(output, target)

            # Backward pass
            loss.backward()

            # Weight update
            optimizer.step()

    return model

In [8]:
def get_output(train_data, test_data):
    model = train_nn(train_data)

    test_data = torch.stack(test_data)

    with torch.no_grad():
        output = model(test_data)
        pred = output.argmax(dim=1, keepdim=True)
        return(pred)

In [10]:
features = ["No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity", "Pneumonia", "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices"]

classification_dict = {}
classification_dict["Id"] = test_df['Id']

for feature in features:
    if len(dfs[feature][0]) == 0:
        classification_dict[feature] = [0 for i in range(len(test_dfs))]
    else:
        classification_dict[feature] = get_output(dfs[feature], test_dfs)
        classification_dict[feature] = [tensor.item() - 1 for tensor in classification_dict[feature]]

print(classification_dict)
submission_df = pd.DataFrame(classification_dict)

print(submission_df.shape)
submission_df.to_csv('gobeavers_submission.csv', index=False)

{'Id': 0     18
1     19
2     44
3     45
4     57
5    102
6    104
7    105
8    106
9    107
Name: Id, dtype: int64, 'No Finding': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Enlarged Cardiomediastinum': [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1], 'Cardiomegaly': [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1], 'Lung Opacity': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Pneumonia': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'Pleural Effusion': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Pleural Other': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'Fracture': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'Support Devices': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
(10, 10)
