In [19]:
import os
from PIL import Image
import pandas as pd
import torch
from torchvision import transforms

#pid folder
main_folder = '../../../data/train'

# Define the transformation to convert images to tensors
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize the image if necessary
    transforms.ToTensor()           # Convert images to PyTorch tensors
])

# Function to load an image and convert it to a tensor
def load_image(image_path):
    with Image.open(image_path) as img:
        return transform(img)

# List to hold all the tensors
image_tensors = []

# Navigate through the directory structure
for pid in os.listdir(main_folder):
    pid_path = os.path.join(main_folder, pid)
    if os.path.isdir(pid_path):
        for study in os.listdir(pid_path):
            study_path = os.path.join(pid_path, study)
            if os.path.isdir(study_path):
                # Specify the filename you are looking for
                for view in os.listdir(study_path):
                    target_image_path = os.path.join(study_path, view)
                    if os.path.exists(target_image_path):
                        # Load the image and convert to tensor
                        tensor = load_image(target_image_path)
                        image_tensors.append(tensor)

print(f"Loaded {len(image_tensors)} images.")

Loaded 12 images.


In [20]:
df = pd.read_csv('../../../data/student_labels/train2023.csv')
outcomes = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Pneumonia', 'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']
dfs = {}
for col in outcomes:
    temp_df = df[['Path', col]].copy()
    temp_df = temp_df.dropna(subset = [col])
    tensors = []
    for path in list(temp_df['Path']):
        tensors.append(load_image(os.path.join('../../../data', path)))
    dfs[col] = (tensors, list(temp_df[col]))

print(dfs)

{'No Finding': ([tensor([[[0.0275, 0.2039, 0.2235,  ..., 0.0039, 0.0039, 0.0000],
         [0.0275, 0.1843, 0.1922,  ..., 0.0039, 0.0039, 0.0000],
         [0.0275, 0.1843, 0.2118,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.6275, 0.6588, 0.6706,  ..., 0.9294, 0.9294, 0.9216],
         [0.6627, 0.6824, 0.6941,  ..., 0.9255, 0.9255, 0.9176],
         [0.6784, 0.7059, 0.7137,  ..., 0.9176, 0.9176, 0.9137]]]), tensor([[[0.0745, 0.0745, 0.0784,  ..., 0.1608, 0.2627, 0.4078],
         [0.0745, 0.0706, 0.0706,  ..., 0.1529, 0.2549, 0.4039],
         [0.0784, 0.0667, 0.0667,  ..., 0.1529, 0.2471, 0.3922],
         ...,
         [0.1765, 0.1725, 0.1804,  ..., 0.2980, 0.2784, 0.3020],
         [0.2275, 0.2196, 0.2157,  ..., 0.3725, 0.3569, 0.3569],
         [0.3137, 0.3098, 0.3020,  ..., 0.4431, 0.4510, 0.5020]]]), tensor([[[0.0627, 0.0745, 0.0706,  ..., 0.2980, 0.0706, 0.0000],
         [0.0667, 0.0745, 0.0745,  ..., 0.3137, 0.1373, 0.0000],
         [0.0706, 0.0745, 0.0824,  ...,

In [21]:
import torch.nn as nn
import torch.optim as optim
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 20),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(20, 3)
)


In [22]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

AttributeError: module 'torch' has no attribute 'version'

In [None]:
def train_nn(data_loader):
    model.train()

    for epoch in range(10):
        for batch_idx, (data, target) in enumerate(data_loader):
            # Erase accumulated gradients
            optimizer.zero_grad()

            # Forward pass
            output = model(data)

            # Calculate loss
            loss = loss_fn(output, target)

            # Backward pass
            loss.backward()

            # Weight update
            optimizer.step()

    return model

In [None]:
def get_output(test_data, train_data):
    model = train_nn(train_data)

    predictions = []

    with torch.no_grad():
        for data, target in test_data:
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)

    return predictions

In [None]:
features = ["No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity", "Pneumonia", "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices"]

classification_dict = {}
test_df = pd.read_csv("../../../data/student_labels/test_ids.csv")
classification_dict["Id"] = test_df["Id"]

#Parse tensor files

for feature in features:
    classification_dict[feature] = get_output(test_data, train_data)

submission_df = pd.DataFrame.from_dict(classification_dict)
submission_df.to_csv('gobeavers_submission.csv', index=False)