In [1]:
import math
import os
import random

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [2]:
class ChestXrayDataset(Dataset):
    """Chest X-ray dataset that blanks out every pixel outside the lung mask.

    The first column of ``csv_file`` is used to build the file name
    ``img_<value>.png`` (looked up in both ``image_folder`` and
    ``mask_folder``); the second column is the integer class label.

    Args:
        image_folder: directory containing the X-ray images.
        mask_folder: directory containing the masks, using the same file
            names as the images.
        csv_file: path of the CSV file read with ``pandas.read_csv``.
        transform: callable applied to both the image and the mask. It must
            return tensors (e.g. end with ``ToTensor``), because the masking
            step operates on tensors.
    """

    def __init__(self, image_folder, mask_folder, csv_file, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.data)

    def _transform_pair(self, image, mask):
        """Apply ``self.transform`` to image and mask with identical random draws.

        Random transforms (such as a random horizontal flip) must make the
        same decision for the image and its mask, otherwise the mask no longer
        lines up with the lungs. Both the Python and the torch RNG states are
        rewound before transforming the mask so it replays the same draws.
        """
        py_state = random.getstate()
        torch_state = torch.get_rng_state()
        image = self.transform(image)

        random.setstate(py_state)
        torch.set_rng_state(torch_state)
        mask = self.transform(mask)
        return image, mask

    def __getitem__(self, idx):
        """Return ``(masked_image, label)`` for row ``idx``.

        Raises:
            TypeError: if the image or mask is not a tensor after the
                transform (for example when ``transform`` is ``None``).
        """
        file_name = f'img_{self.data.iloc[idx, 0]}.png'
        img_name = os.path.join(self.image_folder, file_name)
        mask_name = os.path.join(self.mask_folder, file_name)

        image = Image.open(img_name).convert('RGB')
        mask = Image.open(mask_name).convert('L')

        if self.transform:
            image, mask = self._transform_pair(image, mask)

        if not (torch.is_tensor(image) and torch.is_tensor(mask)):
            raise TypeError(
                "ChestXrayDataset needs a transform that returns tensors "
                "(e.g. one ending with ToTensor) to apply the lung mask"
            )

        # Keep pixels where the mask is positive; zeros_like keeps the image's
        # dtype/device instead of mixing in an int64 scalar tensor.
        image = torch.where(mask > 0, image, torch.zeros_like(image))

        label = int(self.data.iloc[idx, 1])
        return image, label


In [3]:
def conv_bn(inp, oup, stride):
    """Build a 3x3 convolution (padding 1, no bias) -> BatchNorm2d -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 3x3 convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = nn.BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)


def conv_1x1_bn(inp, oup):
    """Build a 1x1 convolution (stride 1, no bias) -> BatchNorm2d -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    pointwise = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(pointwise, norm, activation)


def conv_dw(inp, oup, stride):
    """Build a depthwise-separable convolution stack.

    The stack is a 3x3 depthwise convolution (``groups=inp``) followed by a
    1x1 pointwise convolution; each convolution has no bias and is followed
    by BatchNorm2d and ReLU6.

    Args:
        inp: number of input channels (also the depthwise group count).
        oup: number of output channels of the pointwise convolution.
        stride: stride of the depthwise convolution.

    Returns:
        An ``nn.Sequential`` with the six layers in order.
    """
    depthwise = [
        nn.Conv2d(inp, inp, kernel_size=3, stride=stride, padding=1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),
    ]
    pointwise = [
        nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)


class InvertedResidual(nn.Module):
    """Inverted-residual block: optional 1x1 expansion, 3x3 depthwise, 1x1 projection.

    The expansion stage is skipped when ``expand_ratio`` is 1. The final
    projection has BatchNorm but no activation. The input is added to the
    output only when ``stride`` is 1 and the channel count is unchanged.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by the projection.
        stride: stride of the depthwise convolution.
        expand_ratio: multiplier giving the hidden (expanded) channel count.
    """

    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        hidden_dim = round(in_channels * expand_ratio)
        self.use_res_connect = (in_channels == out_channels) and (self.stride == 1)

        # Stage 1 (optional): pointwise expansion to hidden_dim channels.
        expansion = []
        if expand_ratio != 1:
            expansion = [
                nn.Conv2d(in_channels, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]

        # Stage 2: depthwise 3x3 convolution, one group per channel.
        depthwise = [
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
        ]

        # Stage 3: pointwise projection, deliberately without an activation.
        projection = [
            nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels),
        ]

        self.conv = nn.Sequential(*expansion, *depthwise, *projection)

    def forward(self, x):
        """Run the block, adding the skip connection when it is enabled."""
        out = self.conv(x)
        return x + out if self.use_res_connect else out

In [4]:
class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier built from ``InvertedResidual`` blocks.

    The network is a strided 3x3 stem, the inverted-residual stages listed in
    ``inverted_residual_setting``, a 1x1 convolution to ``last_channel``
    features, global average pooling and a single linear classifier.

    Args:
        num_classes: number of outputs of the final linear layer.
        width_mult: multiplier applied to the stem and stage channel counts.
            ``last_channel`` is fixed at 1280 and is not scaled.
        round_nearest: scaled channel counts are rounded to a multiple of
            this value (and never below it).
    """

    def __init__(self, num_classes=1000, width_mult=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        self.round_nearest = round_nearest
        self.inverted_residual_setting = [
            # t (expand ratio), c (output channels), n (repeats), s (stride of first repeat)
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        # `cfgs` holds the same table; it is kept as a separate copy so code
        # that reads the attribute keeps working.
        self.cfgs = [list(row) for row in self.inverted_residual_setting]
        self.last_channel = 1280

        input_channel = self._make_divisible(32 * width_mult)

        # Collect layers in a local list; only the final nn.Sequential is
        # registered on the module.
        layers = [conv_bn(3, input_channel, 2)]
        for t, c, n, s in self.inverted_residual_setting:
            output_channel = self._make_divisible(c * width_mult)
            for i in range(n):
                # Only the first block of a stage applies the stage stride.
                stride = s if i == 0 else 1
                layers.append(InvertedResidual(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        layers.append(conv_1x1_bn(input_channel, self.last_channel))

        self.features = nn.Sequential(*layers)
        self.classifier = nn.Linear(self.last_channel, num_classes)

        self._initialize_weights()

    def forward(self, x):
        """Return class scores for a batch of images."""
        x = self.features(x)
        # Global average pooling over the two spatial dimensions.
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x

    def _make_divisible(self, v):
        """Round ``v`` to the nearest multiple of ``round_nearest``.

        The result is never smaller than ``round_nearest``: without that floor
        a small ``width_mult`` (e.g. 16 * 0.1) rounds down to 0 channels, which
        cannot be used to build a layer.
        """
        rounded = int((v + self.round_nearest / 2) // self.round_nearest * self.round_nearest)
        return int(max(self.round_nearest, rounded))

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear parameters in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std derived from kernel area * output channels.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

In [5]:
# --- Training data locations ---
train_images_folder = "data/train_images/"
train_mask_folder = "data/train_lung_masks/"
train_csv_file = "data/train_answers.csv"

# --- Hyperparameters ---
batch_size = 32
num_epochs = 10

# --- Compute device: GPU when available, otherwise CPU ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Training-time preprocessing: fixed 224x224 size, random flip, tensor conversion ---
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

In [6]:
# Dataset of masked training images and the loader that batches and shuffles them.
train_dataset = ChestXrayDataset(
    image_folder=train_images_folder,
    mask_folder=train_mask_folder,
    csv_file=train_csv_file,
    transform=train_transform,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Three-class MobileNetV2, moved to the selected device.
model = MobileNetV2(num_classes=3, width_mult=1.0, round_nearest=8)
model = model.to(device)

# Cross-entropy loss on the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [7]:
# Switch the model to training mode before optimising.
model.train()
for epoch in range(num_epochs):
    # Sum of per-sample losses for this epoch; averaged over the dataset below.
    running_loss = 0.0
    tqdm_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for images, labels in tqdm_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean, so weight it by the batch size.
        running_loss += loss.item() * images.shape[0]
        tqdm_loader.set_postfix(Loss=loss.item())

    epoch_loss = running_loss / len(train_dataset)
    # Checkpoint file names count epochs from 0; the printed text counts from 1.
    torch.save(model.state_dict(), f'mobilenet2_fixed_{epoch}.pth')
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

Epoch 1/10: 100%|██████████| 844/844 [48:16<00:00,  3.43s/it, Loss=0.839] 


Epoch [1/10], Loss: 0.8202


Epoch 2/10: 100%|██████████| 844/844 [48:27<00:00,  3.45s/it, Loss=0.761]


Epoch [2/10], Loss: 0.6538


Epoch 3/10: 100%|██████████| 844/844 [47:43<00:00,  3.39s/it, Loss=0.387]


Epoch [3/10], Loss: 0.5911


Epoch 4/10: 100%|██████████| 844/844 [47:33<00:00,  3.38s/it, Loss=0.561]


Epoch [4/10], Loss: 0.5500


Epoch 5/10: 100%|██████████| 844/844 [47:25<00:00,  3.37s/it, Loss=0.631]


Epoch [5/10], Loss: 0.5157


Epoch 6/10: 100%|██████████| 844/844 [47:27<00:00,  3.37s/it, Loss=0.323]


Epoch [6/10], Loss: 0.4915


Epoch 7/10: 100%|██████████| 844/844 [48:07<00:00,  3.42s/it, Loss=0.429]


Epoch [7/10], Loss: 0.4617


Epoch 8/10: 100%|██████████| 844/844 [49:12<00:00,  3.50s/it, Loss=0.575]


Epoch [8/10], Loss: 0.4414


Epoch 9/10: 100%|██████████| 844/844 [51:38<00:00,  3.67s/it, Loss=0.407]


Epoch [9/10], Loss: 0.4293


Epoch 10/10:  20%|██        | 172/844 [10:58<42:54,  3.83s/it, Loss=0.369]


KeyboardInterrupt: 

In [ ]:
# NOTE: this cell repeats the training loop of the earlier training cell.
# Running it keeps updating the same `model` / `optimizer` for another
# `num_epochs` epochs and writes to the same `mobilenet2_fixed_{epoch}.pth`
# file names, replacing any checkpoints saved under those names.
model.train()
for epoch in range(num_epochs):
    # Sum of per-sample losses for this epoch; averaged over the dataset below.
    running_loss = 0.0
    tqdm_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for images, labels in tqdm_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean, so weight it by the batch size.
        running_loss += loss.item() * images.shape[0]
        tqdm_loader.set_postfix(Loss=loss.item())

    epoch_loss = running_loss / len(train_dataset)
    torch.save(model.state_dict(), f'mobilenet2_fixed_{epoch}.pth')
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

In [9]:
test_images_folder = "data/test_images/"
# Same preprocessing as training, minus the random flip.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


def _id_from_filename(filename):
    """Return the integer in ``img_<id>.<ext>``, or None if the name does not match."""
    stem = os.path.splitext(filename)[0]
    suffix = stem[len('img_'):] if stem.startswith('img_') else ''
    return int(suffix) if suffix.isdecimal() else None


# os.listdir returns entries in arbitrary order; sort so runs are reproducible.
test_filenames = sorted(os.listdir(test_images_folder))

# Take each id from the file name so a prediction stays attached to its image.
# Assumes test files follow the training naming `img_<id>.png` - TODO confirm.
# If any file does not match, fall back to the position in the sorted listing.
parsed_ids = [_id_from_filename(name) for name in test_filenames]
if test_filenames and all(pid is not None for pid in parsed_ids):
    image_ids = parsed_ids
else:
    image_ids = list(range(len(test_filenames)))

# NOTE(review): training images had pixels outside the lung mask set to zero,
# but no mask is applied here. Confirm whether test masks exist and should be used.

# Evaluation mode makes BatchNorm use its running statistics; in training mode
# a single-image batch would be normalised by its own statistics and would also
# modify the running statistics.
model.eval()

submission_data = []
with torch.no_grad():  # inference only: no autograd graph needed
    for image_id, filename in zip(image_ids, test_filenames):
        img_path = os.path.join(test_images_folder, filename)
        img = Image.open(img_path).convert('RGB')
        img = test_transform(img).unsqueeze(0).to(device)
        output = model(img)
        _, predicted = torch.max(output, 1)
        submission_data.append({'id': image_id, 'target_feature': predicted.item()})

# Explicit columns keep the CSV header even when the folder is empty.
submission_df = pd.DataFrame(submission_data, columns=['id', 'target_feature'])
submission_df.to_csv('submission_file-mobilenet_fixed_2.csv', index=False)