In [150]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import math

In [151]:
def conv_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


def conv_dw(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )


class InvertedResidual(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        hidden_dim = round(in_channels * expand_ratio)
        self.use_res_connect = self.stride == 1 and in_channels == out_channels

        layers = []
        if expand_ratio != 1:
            layers.append(nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False))
            layers.append(nn.BatchNorm2d(hidden_dim))
            layers.append(nn.ReLU6(inplace=True))

        layers.extend([
            nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=stride, padding=1, groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_channels),
        ])

        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)

In [152]:
class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, width_mult=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        self.cfgs = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        self.round_nearest = round_nearest
        self.inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        self.last_channel = 1280
        input_channel = 32

        input_channel = self._make_divisible(input_channel * width_mult)
        self.features = [conv_bn(3, input_channel, 2)]

        for t, c, n, s in self.inverted_residual_setting:
            output_channel = self._make_divisible(c * width_mult)
            for i in range(n):
                stride = s if i == 0 else 1
                self.features.append(InvertedResidual(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel

        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        self.features = nn.Sequential(*self.features)

        self.classifier = nn.Linear(self.last_channel, num_classes)

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x

    def _make_divisible(self, v):
        return int((v + self.round_nearest / 2) // self.round_nearest * self.round_nearest)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

In [153]:
from torchvision.transforms import ToPILImage


class ChestXrayDataset(Dataset):
    def __init__(self, image_folder, csv_file, transform=None):
        self.image_folder = image_folder
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_folder, f'img_{self.data.iloc[idx, 0]}.png')
        image = Image.open(img_name)
        if image.mode != 'RGB':
            image = image.convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = int(self.data.iloc[idx, 1])
        return image, label

In [154]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

train_images_folder = "data/train_images/"
train_csv_file = "data/train_answers.csv"
batch_size = 32
num_epochs = 10

train_dataset = ChestXrayDataset(train_images_folder, train_csv_file, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = MobileNetV2(num_classes=3, width_mult=1.0, round_nearest=8).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [155]:
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    tqdm_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for images, labels in tqdm_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        tqdm_loader.set_postfix({'Loss': loss.item()})
    epoch_loss = running_loss / len(train_dataset)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

Epoch 1/10: 100%|██████████| 844/844 [1:16:27<00:00,  5.44s/it, Loss=0.646]


Epoch [1/10], Loss: 0.6353


Epoch 2/10: 100%|██████████| 844/844 [1:19:33<00:00,  5.66s/it, Loss=0.553]


Epoch [2/10], Loss: 0.4379


Epoch 3/10: 100%|██████████| 844/844 [1:19:53<00:00,  5.68s/it, Loss=0.303]


Epoch [3/10], Loss: 0.3716


Epoch 4/10: 100%|██████████| 844/844 [1:01:48<00:00,  4.39s/it, Loss=0.554]


Epoch [4/10], Loss: 0.3310


Epoch 5/10: 100%|██████████| 844/844 [1:00:33<00:00,  4.31s/it, Loss=0.179] 


Epoch [5/10], Loss: 0.3140


Epoch 6/10: 100%|██████████| 844/844 [1:00:26<00:00,  4.30s/it, Loss=0.295]


Epoch [6/10], Loss: 0.2888


Epoch 7/10: 100%|██████████| 844/844 [1:00:20<00:00,  4.29s/it, Loss=0.158]


Epoch [7/10], Loss: 0.2708


Epoch 8/10: 100%|██████████| 844/844 [1:02:44<00:00,  4.46s/it, Loss=0.217] 


Epoch [8/10], Loss: 0.2647


Epoch 9/10: 100%|██████████| 844/844 [1:08:45<00:00,  4.89s/it, Loss=0.0741]


Epoch [9/10], Loss: 0.2436


Epoch 10/10: 100%|██████████| 844/844 [1:20:10<00:00,  5.70s/it, Loss=0.0586]

Epoch [10/10], Loss: 0.2363





In [156]:
torch.save(model.state_dict(), 'mobilenet2.pth')

In [158]:
submission_df = pd.DataFrame(columns=['id', 'target_feature'])
test_images_folder = "data/test_images/"
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

submission_data = []

for i, filename in enumerate(os.listdir(test_images_folder)):
    img_path = os.path.join(test_images_folder, filename)
    img = Image.open(img_path).convert('RGB')
    img = test_transform(img).unsqueeze(0).to(device)
    output = model(img)
    _, predicted = torch.max(output, 1)
    if predicted.item() is None or predicted.item() == '':
        target_feature = 0
    else:
        target_feature = predicted.item()
    submission_data.append({'id': i, 'target_feature': target_feature})

submission_df = pd.DataFrame(submission_data)
submission_df.to_csv('submission_file-mobilenet2.csv', index=False)

In [159]:
model.train()
for epoch in range(10):
    running_loss = 0.0
    tqdm_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for images, labels in tqdm_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        tqdm_loader.set_postfix({'Loss': loss.item()})
    epoch_loss = running_loss / len(train_dataset)
    torch.save(model.state_dict(), f'mobilenet2_10_{epoch}.pth')
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

Epoch 1/10: 100%|██████████| 844/844 [1:23:26<00:00,  5.93s/it, Loss=0.518] 


Epoch [1/10], Loss: 0.2238


Epoch 2/10: 100%|██████████| 844/844 [1:21:07<00:00,  5.77s/it, Loss=0.223] 


Epoch [2/10], Loss: 0.2182


Epoch 3/10: 100%|██████████| 844/844 [1:21:12<00:00,  5.77s/it, Loss=0.252] 


Epoch [3/10], Loss: 0.2094


Epoch 4/10: 100%|██████████| 844/844 [1:17:40<00:00,  5.52s/it, Loss=0.119] 


Epoch [4/10], Loss: 0.2030


Epoch 5/10: 100%|██████████| 844/844 [1:05:59<00:00,  4.69s/it, Loss=0.159] 


Epoch [5/10], Loss: 0.1970


Epoch 6/10: 100%|██████████| 844/844 [1:03:26<00:00,  4.51s/it, Loss=0.136] 


Epoch [6/10], Loss: 0.1862


Epoch 7/10: 100%|██████████| 844/844 [1:10:18<00:00,  5.00s/it, Loss=0.314] 


Epoch [7/10], Loss: 0.1805


Epoch 8/10: 100%|██████████| 844/844 [1:09:35<00:00,  4.95s/it, Loss=0.242] 


Epoch [8/10], Loss: 0.1758


Epoch 9/10: 100%|██████████| 844/844 [1:10:06<00:00,  4.98s/it, Loss=0.0465]


Epoch [9/10], Loss: 0.1694


Epoch 10/10: 100%|██████████| 844/844 [1:06:05<00:00,  4.70s/it, Loss=0.289] 

Epoch [10/10], Loss: 0.1639





In [162]:
submission_df = pd.DataFrame(columns=['id', 'target_feature'])
test_images_folder = "data/test_images/"
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

submission_data = []

for i, filename in enumerate(os.listdir(test_images_folder)):
    img_path = os.path.join(test_images_folder, filename)
    img = Image.open(img_path).convert('RGB')
    img = test_transform(img).unsqueeze(0).to(device)
    output = model(img)
    print(output)
    _, predicted = torch.max(output, 1)
    if predicted.item() is None or predicted.item() == '':
        target_feature = 0
    else:
        target_feature = predicted.item()
    submission_data.append({'id': i, 'target_feature': target_feature})

submission_df = pd.DataFrame(submission_data)
submission_df.to_csv('submission_file-mobilenet2_20.csv', index=False)

tensor([[-1.7600, -3.9354, -0.4874]], grad_fn=<AddmmBackward0>)
tensor([[-2.1699, -2.6880, -0.7744]], grad_fn=<AddmmBackward0>)
tensor([[-1.4527, -3.6255, -0.7188]], grad_fn=<AddmmBackward0>)
tensor([[-1.5525, -4.3656, -0.3571]], grad_fn=<AddmmBackward0>)
tensor([[-1.9511, -3.2798, -0.3650]], grad_fn=<AddmmBackward0>)
tensor([[-2.0792, -3.6979, -0.3408]], grad_fn=<AddmmBackward0>)
tensor([[-1.3174, -2.8294, -0.8744]], grad_fn=<AddmmBackward0>)
tensor([[-1.9301, -3.2540, -0.5345]], grad_fn=<AddmmBackward0>)
tensor([[-1.4894, -2.4799, -1.0467]], grad_fn=<AddmmBackward0>)
tensor([[-1.8420, -2.8804, -0.6657]], grad_fn=<AddmmBackward0>)
tensor([[-2.2392, -2.2685, -0.7448]], grad_fn=<AddmmBackward0>)
tensor([[-1.3493, -4.3650, -0.5471]], grad_fn=<AddmmBackward0>)
tensor([[-1.9722, -3.7063, -0.3451]], grad_fn=<AddmmBackward0>)
tensor([[-1.7038, -2.3794, -0.9755]], grad_fn=<AddmmBackward0>)
tensor([[-1.6391, -3.4760, -0.6105]], grad_fn=<AddmmBackward0>)
tensor([[-1.2349, -3.7238, -0.5929]], gr

In [163]:
model.train()
for epoch in range(15):
    running_loss = 0.0
    tqdm_loader = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')
    for images, labels in tqdm_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        tqdm_loader.set_postfix({'Loss': loss.item()})
    epoch_loss = running_loss / len(train_dataset)
    torch.save(model.state_dict(), f'mobilenet2_20_{epoch}.pth')
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

Epoch 1/10: 100%|██████████| 844/844 [1:05:37<00:00,  4.67s/it, Loss=0.137] 


Epoch [1/10], Loss: 0.1536


Epoch 2/10: 100%|██████████| 844/844 [1:02:29<00:00,  4.44s/it, Loss=0.265] 


Epoch [2/10], Loss: 0.1478


Epoch 3/10: 100%|██████████| 844/844 [1:04:05<00:00,  4.56s/it, Loss=0.109] 


Epoch [3/10], Loss: 0.1503


Epoch 4/10: 100%|██████████| 844/844 [1:05:07<00:00,  4.63s/it, Loss=0.276] 


Epoch [4/10], Loss: 0.1388


Epoch 5/10: 100%|██████████| 844/844 [1:05:10<00:00,  4.63s/it, Loss=0.188] 


Epoch [5/10], Loss: 0.1363


Epoch 6/10: 100%|██████████| 844/844 [1:05:06<00:00,  4.63s/it, Loss=0.188] 


Epoch [6/10], Loss: 0.1343


Epoch 7/10: 100%|██████████| 844/844 [1:05:35<00:00,  4.66s/it, Loss=0.241] 


Epoch [7/10], Loss: 0.1266


Epoch 8/10: 100%|██████████| 844/844 [1:04:50<00:00,  4.61s/it, Loss=0.0714]


Epoch [8/10], Loss: 0.1190


Epoch 9/10: 100%|██████████| 844/844 [1:05:03<00:00,  4.62s/it, Loss=0.201]  


Epoch [9/10], Loss: 0.1168


Epoch 10/10: 100%|██████████| 844/844 [1:04:49<00:00,  4.61s/it, Loss=0.182] 


Epoch [10/10], Loss: 0.1090


Epoch 11/10: 100%|██████████| 844/844 [1:04:53<00:00,  4.61s/it, Loss=0.043] 


Epoch [11/10], Loss: 0.1116


Epoch 12/10:  26%|██▌       | 221/844 [18:20<51:41,  4.98s/it, Loss=0.0389]   


KeyboardInterrupt: 

In [164]:
submission_df = pd.DataFrame(columns=['id', 'target_feature'])
test_images_folder = "data/test_images/"
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

submission_data = []

for i, filename in enumerate(os.listdir(test_images_folder)):
    img_path = os.path.join(test_images_folder, filename)
    img = Image.open(img_path).convert('RGB')
    img = test_transform(img).unsqueeze(0).to(device)
    output = model(img)
    print(output)
    _, predicted = torch.max(output, 1)
    if predicted.item() is None or predicted.item() == '':
        target_feature = 0
    else:
        target_feature = predicted.item()
    submission_data.append({'id': i, 'target_feature': target_feature})

submission_df = pd.DataFrame(submission_data)
submission_df.to_csv('submission_file-mobilenet2_30.csv', index=False)

tensor([[-2.1616, -7.4334, -0.8505]], grad_fn=<AddmmBackward0>)
tensor([[-2.7094, -4.0990, -1.6613]], grad_fn=<AddmmBackward0>)
tensor([[-1.2677, -6.1810, -1.5739]], grad_fn=<AddmmBackward0>)
tensor([[-2.0772, -5.9610, -1.1862]], grad_fn=<AddmmBackward0>)
tensor([[-1.9383, -6.8430, -0.5513]], grad_fn=<AddmmBackward0>)
tensor([[-2.1649, -6.2523, -1.0200]], grad_fn=<AddmmBackward0>)
tensor([[-1.9336, -4.8013, -1.6676]], grad_fn=<AddmmBackward0>)
tensor([[-2.1558, -4.5339, -1.6632]], grad_fn=<AddmmBackward0>)
tensor([[-2.0247, -3.9645, -2.0277]], grad_fn=<AddmmBackward0>)
tensor([[-1.1611, -5.6898, -1.4213]], grad_fn=<AddmmBackward0>)
tensor([[-2.3388, -4.2430, -1.4620]], grad_fn=<AddmmBackward0>)
tensor([[-2.0373, -5.0732, -1.6092]], grad_fn=<AddmmBackward0>)
tensor([[-2.4361, -3.7666, -1.9089]], grad_fn=<AddmmBackward0>)
tensor([[-1.7847, -4.1921, -2.0738]], grad_fn=<AddmmBackward0>)
tensor([[-1.5893, -5.2387, -1.5990]], grad_fn=<AddmmBackward0>)
tensor([[-1.4248, -5.1676, -1.5307]], gr